implement the sorting logic in unicode generator
parent
80aadd906e
commit
2b291bca39
|
@ -32,6 +32,12 @@ SOFTWARE.**
|
||||||
Additionally some code from the libc++ project has been used as a reference;
|
Additionally some code from the libc++ project has been used as a reference;
|
||||||
libc++ is a part of the LLVM project.
|
libc++ is a part of the LLVM project.
|
||||||
|
|
||||||
|
The libutf project (https://github.com/cls/libutf) was used as an inspiration
|
||||||
|
for some of the Unicode implementation bits within libostd and the gen\_unicode
|
||||||
|
program used to generate the necessary ctype tables, but all of the code was
|
||||||
|
written from scratch; nevertheless, I would like to give some credit and thanks
|
||||||
|
to the project here.
|
||||||
|
|
||||||
Additional thanks to Dale Weiler aka graphitemaster (reference code in the
|
Additional thanks to Dale Weiler aka graphitemaster (reference code in the
|
||||||
Neothyne project) and cppreference.com.
|
Neothyne project) and cppreference.com.
|
||||||
|
|
||||||
|
|
7
build.cc
7
build.cc
|
@ -1,4 +1,9 @@
|
||||||
/* A build system for libostd. */
|
/* A simple build system to build libostd.
|
||||||
|
*
|
||||||
|
* This file is a part of the libostd project. Libostd is licensed under the
|
||||||
|
* University of Illinois/NCSA Open Source License, as is this file. See the
|
||||||
|
* COPYING.md file for further information.
|
||||||
|
*/
|
||||||
|
|
||||||
/* for Windows so that we avoid dllimport/dllexport */
|
/* for Windows so that we avoid dllimport/dllexport */
|
||||||
#define OSTD_BUILD_LIB
|
#define OSTD_BUILD_LIB
|
||||||
|
|
143
gen_unicode.cc
143
gen_unicode.cc
|
@ -1,16 +1,28 @@
|
||||||
|
/* This simple program generates the tables necessary for Unicode character
|
||||||
|
* types. It's inspired by the mkrunetype.awk generator from the libutf
|
||||||
|
* project, see COPYING.md.
|
||||||
|
*
|
||||||
|
* This file is a part of the libostd project. Libostd is licensed under the
|
||||||
|
* University of Illinois/NCSA Open Source License, as is this file. See the
|
||||||
|
* COPYING.md file for further information.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <array>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
#include <ostd/io.hh>
|
#include <ostd/io.hh>
|
||||||
#include <ostd/string.hh>
|
#include <ostd/string.hh>
|
||||||
#include <ostd/algorithm.hh>
|
#include <ostd/algorithm.hh>
|
||||||
|
|
||||||
|
using ostd::string_range;
|
||||||
|
|
||||||
using code_t = std::uint32_t;
|
using code_t = std::uint32_t;
|
||||||
using code_vec = std::vector<code_t>;
|
using code_vec = std::vector<code_t>;
|
||||||
|
|
||||||
code_t hex_to_code(ostd::string_range hs) {
|
code_t hex_to_code(string_range hs) {
|
||||||
code_t ret = 0;
|
code_t ret = 0;
|
||||||
for (char c: hs) {
|
for (char c: hs) {
|
||||||
if (!std::isxdigit(c |= 32)) {
|
if (!std::isxdigit(c |= 32)) {
|
||||||
|
@ -38,8 +50,8 @@ struct parse_state {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_line(ostd::string_range line) {
|
void parse_line(string_range line) {
|
||||||
std::array<ostd::string_range, 15> bits;
|
std::array<string_range, 15> bits;
|
||||||
for (std::size_t n = 0;;) {
|
for (std::size_t n = 0;;) {
|
||||||
auto sc = ostd::find(line, ';');
|
auto sc = ostd::find(line, ';');
|
||||||
if (!sc) {
|
if (!sc) {
|
||||||
|
@ -96,6 +108,105 @@ struct parse_state {
|
||||||
}
|
}
|
||||||
/* good enough for now, ignore the rest */
|
/* good enough for now, ignore the rest */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Splits a list of code points into three kinds of table entries and
 * prints each non-empty table using ostd::writefln:
 *
 * - ranges: runs in which each code is the previous code plus 1 (and,
 *   when a case list is given, each case is the previous case plus 1);
 *   printed as begin, end and optionally the case of the first code
 * - laces: runs in which each code is the previous code plus 2 and,
 *   when a case list is given, every case equals its code plus a fixed
 *   offset; "laces1" holds offset +1 and "laces2" holds offset -1;
 *   printed as begin, end
 * - singles: everything else; printed as the code and optionally its case
 *
 * `codes` is the list to classify; `cases` is either empty or holds one
 * case mapping per element of `codes`.
 *
 * Throws std::runtime_error when `cases` is non-empty and its size differs
 * from the size of `codes`.
 */
void build(code_vec const &codes, code_vec const &cases = code_vec{}) {
    code_vec singles;
    code_vec singles_cases;
    code_vec ranges_beg;
    code_vec ranges_end;
    code_vec ranges_cases;
    code_vec laces_beg[2];
    code_vec laces_end[2];

    /* lace offsets in table order: index 0 is "laces1", index 1 "laces2" */
    int const lace_offs[2] = {1, -1};

    if (!cases.empty() && (cases.size() != codes.size())) {
        throw std::runtime_error{"mismatched code lists"};
    }

    /* true when element i has a successor that is exactly `off` larger */
    auto match_pair = [&codes](std::size_t i, std::size_t off) {
        return (
            ((i + 1) < codes.size()) && (codes[i + 1] == (codes[i] + off))
        );
    };
    /* true when i and i + 1 are consecutive in codes (and in cases) */
    auto match_range = [&cases, &match_pair](std::size_t i) {
        return match_pair(i, 1) && (
            cases.empty() || (cases[i + 1] == (cases[i] + 1))
        );
    };
    /* true when i and i + 1 are 2 apart and both map to code + off;
     * the offset is applied with unsigned wraparound so -1 works
     */
    auto match_lace = [&codes, &cases, &match_pair](std::size_t i, int off) {
        if (!match_pair(i, 2)) {
            return false;
        }
        if (cases.empty()) {
            return true;
        }
        auto const uoff = static_cast<code_t>(off);
        return (
            (cases[i + 1] == code_t(codes[i + 1] + uoff)) &&
            (cases[i    ] == code_t(codes[i    ] + uoff))
        );
    };

    for (std::size_t i = 0, ncodes = codes.size(); i < ncodes; ++i) {
        if (match_range(i)) {
            ranges_beg.push_back(codes[i]);
            if (!cases.empty()) {
                ranges_cases.push_back(cases[i]);
            }
            /* go to the end of sequence */
            for (++i; match_range(i); ++i) {
                continue;
            }
            /* end of range, try others */
            ranges_end.push_back(codes[i]);
            continue;
        }
        /* try each lace offset; the one that matched at the start of the
         * run selects the table and is also used to follow the run, so
         * that a -1 lace is not stored or extended as a +1 lace
         */
        bool laced = false;
        for (std::size_t j = 0; j < 2; ++j) {
            if (!match_lace(i, lace_offs[j])) {
                continue;
            }
            laces_beg[j].push_back(codes[i]);
            for (++i; match_lace(i, lace_offs[j]); ++i) {
                continue;
            }
            laces_end[j].push_back(codes[i]);
            laced = true;
            break;
        }
        if (laced) {
            continue;
        }
        singles.push_back(codes[i]);
        if (!cases.empty()) {
            singles_cases.push_back(cases[i]);
        }
    }

    /* prints a named table with ncol columns, one row per element of
     * col1; nothing is printed when col1 is empty; columns past ncol
     * are never read
     */
    auto build_list = [](
        string_range name, std::size_t ncol,
        code_vec const &col1, code_vec const &col2, code_vec const &col3
    ) {
        if (col1.empty()) {
            return;
        }
        ostd::writefln("%s:", name);
        for (std::size_t i = 0; i < col1.size(); ++i) {
            switch (ncol) {
                case 1:
                    ostd::writefln(" 0x%06X", col1[i]);
                    break;
                case 2:
                    ostd::writefln(" 0x%06X, 0x%06X", col1[i], col2[i]);
                    break;
                case 3:
                    ostd::writefln(
                        " 0x%06X, 0x%06X, 0x%06X",
                        col1[i], col2[i], col3[i]
                    );
                    break;
                default:
                    throw std::runtime_error{"invalid column number"};
            }
        }
    };

    /* the case column is only present when a case list was given */
    bool const has_cases = !cases.empty();

    build_list(
        "ranges", has_cases ? 3 : 2, ranges_beg, ranges_end, ranges_cases
    );
    build_list("laces1", 2, laces_beg[0], laces_end[0], laces_beg[0]);
    build_list("laces2", 2, laces_beg[1], laces_end[1], laces_beg[1]);
    build_list(
        "singles", has_cases ? 2 : 1, singles, singles_cases, singles
    );
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
|
@ -104,7 +215,7 @@ int main(int argc, char **argv) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ostd::string_range fname = argv[1];
|
string_range fname = argv[1];
|
||||||
ostd::file_stream f{fname};
|
ostd::file_stream f{fname};
|
||||||
if (!f.is_open()) {
|
if (!f.is_open()) {
|
||||||
ostd::writefln("cannot open file '%s'", fname);
|
ostd::writefln("cannot open file '%s'", fname);
|
||||||
|
@ -113,7 +224,27 @@ int main(int argc, char **argv) {
|
||||||
|
|
||||||
parse_state ps;
|
parse_state ps;
|
||||||
|
|
||||||
for (auto const &line: f.iter_lines()) {
|
try {
|
||||||
ps.parse_line(line);
|
for (auto const &line: f.iter_lines()) {
|
||||||
|
ps.parse_line(line);
|
||||||
|
}
|
||||||
|
} catch (std::runtime_error const &e) {
|
||||||
|
ostd::writeln(e.what());
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ostd::writeln("ALPHAS:");
|
||||||
|
ps.build(ps.alphas);
|
||||||
|
ostd::writeln("CONTROL:");
|
||||||
|
ps.build(ps.controls);
|
||||||
|
ostd::writeln("DIGITS:");
|
||||||
|
ps.build(ps.digits);
|
||||||
|
ostd::writeln("LOWERCASE:");
|
||||||
|
ps.build(ps.lowers, ps.touppers);
|
||||||
|
ostd::writeln("SPACES:");
|
||||||
|
ps.build(ps.spaces);
|
||||||
|
ostd::writeln("TITLES:");
|
||||||
|
ps.build(ps.titles);
|
||||||
|
ostd::writeln("UPPERCASE:");
|
||||||
|
ps.build(ps.uppers, ps.tolowers);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue