implement the sorting logic in unicode generator
parent
80aadd906e
commit
2b291bca39
|
@ -32,6 +32,12 @@ SOFTWARE.**
|
|||
Additionally some code from the libc++ project has been used as a reference;
|
||||
libc++ is a part of the LLVM project.
|
||||
|
||||
The libutf project (https://github.com/cls/libutf) was used as an inspiration
|
||||
for some of the Unicode implementation bits within libostd and the gen\_unicode
|
||||
program used to generate the necessary ctype tables, but all of the code was
|
||||
written from scratch; nevertheless, I would like to give some credit and thanks
|
||||
to the project here.
|
||||
|
||||
Additional thanks to Dale Weiler aka graphitemaster (reference code in the
|
||||
Neothyne project) and cppreference.com.
|
||||
|
||||
|
|
7
build.cc
7
build.cc
|
@ -1,4 +1,9 @@
|
|||
/* A build system for libostd. */
|
||||
/* A simple build system to build libostd.
|
||||
*
|
||||
* This file is a part of the libostd project. Libostd is licensed under the
|
||||
* University of Illinois/NCSA Open Source License, as is this file. See the
|
||||
* COPYING.md file for further information.
|
||||
*/
|
||||
|
||||
/* for Windows so that we avoid dllimport/dllexport */
|
||||
#define OSTD_BUILD_LIB
|
||||
|
|
143
gen_unicode.cc
143
gen_unicode.cc
|
@ -1,16 +1,28 @@
|
|||
/* This simple program generates the tables necessary for Unicode character
|
||||
* types. It's inspired by the mkrunetype.awk generator from the libutf
|
||||
* project, see COPYING.md.
|
||||
*
|
||||
* This file is a part of the libostd project. Libostd is licensed under the
|
||||
* University of Illinois/NCSA Open Source License, as is this file. See the
|
||||
* COPYING.md file for further information.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <cctype>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <ostd/io.hh>
|
||||
#include <ostd/string.hh>
|
||||
#include <ostd/algorithm.hh>
|
||||
|
||||
using ostd::string_range;
|
||||
|
||||
using code_t = std::uint32_t;
|
||||
using code_vec = std::vector<code_t>;
|
||||
|
||||
code_t hex_to_code(ostd::string_range hs) {
|
||||
code_t hex_to_code(string_range hs) {
|
||||
code_t ret = 0;
|
||||
for (char c: hs) {
|
||||
if (!std::isxdigit(c |= 32)) {
|
||||
|
@ -38,8 +50,8 @@ struct parse_state {
|
|||
}
|
||||
}
|
||||
|
||||
void parse_line(ostd::string_range line) {
|
||||
std::array<ostd::string_range, 15> bits;
|
||||
void parse_line(string_range line) {
|
||||
std::array<string_range, 15> bits;
|
||||
for (std::size_t n = 0;;) {
|
||||
auto sc = ostd::find(line, ';');
|
||||
if (!sc) {
|
||||
|
@ -96,6 +108,105 @@ struct parse_state {
|
|||
}
|
||||
/* good enough for now, ignore the rest */
|
||||
}
|
||||
|
||||
/* Splits `codes` into compact table entries and prints each table.
 *
 * The list is walked front to back and every element ends up in exactly
 * one of the following groups:
 *
 *   ranges  - runs in which each code is the previous code plus 1
 *   laces   - runs in which each code is the previous code plus 2
 *   singles - anything that starts neither a range nor a lace
 *
 * `cases` is optional; when given it must be as long as `codes`, otherwise
 * std::runtime_error is thrown. cases[i] is treated as the counterpart of
 * codes[i] (e.g. its case mapping) and further restricts the grouping:
 * a range also needs the counterparts to advance by 1, and a lace needs
 * every counterpart to be code + 1 ("laces1") or code - 1 ("laces2").
 *
 * NOTE(review): only neighbouring elements are compared, so this presumably
 * expects `codes` to be in ascending order - confirm against the code that
 * fills the lists.
 */
void build(code_vec const &codes, code_vec const &cases = code_vec{}) {
    code_vec singles;
    code_vec singles_cases;
    code_vec ranges_beg;
    code_vec ranges_end;
    code_vec ranges_cases;
    /* one lace table per counterpart offset, see lace_offs below */
    code_vec laces_beg[2];
    code_vec laces_end[2];
    /* laces_*[0] holds counterpart == code + 1, laces_*[1] code - 1 */
    int const lace_offs[2] = {1, -1};

    if (!cases.empty() && (cases.size() != codes.size())) {
        throw std::runtime_error{"mismatched code lists"};
    }

    /* is there a successor of codes[i] and is it exactly `off` further? */
    auto match_pair = [&codes](std::size_t i, code_t off) {
        return (
            ((i + 1) < codes.size()) && (codes[i + 1] == (codes[i] + off))
        );
    };
    /* do elements i and i + 1 belong to the same range? */
    auto match_range = [&cases, &match_pair](std::size_t i) {
        return match_pair(i, 1) && (
            cases.empty() || (cases[i + 1] == (cases[i] + 1))
        );
    };
    /* do elements i and i + 1 belong to the same lace with the given
     * counterpart offset? the offset is applied in unsigned arithmetic,
     * so -1 wraps around and still compares correctly
     */
    auto match_lace = [
        &codes, &cases, &match_pair
    ](std::size_t i, int off) {
        auto const uoff = static_cast<code_t>(off);
        return match_pair(i, 2) && (cases.empty() || (
            (cases[i + 1] == (codes[i + 1] + uoff)) &&
            (cases[i    ] == (codes[i    ] + uoff))
        ));
    };
    /* index of the lace table element i starts, or 2 when it starts none */
    auto lace_kind = [&match_lace, &lace_offs](std::size_t i) {
        for (std::size_t j = 0; j < 2; ++j) {
            if (match_lace(i, lace_offs[j])) {
                return j;
            }
        }
        return std::size_t(2);
    };

    for (std::size_t i = 0, ncodes = codes.size(); i < ncodes; ++i) {
        if (match_range(i)) {
            ranges_beg.push_back(codes[i]);
            if (!cases.empty()) {
                ranges_cases.push_back(cases[i]);
            }
            /* go to the end of sequence */
            for (++i; match_range(i); ++i) {
                continue;
            }
            /* end of range, try others */
            ranges_end.push_back(codes[i]);
            continue;
        }
        if (std::size_t j = lace_kind(i); j < 2) {
            laces_beg[j].push_back(codes[i]);
            /* keep following the lace with the offset that started it */
            for (++i; match_lace(i, lace_offs[j]); ++i) {
                continue;
            }
            laces_end[j].push_back(codes[i]);
            continue;
        }
        singles.push_back(codes[i]);
        if (!cases.empty()) {
            singles_cases.push_back(cases[i]);
        }
    }

    /* prints a named table with `ncol` columns; only the first `ncol`
     * of the column vectors are read, and an empty table prints nothing
     */
    auto build_list = [](
        string_range name, std::size_t ncol,
        code_vec const &col1, code_vec const &col2, code_vec const &col3
    ) {
        if (col1.empty()) {
            return;
        }
        ostd::writefln("%s:", name);
        for (std::size_t i = 0; i < col1.size(); ++i) {
            switch (ncol) {
                case 1:
                    ostd::writefln(" 0x%06X", col1[i]);
                    break;
                case 2:
                    ostd::writefln(" 0x%06X, 0x%06X", col1[i], col2[i]);
                    break;
                case 3:
                    ostd::writefln(
                        " 0x%06X, 0x%06X, 0x%06X",
                        col1[i], col2[i], col3[i]
                    );
                    break;
                default:
                    throw std::runtime_error{"invalid column number"};
            }
        }
    };

    /* the counterpart column is only printed when a case list was given */
    std::size_t const extra = cases.empty() ? 0 : 1;

    build_list(
        "ranges", 2 + extra, ranges_beg, ranges_end, ranges_cases
    );
    build_list("laces1", 2, laces_beg[0], laces_end[0], laces_beg[0]);
    build_list("laces2", 2, laces_beg[1], laces_end[1], laces_beg[1]);
    build_list(
        "singles", 1 + extra, singles, singles_cases, singles
    );
}
|
||||
};
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
@ -104,7 +215,7 @@ int main(int argc, char **argv) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
ostd::string_range fname = argv[1];
|
||||
string_range fname = argv[1];
|
||||
ostd::file_stream f{fname};
|
||||
if (!f.is_open()) {
|
||||
ostd::writefln("cannot open file '%s'", fname);
|
||||
|
@ -113,7 +224,27 @@ int main(int argc, char **argv) {
|
|||
|
||||
parse_state ps;
|
||||
|
||||
for (auto const &line: f.iter_lines()) {
|
||||
ps.parse_line(line);
|
||||
try {
|
||||
for (auto const &line: f.iter_lines()) {
|
||||
ps.parse_line(line);
|
||||
}
|
||||
} catch (std::runtime_error const &e) {
|
||||
ostd::writeln(e.what());
|
||||
return 1;
|
||||
}
|
||||
|
||||
ostd::writeln("ALPHAS:");
|
||||
ps.build(ps.alphas);
|
||||
ostd::writeln("CONTROL:");
|
||||
ps.build(ps.controls);
|
||||
ostd::writeln("DIGITS:");
|
||||
ps.build(ps.digits);
|
||||
ostd::writeln("LOWERCASE:");
|
||||
ps.build(ps.lowers, ps.touppers);
|
||||
ostd::writeln("SPACES:");
|
||||
ps.build(ps.spaces);
|
||||
ostd::writeln("TITLES:");
|
||||
ps.build(ps.titles);
|
||||
ostd::writeln("UPPERCASE:");
|
||||
ps.build(ps.uppers, ps.tolowers);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue