move compileblockstr/compileunescapestr logic into gen

2021-04-09 02:54:36 +02:00 · 2021-04-09 02:54:36 +02:00 · bac186a0c7
parent c0fc1aa452
commit bac186a0c7
3 changed files with 97 additions and 81 deletions
--- a/src/cs_gen.cc
+++ b/src/cs_gen.cc
@ -73,6 +73,97 @@ void gen_state::gen_val_string(std::string_view v) {
    code.push_back(u);
 }

+/* Emit a BC_INST_VAL | BC_RET_STRING word carrying (length << 8), then the
+ * zero-padded bytes func wrote to a scratch buffer; func returns the length.
+ * FIXME: figure out how to do without the intermediate buffer */
+template<typename F>
+static void gen_str_filter(
+    valbuf<std::uint32_t> &code, thread_state &ts, std::string_view v, F &&func
+) {
+    constexpr auto wsz = sizeof(std::uint32_t);
+    code.push_back(BC_INST_VAL | BC_RET_STRING);
+    /* enough whole words to hold v plus at least one padding byte */
+    auto nwords = (v.size() / wsz) + 1;
+    code.reserve(code.size() + nwords);
+    auto al = std_allocator<char>{ts.istate};
+    /* NOTE(review): buf is not released if func or append throws */
+    auto *buf = al.allocate(nwords * wsz);
+    auto len = func(&buf[0]);
+    /* zero the rest of the last word (a whole word if len is aligned) */
+    std::memset(&buf[len], 0, wsz - len % wsz);
+    code.back() |= (len << 8);
+    auto *ubuf = reinterpret_cast<std::uint32_t *>(buf);
+    code.append(ubuf, ubuf + ((len / wsz) + 1));
+    al.deallocate(buf, nwords * wsz);
+}
+
+void gen_state::gen_val_string_unescape(std::string_view v) {
+    /* length = distance from dest to the pointer unescape_string returns */
+    gen_str_filter(code, ts, v, [&v](auto *dest) {
+        return std::size_t(unescape_string(dest, v) - dest);
+    });
+}
+
+/* Emit v as a string, dropping '\r' and plain // comments outside quotes. */
+void gen_state::gen_val_block(std::string_view v) {
+    gen_str_filter(code, ts, v, [&v, this](auto *buf) {
+        auto *str = v.data();
+        auto *send = v.data() + v.size();
+        std::size_t len = 0;
+        for (std::string_view chrs{"\r/\"@]"}; str < send;) {
+            auto *orig = str;
+            /* find a boundary character */
+            str = std::find_first_of(str, send, chrs.begin(), chrs.end());
+            /* copy everything up until boundary character */
+            std::memcpy(&buf[len], orig, str - orig);
+            len += (str - orig);
+            /* found nothing: bail out */
+            if (str == send) {
+                return len;
+            }
+            switch (*str) {
+                case '\r': /* filter out */
+                    ++str;
+                    break;
+                case '\"': { /* quoted string */
+                    char const *start = str;
+                    str = parse_string(
+                        *ts.pstate, std::string_view{str, send}
+                    );
+                    std::memcpy(&buf[len], start, std::size_t(str - start));
+                    len += (str - start);
+                    break;
+                }
+                case '/':
+                    if (((str + 1) != send) && (str[1] == '/')) {
+                        /* comment: kept only if punctuation follows `//` */
+                        char const *start = str;
+                        str = std::find(str, send, '\n');
+                        if (
+                            ((start + 2) != send) &&
+                            std::ispunct(static_cast<unsigned char>(start[2]))
+                        ) {
+                            std::memcpy(
+                                &buf[len], start, std::size_t(str - start)
+                            );
+                            len += (str - start);
+                        }
+                    } else {
+                        /* write and skip */
+                        buf[len++] = *str++;
+                    }
+                    break;
+                case '@':
+                case ']':
+                    /* str < send always holds here, so just copy it */
+                    buf[len++] = *str++;
+                    break;
+            }
+        }
+        return len;
+    });
+}
+
 void gen_state::gen_val_ident() {
    gen_val_ident(*ts.istate->id_dummy);
 }
--- a/src/cs_gen.hh
+++ b/src/cs_gen.hh
@ -33,6 +33,8 @@ struct gen_state {
    void gen_val_float(std::string_view v);

    void gen_val_string(std::string_view v = std::string_view{});
+    void gen_val_string_unescape(std::string_view str);
+    void gen_val_block(std::string_view str);

    void gen_val_ident();
    void gen_val_ident(ident &i);
--- a/src/cs_parser.cc
+++ b/src/cs_parser.cc
@ -403,26 +403,6 @@ static inline int ret_code(int type, int def = 0) {
    return type << BC_INST_RET;
 }

-static inline void compileunescapestr(parser_state &gs) {
-    auto str = gs.get_str();
-    gs.gs.code.push_back(BC_INST_VAL | BC_RET_STRING);
-    gs.gs.code.reserve(
-        gs.gs.code.size() + str.size() / sizeof(uint32_t) + 1
-    );
-    size_t bufs = (gs.gs.code.capacity() - gs.gs.code.size()) * sizeof(uint32_t);
-    auto alloc = std_allocator<char>{gs.ts.istate};
-    auto *buf = alloc.allocate(bufs + 1);
-    char *wbuf = unescape_string(&buf[0], str);
-    memset(
-        &buf[wbuf - buf], 0,
-        sizeof(uint32_t) - (wbuf - buf) % sizeof(uint32_t)
-    );
-    gs.gs.code.back() |= (wbuf - buf) << 8;
-    uint32_t *ubuf = reinterpret_cast<uint32_t *>(buf);
-    gs.gs.code.append(ubuf, ubuf + ((wbuf - buf) / sizeof(uint32_t) + 1));
-    alloc.deallocate(buf, bufs + 1);
-}
-
 static bool compilearg(
    parser_state &gs, int wordtype, charbuf *word = nullptr
 );
@ -640,62 +620,6 @@ invalid:
    }
 }

-static bool compileblockstr(parser_state &gs, char const *str, char const *send) {
-    std::size_t startc = gs.gs.code.size();
-    gs.gs.code.push_back(BC_INST_VAL | BC_RET_STRING);
-    gs.gs.code.reserve(gs.gs.code.size() + (send - str) / sizeof(uint32_t) + 1);
-    auto alloc = std_allocator<char>{gs.ts.istate};
-    auto asz = ((send - str) / sizeof(uint32_t) + 1) * sizeof(uint32_t);
-    char *buf = alloc.allocate(asz);
-    std::size_t len = 0;
-    while (str < send) {
-        std::string_view chrs{"\r/\"@]"};
-        char const *orig = str;
-        str = std::find_first_of(str, send, chrs.begin(), chrs.end());
-        memcpy(&buf[len], orig, str - orig);
-        len += (str - orig);
-        if (str == send) {
-            goto done;
-        }
-        switch (*str) {
-            case '\r':
-                ++str;
-                break;
-            case '\"': {
-                char const *start = str;
-                str = parse_string(
-                    *gs.ts.pstate, std::string_view{str, send}
-                );
-                memcpy(&buf[len], start, std::size_t(str - start));
-                len += (str - start);
-                break;
-            }
-            case '/':
-                if (((str + 1) != send) && str[1] == '/') {
-                    str = std::find(str, send, '\n');
-                } else {
-                    buf[len++] = *str++;
-                }
-                break;
-            case '@':
-            case ']':
-                if (str < send) {
-                    buf[len++] = *str++;
-                } else {
-                    goto done;
-                }
-                break;
-        }
-    }
-done:
-    memset(&buf[len], '\0', sizeof(uint32_t) - len % sizeof(uint32_t));
-    uint32_t *ubuf = reinterpret_cast<uint32_t *>(buf);
-    gs.gs.code.append(ubuf, ubuf + (len / sizeof(uint32_t) + 1));
-    gs.gs.code[startc] |= len << 8;
-    alloc.deallocate(buf, asz);
-    return true;
-}
-
 static bool compileblocksub(parser_state &gs) {
    charbuf lookup{gs.ts};
    switch (gs.current()) {
@ -790,9 +714,8 @@ static void compileblockmain(parser_state &gs, int wordtype) {
                    throw error{*gs.ts.pstate, "too many @s"};
                    return;
                }
-                if (compileblockstr(gs, start, esc)) {
-                    concs++;
-                }
+                gs.gs.gen_val_block(std::string_view{start, esc});
+                concs++;
                if (compileblocksub(gs)) {
                    concs++;
                }
@ -829,7 +752,7 @@ static void compileblockmain(parser_state &gs, int wordtype) {
                    return;
            }
        }
-        compileblockstr(gs, start, gs.source - 1);
+        gs.gs.gen_val_block(std::string_view{start, gs.source - 1});
        if (concs > 1) {
            concs++;
        }
@ -918,7 +841,7 @@ static bool compilearg(
                    break;
                case VAL_ANY:
                case VAL_STRING:
-                    compileunescapestr(gs);
+                    gs.gs.gen_val_string_unescape(gs.get_str());
                    break;
                default: {
                    int line = int(gs.current_line);