/* smu - simple markup
 * Copyright (C) <2007, 2008> Enno Boland
 * Copyright (C) 2025 Enno Tensing
 *
 * See LICENSE for further informations
 */
#define _LARGEFILE64_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/* VERSION is not defined anywhere in this file; presumably the build passes
 * it on the command line. The fallback keeps the file compilable alone. */
#ifndef VERSION
#define VERSION "unknown"
#endif

/* Not every libc exposes O_LARGEFILE; 0 makes it a harmless no-op flag. */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif

/* Number of elements of a real array (not a pointer). */
#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Store character `a` at index `i` of the heap buffer `b`, growing the
 * buffer by BUFSIZ whenever `i` reaches a multiple of BUFSIZ. Callers must
 * therefore write indices consecutively. Allocation failure is fatal
 * (see ereallocz), so `b` is never left NULL and nothing is leaked.
 */
#define ADDC(b, i, a)                                                \
	do {                                                         \
		if ((i) % BUFSIZ == 0)                               \
			(b) = ereallocz((b), (size_t)(i) + BUFSIZ);  \
		(b)[(i)] = (a);                                      \
	} while (0)

/*
 * A parser inspects the text at [begin, end) and returns
 *   0   if it does not apply,
 *   n>0 if it emitted output and consumed n characters,
 *   n<0 if it consumed |n| characters and the text that follows starts a
 *       new block (process() sets newblock from the sign).
 */
typedef int (*Parser)(const char *, const char *, int);

struct tag {
	const char *search; /* markup token to look for */
	int process;        /* 0: escape content, 1: parse inline, 2: parse as block */
	const char *before; /* emitted before the content */
	const char *after;  /* emitted after the content */
};

off64_t get_file_size(const char *);
char *read_file(const char *, off64_t);

static int doamp(const char *begin, const char *end, int newblock);
static int docomment(const char *begin, const char *end, int newblock);
static int dogtlt(const char *begin, const char *end, int newblock);
static int dohtml(const char *begin, const char *end, int newblock);
static int dolineprefix(const char *begin, const char *end, int newblock);
static int dolink(const char *begin, const char *end, int newblock);
static int dolist(const char *begin, const char *end, int newblock);
static int doparagraph(const char *begin, const char *end, int newblock);
static int doreplace(const char *begin, const char *end, int newblock);
static int doshortlink(const char *begin, const char *end, int newblock);
static int dosurround(const char *begin, const char *end, int newblock);
static int dounderline(const char *begin, const char *end, int newblock);
static void *ereallocz(void *p, size_t size);
static void eprint(const char *format, ...);
static void hprint(const char *begin, const char *end);
static void process(const char *begin, const char *end, int isblock);
static char *read_stream(FILE *in, size_t *len);

/* list of parsers; process() tries them in this order */
static Parser parsers[] = { dounderline, docomment, dolineprefix, dolist,
			    doparagraph, dogtlt,    dosurround,   dolink,
			    doshortlink, dohtml,    doamp,        doreplace };

static int nohtml = 0;

/*
 * NOTE(review): the whitespace inside the first search string arrived
 * collapsed; four spaces (the usual Markdown code indent) is assumed here.
 * Confirm against the project's tests.
 */
static const struct tag lineprefix[] = {
	{ "    ", 0, "<pre><code>", "\n</code></pre>" },
	{ "\t", 0, "<pre><code>", "\n</code></pre>" },
	{ ">", 2, "<blockquote>", "</blockquote>" },
	{ "###### ", 1, "<h6>", "</h6>" },
	{ "##### ", 1, "<h5>", "</h5>" },
	{ "#### ", 1, "<h4>", "</h4>" },
	{ "### ", 1, "<h3>", "</h3>" },
	{ "## ", 1, "<h2>", "</h2>" },
	{ "# ", 1, "<h1>", "</h1>" },
	{ "- - -\n", 1, "<hr />", "" },
};

static const struct tag underline[] = {
	{ "=", 1, "<h1>", "</h1>\n" },
	{ "-", 1, "<h2>", "</h2>\n" },
};

/* Longer delimiters come first so that "***" is not matched as "*". */
static const struct tag surround[] = {
	{ "```", 0, "<code>", "</code>" },
	{ "``", 0, "<code>", "</code>" },
	{ "`", 0, "<code>", "</code>" },
	{ "___", 1, "<strong><em>", "</em></strong>" },
	{ "***", 1, "<strong><em>", "</em></strong>" },
	{ "__", 1, "<strong>", "</strong>" },
	{ "**", 1, "<strong>", "</strong>" },
	{ "_", 1, "<em>", "</em>" },
	{ "*", 1, "<em>", "</em>" },
};

/* Backslash escapes: { input sequence, literal output }. */
static const char *replace[][2] = {
	{ "\\\\", "\\" }, { "\\`", "`" }, { "\\*", "*" }, { "\\_", "_" },
	{ "\\{", "{" },   { "\\}", "}" }, { "\\[", "[" }, { "\\]", "]" },
	{ "\\(", "(" },   { "\\)", ")" }, { "\\#", "#" }, { "\\+", "+" },
	{ "\\-", "-" },   { "\\.", "." }, { "\\!", "!" },
};

/*
 * Sequences that trigger extra output without being consumed.
 * NOTE(review): the trigger is assumed to be two spaces plus newline (the
 * Markdown hard line break); the spaces arrived collapsed. Confirm.
 */
static const char *insert[][2] = {
	{ "  \n", "<br />" },
};

/*
 * Return the size in bytes of the file at `path` as reported by stat(),
 * or -1 if stat() fails (errno is left set by stat()).
 */
off64_t get_file_size(const char *path)
{
	struct stat st;

	if (stat(path, &st) == 0)
		return st.st_size;
	return -1;
}

/*
 * Read exactly `file_size` bytes of `path` into a freshly allocated,
 * zero-initialised buffer of file_size + 4 bytes.
 *
 * Returns the buffer (caller frees) or NULL after printing a diagnostic
 * when the size is unusable, the file cannot be opened, memory runs out,
 * a read fails, or the file ends before `file_size` bytes were read.
 */
char *read_file(const char *path, off64_t file_size)
{
	char *buf;
	size_t want, got = 0;
	int fd;

	if (file_size < 0 || (uintmax_t)file_size > SIZE_MAX - 4) {
		eprint("%s: unsupported file size\n", path);
		return NULL;
	}
	want = (size_t)file_size;

	fd = open(path, O_RDONLY | O_LARGEFILE | O_NONBLOCK);
	if (fd < 0) {
		perror(path);
		return NULL;
	}

	buf = calloc(want + 4, sizeof(char));
	if (!buf) {
		perror(path);
		close(fd);
		return NULL;
	}

	/* read() may legally return fewer bytes than asked for; keep going. */
	while (got < want) {
		ssize_t n = read(fd, buf + got, want - got);

		if (n < 0) {
			if (errno == EINTR)
				continue;
			perror(path);
			goto fail;
		}
		if (n == 0) {
			/* Not an errno condition, so perror() would mislead. */
			eprint("%s: unexpected end of file\n", path);
			goto fail;
		}
		got += (size_t)n;
	}
	close(fd);
	return buf;

fail:
	close(fd);
	free(buf);
	return NULL;
}

/*
 * Read `in` until end of file into a growing heap buffer. On success the
 * byte count is stored in *len and the buffer (which always has room for
 * one extra terminator byte at index *len) is returned; the caller frees
 * it. Returns NULL after printing a diagnostic on a read error.
 */
static char *read_stream(FILE *in, size_t *len)
{
	size_t cap = 2 * BUFSIZ, used = 0, n;
	char *buf = ereallocz(NULL, cap);

	while ((n = fread(buf + used, 1, cap - used - 1, in)) > 0) {
		used += n;
		if (cap - used <= 1) {
			cap *= 2;
			buf = ereallocz(buf, cap);
		}
	}
	if (ferror(in)) {
		eprint("error while reading input\n");
		free(buf);
		return NULL;
	}
	*len = used;
	return buf;
}

/* printf-style message to stderr. Does not terminate the program. */
static void eprint(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
}

/*
 * Escape a lone '&' as "&amp;". Unless -n is active, an '&' that is
 * followed by ';' before any blank, backslash or newline (i.e. looks like
 * an entity), or that runs to the end of input, is left for the caller.
 */
static int doamp(const char *begin, const char *end, int newblock)
{
	const char *p;

	(void)newblock;
	if (*begin != '&')
		return 0;
	if (!nohtml) {
		for (p = begin + 1; p != end && !strchr("; \\\n\t", *p); p++)
			;
		if (p == end || *p == ';')
			return 0;
	}
	fputs("&amp;", stdout);
	return 1;
}

/*
 * Escape '<' and '>' that cannot belong to an HTML tag: a '<' not followed
 * by a letter becomes "&lt;", and "c>" where c is neither a letter nor one
 * of / " ' is printed as c followed by "&gt;". Disabled with -n, where
 * process() escapes every character itself.
 */
static int dogtlt(const char *begin, const char *end, int newblock)
{
	int brpos;
	char c;

	(void)newblock;
	if (nohtml || begin + 1 >= end)
		return 0;
	brpos = begin[1] == '>';
	if (!brpos && *begin != '<')
		return 0;
	c = begin[brpos ? 0 : 1];
	if (!brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
		fputs("&lt;", stdout);
		return 1;
	} else if (brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') &&
		   !strchr("/\"'", c)) {
		fprintf(stdout, "%c&gt;", c);
		return 2;
	}
	return 0;
}

/*
 * Copy an HTML comment through verbatim, followed by a newline. The
 * result is negative (block-level) when the comment opened a new block.
 */
static int docomment(const char *begin, const char *end, int newblock)
{
	const char *p;

	if (nohtml || strncmp("<!--", begin, 4))
		return 0;
	p = strstr(begin, "-->");
	if (!p || p + 3 >= end)
		return 0;
	fprintf(stdout, "%.*s\n", (int)(p + 3 - begin), begin);
	return (int)(p + 3 - begin) * (newblock ? -1 : 1);
}

/*
 * Pass raw HTML through untouched: from an opening tag up to its matching
 * closing tag if one is found, otherwise just the opening tag itself.
 */
static int dohtml(const char *begin, const char *end, int newblock)
{
	const char *p, *tag, *tagend;

	(void)newblock;
	if (nohtml || begin + 2 >= end)
		return 0;
	p = begin;
	if (p[0] != '<' || !isalpha((unsigned char)p[1]))
		return 0;
	p++;
	tag = p;
	/* Bounds check first so *p is never read at or past end. */
	for (; p < end && isalnum((unsigned char)*p); p++)
		;
	tagend = p;
	if (p > end || tag == tagend)
		return 0;
	while ((p = strstr(p, "</")) && p < end) {
		p += 2;
		if (strncmp(p, tag, tagend - tag) == 0 &&
		    p[tagend - tag] == '>') {
			p++;
			/*
			 * NOTE(review): with the "- 1" the closing '>' is not
			 * part of this span; process() then emits it as an
			 * ordinary character. Kept as found - confirm intent.
			 */
			fwrite(begin, sizeof(char),
			       p - begin + tagend - tag - 1, stdout);
			return (int)(p - begin + tagend - tag - 1);
		}
	}
	p = strchr(tagend, '>');
	if (p) {
		fwrite(begin, sizeof(char), p - begin + 1, stdout);
		return (int)(p - begin + 1);
	}
	return 0;
}

/*
 * Handle constructs introduced by a per-line prefix (code, blockquote,
 * ATX headings, horizontal rule). Consecutive lines carrying the same
 * prefix are gathered, with the prefix removed, and rendered as one unit.
 * Only applies at the start of a block or right after a newline.
 */
static int dolineprefix(const char *begin, const char *end, int newblock)
{
	unsigned int i;
	size_t j, l;
	char *buffer;
	const char *p;

	if (newblock)
		p = begin;
	else if (*begin == '\n')
		p = begin + 1;
	else
		return 0;
	for (i = 0; i < LENGTH(lineprefix); i++) {
		l = strlen(lineprefix[i].search);
		if ((size_t)(end - p) < l)
			continue;
		if (strncmp(lineprefix[i].search, p, l))
			continue;
		if (*begin == '\n')
			fputc('\n', stdout);
		fputs(lineprefix[i].before, stdout);
		/* A prefix ending in '\n' is a complete one-line construct. */
		if (lineprefix[i].search[l - 1] == '\n') {
			fputc('\n', stdout);
			return (int)(l - 1);
		}
		/* ereallocz() aborts on failure and returns zeroed memory. */
		buffer = ereallocz(NULL, BUFSIZ);

		/* Collect lines into buffer while they start with the prefix */
		j = 0;
		while (p + l < end &&
		       strncmp(lineprefix[i].search, p, l) == 0) {
			p += l;

			/* Special case for blockquotes: optional space after > */
			if (lineprefix[i].search[0] == '>' && *p == ' ')
				p++;

			while (p < end) {
				ADDC(buffer, j, *p);
				j++;
				if (*(p++) == '\n')
					break;
			}
		}

		/* Skip empty lines in block */
		while (j > 0 && buffer[j - 1] == '\n')
			j--;

		ADDC(buffer, j, '\0');
		if (lineprefix[i].process)
			process(buffer, buffer + strlen(buffer),
				lineprefix[i].process >= 2);
		else
			hprint(buffer, buffer + strlen(buffer));
		puts(lineprefix[i].after);
		free(buffer);
		return -(int)(p - begin);
	}
	return 0;
}

/*
 * Render "[desc](url "title")" as a link and "![desc](url "title")" as an
 * image. The title is optional and may use " or '; the URL may contain
 * balanced parentheses and may be wrapped in angle brackets.
 */
static int dolink(const char *begin, const char *end, int newblock)
{
	int img, len, sep, parens_depth = 1;
	const char *desc, *link, *p, *q, *descend, *linkend;
	const char *title = NULL, *titleend = NULL;

	(void)newblock;
	if (*begin == '[')
		img = 0;
	else if (strncmp(begin, "![", 2) == 0)
		img = 1;
	else
		return 0;
	p = desc = begin + 1 + img;
	if (!(p = strstr(desc, "](")) || p > end)
		return 0;
	/* Each "![" inside the description owns one "]("; skip past those. */
	for (q = strstr(desc, "!["); q && q < end && q < p;
	     q = strstr(q + 1, "!["))
		if (!(p = strstr(p + 1, "](")) || p > end)
			return 0;
	descend = p;
	link = p + 2;

	/* find end of link while handling nested parens */
	q = link;
	while (parens_depth) {
		if (!(q = strpbrk(q, "()")) || q > end)
			return 0;
		if (*q == '(')
			parens_depth++;
		else
			parens_depth--;
		if (parens_depth && q < end)
			q++;
	}

	if ((p = strpbrk(link, "\"'")) && p < end && q - 1 > p + 1) {
		sep = p[0]; /* separator: can be " or ' */
		title = p + 1;
		/* strip trailing whitespace */
		for (linkend = p;
		     linkend > link && isspace((unsigned char)*(linkend - 1));
		     linkend--)
			;
		for (titleend = q - 1;
		     titleend > title && isspace((unsigned char)*titleend);
		     titleend--)
			;
		if (*titleend != sep)
			return 0;
	} else {
		linkend = q;
	}

	/* Links can be given in angular brackets */
	if (*link == '<' && *(linkend - 1) == '>') {
		link++;
		linkend--;
	}

	len = (int)(q + 1 - begin);
	if (img) {
		fputs("<img src=\"", stdout);
		hprint(link, linkend);
		fputs("\" alt=\"", stdout);
		hprint(desc, descend);
		fputs("\" ", stdout);
		if (title && titleend) {
			fputs("title=\"", stdout);
			hprint(title, titleend);
			fputs("\" ", stdout);
		}
		fputs("/>", stdout);
	} else {
		fputs("<a href=\"", stdout);
		hprint(link, linkend);
		fputs("\"", stdout);
		if (title && titleend) {
			fputs(" title=\"", stdout);
			hprint(title, titleend);
			fputs("\"", stdout);
		}
		fputs(">", stdout);
		process(desc, descend, 0);
		fputs("</a>", stdout);
	}
	return len;
}

/*
 * Render an unordered list (marker -, * or +) or an ordered list
 * ("<digits>."). Each item's text, including continuation lines indented
 * to the item's text column, is gathered into a buffer and rendered via
 * process(). Only applies at the start of a block or after a newline.
 */
static int dolist(const char *begin, const char *end, int newblock)
{
	unsigned int i, j, indent, run, ul, isblock;
	const char *p, *q;
	char *buffer = NULL;
	char marker = 0;

	isblock = 0;
	if (newblock)
		p = begin;
	else if (*begin == '\n')
		p = begin + 1;
	else
		return 0;
	q = p;
	if (*p == '-' || *p == '*' || *p == '+') {
		ul = 1;
		marker = *p;
	} else {
		ul = 0;
		for (; p < end && *p >= '0' && *p <= '9'; p++)
			;
		if (p >= end || *p != '.')
			return 0;
	}
	p++;
	if (p >= end || !(*p == ' ' || *p == '\t'))
		return 0;
	for (p++; p != end && (*p == ' ' || *p == '\t'); p++)
		;
	/* Width of "marker + blanks": the column where item text starts. */
	indent = (unsigned int)(p - q);
	buffer = ereallocz(buffer, BUFSIZ);
	if (!newblock)
		fputc('\n', stdout);
	fputs(ul ? "<ul>\n" : "<ol>\n", stdout);
	run = 1;
	/* Outer loop: one iteration per list item. */
	for (; p < end && run; p++) {
		for (i = 0; p < end && run; p++, i++) {
			if (*p == '\n') {
				if (p + 1 == end)
					break;
				else {
					/* Handle empty lines */
					for (q = p + 1;
					     q < end && (*q == ' ' || *q == '\t');
					     q++)
						;
					if (*q == '\n') {
						ADDC(buffer, i, '\0');
						i++;
						run = 0;
						isblock++;
						p = q;
					}
				}
				/* Measure how the next line is introduced. */
				q = p + 1;
				j = 0;
				if (ul && *q == marker)
					j = 1;
				else if (!ul) {
					for (; q + j != end && q[j] >= '0' &&
					       q[j] <= '9' && j < indent;
					     j++)
						;
					if (q + j == end)
						break;
					if (j > 0 && q[j] == '.')
						j++;
					else
						j = 0;
				}
				if (q + indent < end)
					for (; (q[j] == ' ' || q[j] == '\t') &&
					       j < indent;
					     j++)
						;
				if (j == indent) {
					ADDC(buffer, i, '\n');
					i++;
					p += indent;
					run = 1;
					/* Blank-led: continuation of this item;
					 * otherwise a new item starts here. */
					if (*q == ' ' || *q == '\t')
						p++;
					else
						break;
				} else if (j < indent)
					run = 0;
			}
			ADDC(buffer, i, *p);
		}
		ADDC(buffer, i, '\0');
		fputs("<li>", stdout);
		process(buffer, buffer + i,
			isblock > 1 || (isblock == 1 && run));
		fputs("</li>\n", stdout);
	}
	fputs(ul ? "</ul>\n" : "</ol>\n", stdout);
	free(buffer);
	/* Step back over trailing newlines so they are not counted. */
	p--;
	while (*(--p) == '\n')
		;
	return -(int)(p - begin + 1);
}

/*
 * Wrap the text up to the next blank line (or end of input) in <p>.
 * Only applies at the start of a block and to more than one character.
 */
static int doparagraph(const char *begin, const char *end, int newblock)
{
	const char *p;

	if (!newblock)
		return 0;
	p = strstr(begin, "\n\n");
	if (!p || p > end)
		p = end;
	if (p - begin <= 1)
		return 0;
	fputs("<p>", stdout);
	process(begin, p, 0);
	fputs("</p>\n", stdout);
	return -(int)(p - begin);
}

/*
 * Emit the `insert` text for a matching sequence without consuming it,
 * then replace a backslash escape from `replace` by its literal.
 */
static int doreplace(const char *begin, const char *end, int newblock)
{
	unsigned int i;
	size_t l;

	(void)newblock;
	for (i = 0; i < LENGTH(insert); i++)
		if (strncmp(insert[i][0], begin, strlen(insert[i][0])) == 0)
			fputs(insert[i][1], stdout);
	for (i = 0; i < LENGTH(replace); i++) {
		l = strlen(replace[i][0]);
		if ((size_t)(end - begin) < l)
			continue;
		if (strncmp(replace[i][0], begin, l) == 0) {
			fputs(replace[i][1], stdout);
			return (int)l;
		}
	}
	return 0;
}

/*
 * Render "<url>" and "<user@host>" as links. Text containing ':' or '#'
 * is treated as a URL; text containing '@' (and neither of those before
 * it) as a mail address, which is written as numeric character
 * references. Anything with whitespace before the '>' is not a link.
 */
static int doshortlink(const char *begin, const char *end, int newblock)
{
	const char *p, *c;
	int ismail = 0;

	(void)newblock;
	if (*begin != '<')
		return 0;
	for (p = begin + 1; p != end; p++) {
		switch (*p) {
		case ' ':
		case '\t':
		case '\n':
			return 0;
		case '#':
		case ':':
			ismail = -1;
			break;
		case '@':
			if (ismail == 0)
				ismail = 1;
			break;
		case '>':
			if (ismail == 0)
				return 0;
			fputs("<a href=\"", stdout);
			if (ismail == 1) {
				/* mailto: */
				fputs("&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:", stdout);
				/* unsigned char: bytes >= 0x80 must not be
				 * sign-extended into huge numbers. */
				for (c = begin + 1; c != p; c++)
					fprintf(stdout, "&#%u;",
						(unsigned int)(unsigned char)*c);
				fputs("\">", stdout);
				for (c = begin + 1; c != p; c++)
					fprintf(stdout, "&#%u;",
						(unsigned int)(unsigned char)*c);
			} else {
				hprint(begin + 1, p);
				fputs("\">", stdout);
				hprint(begin + 1, p);
			}
			fputs("</a>", stdout);
			return (int)(p - begin + 1);
		default:
			break;
		}
	}
	return 0;
}

/*
 * Render inline spans delimited by the same token on both sides
 * (code, emphasis, strong). A closing token preceded by a backslash is
 * skipped while searching.
 */
static int dosurround(const char *begin, const char *end, int newblock)
{
	unsigned int i;
	size_t l;
	const char *p, *start, *stop;

	(void)newblock;
	for (i = 0; i < LENGTH(surround); i++) {
		l = strlen(surround[i].search);
		if ((size_t)(end - begin) < 2 * l ||
		    strncmp(begin, surround[i].search, l) != 0)
			continue;
		start = begin + l;
		p = start - 1;
		do {
			stop = p;
			p = strstr(p + 1, surround[i].search);
		} while (p && p[-1] == '\\');
		if (p && p[-1] != '\\')
			stop = p;
		if (!stop || stop < start || stop >= end)
			continue;
		fputs(surround[i].before, stdout);

		/* Single space at start and end are ignored */
		if (stop - start > 1 && *start == ' ' && *(stop - 1) == ' ') {
			start++;
			stop--;
			l++; /* account for the dropped trailing space */
		}

		if (surround[i].process)
			process(start, stop, 0);
		else
			hprint(start, stop);
		fputs(surround[i].after, stdout);
		return (int)((size_t)(stop - begin) + l);
	}
	return 0;
}

/*
 * Render setext headings: a text line followed by a line of '=' or '-'
 * that is at least as long as the text. Only at the start of a block.
 */
static int dounderline(const char *begin, const char *end, int newblock)
{
	unsigned int i;
	size_t j, l;
	const char *p;

	if (!newblock)
		return 0;
	p = begin;
	/* l = length of the first line */
	for (l = 0; p + l + 1 != end && p[l] != '\n'; l++)
		;
	p += l + 1;
	if (l == 0)
		return 0;
	for (i = 0; i < LENGTH(underline); i++) {
		/* j = length of the run of underline characters */
		for (j = 0; p + j != end && p[j] != '\n' &&
			    p[j] == underline[i].search[0];
		     j++)
			;
		if (j >= l) {
			fputs(underline[i].before, stdout);
			if (underline[i].process)
				process(begin, begin + l, 0);
			else
				hprint(begin, begin + l);
			fputs(underline[i].after, stdout);
			return -(int)(j + (size_t)(p - begin));
		}
	}
	return 0;
}

/*
 * Allocate zeroed memory when `p` is NULL, otherwise resize `p` (the added
 * bytes are not zeroed). Never returns NULL: on failure a message is
 * printed and the program exits, because no caller checks the result.
 */
static void *ereallocz(void *p, size_t size)
{
	void *res = p ? realloc(p, size) : calloc(1, size);

	if (!res) {
		eprint("fatal: could not malloc() %zu bytes\n", size);
		exit(EXIT_FAILURE);
	}
	return res;
}

/* Write [begin, end) to stdout with & " > < replaced by HTML entities. */
static void hprint(const char *begin, const char *end)
{
	const char *p;

	for (p = begin; p != end; p++) {
		if (*p == '&')
			fputs("&amp;", stdout);
		else if (*p == '"')
			fputs("&quot;", stdout);
		else if (*p == '>')
			fputs("&gt;", stdout);
		else if (*p == '<')
			fputs("&lt;", stdout);
		else
			fputc(*p, stdout);
	}
}

/*
 * Convert [begin, end) to HTML on stdout. At every position the parsers
 * are tried in table order; the first non-zero result wins. If none
 * applies, the character is copied (escaped when -n is active).
 * `newblock` says whether the text starts at a block boundary.
 */
static void process(const char *begin, const char *end, int newblock)
{
	const char *q;
	const char *p;
	int affected;
	unsigned int i;

	for (p = begin; p < end;) {
		if (newblock)
			while (*p == '\n')
				if (++p == end)
					return;
		affected = 0;
		for (i = 0; i < LENGTH(parsers) && !affected; i++)
			affected = parsers[i](p, end, newblock);
		p += abs(affected);
		if (!affected) {
			if (nohtml)
				hprint(p, p + 1);
			else
				fputc(*p, stdout);
			p++;
		}
		/* Stop if only newlines remain ("<" also covers a parser
		 * that consumed past end). */
		for (q = p; q < end && *q == '\n'; q++)
			;
		if (q >= end)
			return;
		else if (p[0] == '\n' && p + 1 != end && p[1] == '\n')
			newblock = 1;
		else
			newblock = affected < 0;
	}
}

/*
 * Usage: smu [-n] [file]
 *   -v  print the version and exit
 *   -n  escape html strictly
 * Converts `file`, or standard input when no file is given, to HTML on
 * standard output.
 */
int main(int argc, char *argv[])
{
	char *buffer = NULL;
	size_t len = 0;
	int i;

	for (i = 1; i < argc; i++) {
		if (!strcmp("-v", argv[i])) {
			eprint("simple markup %s (C) Enno Boland\n", VERSION);
			return EXIT_SUCCESS;
		} else if (!strcmp("-n", argv[i])) {
			nohtml = 1;
		} else if (argv[i][0] != '-') {
			break;
		} else if (!strcmp("--", argv[i])) {
			i++;
			break;
		} else {
			eprint("Usage %s [-n] [file]\n -n escape html strictly\n",
			       argv[0]);
			return EXIT_FAILURE;
		}
	}

	if (i < argc) {
		const char *path = argv[i];
		off64_t size = get_file_size(path);

		if (size == -1) {
			eprint("%s: %s: %s\n", argv[0], path, strerror(errno));
			return EXIT_FAILURE;
		}
		buffer = read_file(path, size);
		len = (size_t)size;
	} else {
		buffer = read_stream(stdin, &len);
	}
	if (!buffer)
		return EXIT_FAILURE;

	/* The parsers rely on a terminator at buffer[len]. */
	buffer[len] = '\0';
	process(buffer, buffer + len, 1);
	free(buffer);

	if (fflush(stdout) == EOF || ferror(stdout))
		return EXIT_FAILURE;
	return EXIT_SUCCESS;
}