/* smu - simple markup * Copyright (C) <2007, 2008> Enno boland * * See LICENSE for further informations */ #include #include #include #include #define BUFFERSIZE 512 #define LENGTH(x) sizeof(x)/sizeof(x[0]) #define ADDC(b,i) if(i % BUFFERSIZE == 0) \ { b = realloc(b, (i + BUFFERSIZE) * sizeof(b)); if(!b) eprint("Malloc failed."); } b[i] typedef int (*Parser)(const char *, const char *, int); struct Tag { char *search; int process; char *before, *after; }; void eprint(const char *format, ...); /* Prints error and exits */ int doamp(const char *begin, const char *end, int newblock); /* Parser for & */ int dogtlt(const char *begin, const char *end, int newblock); /* Parser for < and > */ int dohtml(const char *begin, const char *end, int newblock); /* Parser for html */ int dolineprefix(const char *begin, const char *end, int newblock); /* Parser for line prefix tags */ int dolink(const char *begin, const char *end, int newblock); /* Parser for links and images */ int dolist(const char *begin, const char *end, int newblock); /* Parser for lists */ int doparagraph(const char *begin, const char *end, int newblock); /* Parser for paragraphs */ int doreplace(const char *begin, const char *end, int newblock); /* Parser for simple replaces */ int doshortlink(const char *begin, const char *end, int newblock); /* Parser for links and images */ int dosurround(const char *begin, const char *end, int newblock); /* Parser for surrounding tags */ int dounderline(const char *begin, const char *end, int newblock); /* Parser for underline tags */ void hprint(const char *begin, const char *end); /* escapes HTML and prints it to stdout*/ void process(const char *begin, const char *end, int isblock); /* Processes range between begin and end. */ Parser parsers[] = { dounderline, dohtml, dolineprefix, dolist, doparagraph, dogtlt, dosurround, dolink, doshortlink, doamp, doreplace }; /* list of parsers */ FILE *source; unsigned int nohtml = 0; struct Tag lineprefix[] = { { " ", 0, "
", "
" }, { "\t", 0, "
", "
" }, { "> ", 2, "
", "
" }, { "###### ", 1, "
", "
" }, { "##### ", 1, "
", "
" }, { "#### ", 1, "

", "

" }, { "### ", 1, "

", "

" }, { "## ", 1, "

", "

" }, { "# ", 1, "

", "

" }, { "- - -\n", 1, "
", ""}, }; struct Tag underline[] = { { "=", 1, "

", "

\n" }, { "-", 1, "

", "

\n" }, }; struct Tag surround[] = { { "``", 0, "", "" }, { "`", 0, "", "" }, { "___", 1, "", "" }, { "***", 1, "", "" }, { "__", 1, "", "" }, { "**", 1, "", "" }, { "_", 1, "", "" }, { "*", 1, "", "" }, }; char * replace[][2] = { { "\\\\", "\\" }, { "\\`", "`" }, { "\\*", "*" }, { "\\_", "_" }, { "\\{", "{" }, { "\\}", "}" }, { "\\[", "[" }, { "\\]", "]" }, { "\\(", "(" }, { "\\)", ")" }, { "\\#", "#" }, { "\\+", "+" }, { "\\-", "-" }, { "\\.", "." }, { "\\!", "!" }, }; char * insert[][2] = { { " \n", "
" }, }; void eprint(const char *format, ...) { va_list ap; va_start(ap, format); vfprintf(stderr, format, ap); va_end(ap); exit(EXIT_FAILURE); } int doamp(const char *begin, const char *end, int newblock) { const char *p; if(*begin != '&') return 0; if(!nohtml) { for(p = begin + 1; p != end && !strchr("; \\\n\t", *p); p++); if(p == end || *p == ';') return 0; } fputs("&", stdout); return 1; } int dogtlt(const char *begin, const char *end, int newblock) { int brpos; char c; if(nohtml || begin + 1 >= end) return 0; brpos = begin[1] == '>'; if(!brpos && *begin != '<') return 0; c = begin[brpos ? 0 : 1]; if(!brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) { fputs("<",stdout); return 1; } else if(brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && !strchr("/\"'",c)) { printf("%c>",c); return 2; } return 0; } int dohtml(const char *begin, const char *end, int newblock) { const char *p, *tag, *tagend; if(nohtml || !newblock || *begin == '\n' || begin + 2 >= end) return 0; p = begin; if(p[1] == '\n') p++; if(p[1] != '<' || strchr(" /\n\t\\", p[2])) return 0; tag = p + 2; p += 2; for(; !strchr(" >", *p); p++); tagend = p; while((p = strstr(p, "\n') { p++; fwrite(begin, sizeof(char), p - begin + tagend - tag, stdout); puts("\n"); return -(p - begin + tagend - tag); } } return 0; } int dolineprefix(const char *begin, const char *end, int newblock) { unsigned int i, j, l; char *buffer; const char *p; if(newblock) p = begin; else if(*begin == '\n') p = begin + 1; else return 0; for(i = 0; i < LENGTH(lineprefix); i++) { l = strlen(lineprefix[i].search); if(end - p < l) continue; if(strncmp(lineprefix[i].search, p, l)) continue; if(*begin == '\n') fputc('\n', stdout); fputs(lineprefix[i].before, stdout); if(lineprefix[i].search[l-1] == '\n') { fputc('\n', stdout); return l; } if(!(buffer = malloc(BUFFERSIZE))) eprint("Malloc failed."); buffer[0] = '\0'; for(j = 0, p += l; p < end; p++, j++) { ADDC(buffer, j) = *p; if(*p == '\n' && p + l < end) { if(strncmp(lineprefix[i].search, p + 1, l) != 0) break; p += l; } } ADDC(buffer, j) = '\0'; if(lineprefix[i].process) process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2); else hprint(buffer, buffer + strlen(buffer)); puts(lineprefix[i].after); free(buffer); return -(p - begin); } return 0; } int dolink(const char *begin, const char *end, int newblock) { int img; const char *desc, *link, *p, *q, *descend, *linkend; if(*begin == '[') img = 0; else if(strncmp(begin, "![", 2) == 0) img = 1; else return 0; p = desc = begin + 1 + img; if(!(p = strstr(desc, "](")) || p > end) return 0; for(q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "![")) if(!(p = strstr(p + 1, "](")) || p > end) return 0; descend = p; link = p + 2; if(!(p = strstr(link, ")")) || p > end) return 0; linkend = p; if(img) { fputs("\"",", stdout); } else { fputs("", stdout); process(desc, descend, 0); fputs("", stdout); } return p + 1 - begin; } int dolist(const char *begin, const char *end, int newblock) { unsigned int i, j, indent, run, ul, isblock; const char *p, *q; char *buffer; isblock = 0; if(newblock) p = begin; else if(*begin == '\n') p = begin + 1; else return 0; q = p; if(*p == '-' || *p == '*' || *p == '+') ul = 1; else { ul = 0; for(; p < end && *p >= '0' && *p <= '9'; p++); if(p >= end || *p != '.') return 0; } p++; if(p >= end || !(*p == ' ' || *p == '\t')) return 0; for(p++; p != end && (*p == ' ' || *p == '\t'); p++); indent = p - q; if(!(buffer = malloc(BUFFERSIZE))) eprint("Malloc failed."); if(!newblock) putchar('\n'); fputs(ul ? "
    \n" : "
      \n", stdout); run = 1; for(; p < end && run; p++) { for(i = 0; p < end && run; p++, i++) { if(*p == '\n') { if(p + 1 == end) break; else if(p[1] == '\n') { p++; ADDC(buffer, i) = '\n'; i++; run = 0; isblock++; } q = p + 1; j = 0; if(ul && (*q == '-' || *q == '*' || *q == '+')) j = 1; else if(!ul) { for(; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++); if(q + j == end) break; if(j > 0 && q[j] == '.') j++; else j = 0; } if(q + indent < end) for(; (q[j] == ' ' || q[j] == '\t') && j < indent; j++); if(j == indent) { ADDC(buffer, i) = '\n'; i++; p += indent; run = 1; if(*q == ' ' || *q == '\t') p++; else break; } } ADDC(buffer, i) = *p; } ADDC(buffer, i) = '\0'; fputs("
    1. ", stdout); process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run)); fputs("
    2. \n", stdout); } fputs(ul ? "
\n" : "\n", stdout); free(buffer); p--; while(*(--p) == '\n'); return -(p - begin + 1); } int doparagraph(const char *begin, const char *end, int newblock) { const char *p; if(!newblock) return 0; p = strstr(begin, "\n\n"); if(!p || p > end) p = end; if(p - begin <= 1) return 0; fputs("

\n", stdout); process(begin, p, 0); fputs("

\n", stdout); return -(p - begin); } int doreplace(const char *begin, const char *end, int newblock) { unsigned int i, l; for(i = 0; i < LENGTH(insert); i++) if(strncmp(insert[i][0], begin, strlen(insert[i][0])) == 0) fputs(insert[i][1], stdout); for(i = 0; i < LENGTH(replace); i++) { l = strlen(replace[i][0]); if(end - begin < l) continue; if(strncmp(replace[i][0], begin, l) == 0) { fputs(replace[i][1], stdout); return l; } } return 0; } int doshortlink(const char *begin, const char *end, int newblock) { const char *p, *c; int ismail = 0; if(*begin != '<') return 0; for(p = begin + 1; p != end; p++) { switch(*p) { case ' ': case '\t': case '\n': return 0; case '#': case ':': ismail = -1; break; case '@': if(ismail == 0) ismail = 1; break; case '>': if(ismail == 0) return 0; fputs("", stdout); for(c = begin + 1; *c != '>'; c++) printf("&#%u;", *c); } else { hprint(begin + 1, p); fputs("\">", stdout); hprint(begin + 1, p); } fputs("", stdout); return p - begin + 1; } } return 0; } int dosurround(const char *begin, const char *end, int newblock) { unsigned int i, l; const char *p, *start, *stop; for(i = 0; i < LENGTH(surround); i++) { l = strlen(surround[i].search); if(end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0) continue; start = begin + l; p = start - 1; do { p = strstr(p + 1, surround[i].search); } while(p && p[-1] == '\\'); if(!p || p >= end || !(stop = strstr(start, surround[i].search)) || stop >= end) continue; fputs(surround[i].before, stdout); if(surround[i].process) process(start, stop, 0); else hprint(start, stop); fputs(surround[i].after, stdout); return stop - begin + l; } return 0; } int dounderline(const char *begin, const char *end, int newblock) { unsigned int i, j, l; const char *p; if(!newblock) return 0; p = begin; for(l = 0; p + l != end && p[l] != '\n'; l++); p += l + 1; if(l == 0) return 0; for(i = 0; i < LENGTH(underline); i++) { for(j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++); if(j >= l) { fputs(underline[i].before, stdout); if(underline[i].process) process(begin, begin + l, 0); else hprint(begin, begin + l); fputs(underline[i].after, stdout); return -(j + p - begin); } } return 0; } void hprint(const char *begin, const char *end) { const char *p; for(p = begin; p != end; p++) { if(*p == '&') fputs("&", stdout); else if(*p == '"') fputs(""", stdout); else if(*p == '>') fputs(">", stdout); else if(*p == '<') fputs("<", stdout); else putchar(*p); } } void process(const char *begin, const char *end, int newblock) { const char *p, *q; int affected; unsigned int i; for(p = begin; p != end;) { if(newblock) while(*p == '\n') if (++p == end) return; affected = 0; for(i = 0; i < LENGTH(parsers) && affected == 0; i++) affected = parsers[i](p, end, newblock); p += abs(affected); if(!affected) { if(nohtml) hprint(p, p + 1); else putchar(*p); p++; } for(q = p; q != end && *q == '\n'; q++); if(q == end) return; else if(p[0] == '\n' && p + 1 != end && p[1] == '\n') newblock = 1; else newblock = affected < 0; } } int main(int argc, char *argv[]) { char *buffer; int s; unsigned long len, bsize; source = stdin; if(argc > 1 && strcmp("-v", argv[1]) == 0) eprint("simple markup %s (C) Enno Boland\n",VERSION); else if(argc > 1 && strcmp("-h", argv[1]) == 0) eprint("Usage %s [-n] [file]\n -n escape html strictly\n",argv[0]); if(argc > 1 && strcmp("-n", argv[1]) == 0) nohtml = 1; if(argc > 1 + nohtml && strcmp("-", argv[1 + nohtml]) != 0 && !(source = fopen(argv[1 + nohtml],"r"))) eprint("Cannot open file `%s`\n",argv[1 + nohtml]); bsize = 2 * BUFFERSIZE; if(!(buffer = malloc(bsize))) eprint("Malloc failed."); len = 0; while((s = fread(buffer + len, 1, BUFFERSIZE, source))) { len += s; if(BUFFERSIZE + len + 1 > bsize) { bsize += BUFFERSIZE; if(!(buffer = realloc(buffer, bsize))) eprint("Malloc failed."); } } buffer[len] = '\0'; process(buffer, buffer + len, 1); fclose(source); free(buffer); return EXIT_SUCCESS; }