
According to https://spec.commonmark.org/0.29/#code-spans : > If the resulting string both begins and ends with a space character, but does not consist entirely of space characters, a single space character is removed from the front and back. This allows you to include code that begins or ends with backtick characters, which must be separated by whitespace from the opening or closing backtick strings.
656 lines
15 KiB
C
656 lines
15 KiB
C
/* smu - simple markup
|
|
* Copyright (C) <2007, 2008> Enno Boland <g s01 de>
|
|
*
|
|
* See LICENSE for further information
|
|
*/
|
|
#include <stdarg.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
/* Number of elements in the array x.  x must be a real array, not a pointer.
 * The expansion is fully parenthesized so it can be used inside larger
 * expressions. */
#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))
|
|
/* Yields the lvalue b[i], growing b by BUFSIZ bytes first whenever i is a
 * multiple of BUFSIZ (eprint() terminates the program if realloc fails).
 * Usage: ADDC(buf, n) = ch;
 * The macro is a single expression, so it is safe as the body of an unbraced
 * if/else.  b and i are evaluated more than once: pass plain variables. */
#define ADDC(b,i) \
	(*((((i) % BUFSIZ == 0 \
	     && !((b) = realloc((b), ((i) + BUFSIZ) * sizeof(char)))) \
	    ? eprint("Malloc failed.") : (void)0), \
	   &(b)[(i)]))
|
|
|
|
/* Signature shared by all parsers.  A parser looks at the text starting at
 * begin (bounded by end) and returns 0 when it does not apply.  process()
 * advances by the absolute value of a non-zero result and treats a negative
 * result as "the next text starts a new block". */
typedef int (*Parser)(const char *begin, const char *end, int newblock);
|
|
/* One markup construct and the HTML it is translated to.  All strings are
 * only read, so they are declared const (the tables initialise them from
 * string literals). */
typedef struct {
	const char *search;         /* markup text that triggers the tag */
	int process;                /* 0: body is only HTML-escaped with hprint();
	                             * non-zero: body is run through process()
	                             * (dolineprefix starts a new block when >= 2) */
	const char *before, *after; /* HTML written before and after the body */
} Tag;
|
|
|
|
/* Parsers: each returns 0 when it does not apply at begin (see Parser). */
static int doamp(const char *begin, const char *end, int newblock);        /* Parser for & */
static int docomment(const char *begin, const char *end, int newblock);    /* Parser for html-comments */
static int dogtlt(const char *begin, const char *end, int newblock);       /* Parser for < and > */
static int dohtml(const char *begin, const char *end, int newblock);       /* Parser for html */
static int dolineprefix(const char *begin, const char *end, int newblock); /* Parser for line prefix tags */
static int dolink(const char *begin, const char *end, int newblock);       /* Parser for links and images */
static int dolist(const char *begin, const char *end, int newblock);       /* Parser for lists */
static int doparagraph(const char *begin, const char *end, int newblock);  /* Parser for paragraphs */
static int doreplace(const char *begin, const char *end, int newblock);    /* Parser for simple replaces */
static int doshortlink(const char *begin, const char *end, int newblock);  /* Parser for <url> and <mail@address> short links */
static int dosurround(const char *begin, const char *end, int newblock);   /* Parser for surrounding tags */
static int dounderline(const char *begin, const char *end, int newblock);  /* Parser for underline tags */

/* Helpers */
static void *ereallocz(void *p, size_t size);                              /* (re)allocates or exits */
static void hprint(const char *begin, const char *end);                    /* escapes HTML and prints it to output */
static void process(const char *begin, const char *end, int newblock);     /* Processes range between begin and end. */
|
|
|
|
/* list of parsers */
|
|
static Parser parsers[] = { dounderline, docomment, dolineprefix,
|
|
dolist, doparagraph, dogtlt, dosurround, dolink,
|
|
doshortlink, dohtml, doamp, doreplace };
|
|
static int nohtml = 0;
|
|
|
|
static Tag lineprefix[] = {
|
|
{ " ", 0, "<pre><code>", "\n</code></pre>" },
|
|
{ "\t", 0, "<pre><code>", "\n</code></pre>" },
|
|
{ "> ", 2, "<blockquote>", "</blockquote>" },
|
|
{ "###### ", 1, "<h6>", "</h6>" },
|
|
{ "##### ", 1, "<h5>", "</h5>" },
|
|
{ "#### ", 1, "<h4>", "</h4>" },
|
|
{ "### ", 1, "<h3>", "</h3>" },
|
|
{ "## ", 1, "<h2>", "</h2>" },
|
|
{ "# ", 1, "<h1>", "</h1>" },
|
|
{ "- - -\n", 1, "<hr />", ""},
|
|
};
|
|
|
|
static Tag underline[] = {
|
|
{ "=", 1, "<h1>", "</h1>\n" },
|
|
{ "-", 1, "<h2>", "</h2>\n" },
|
|
};
|
|
|
|
static Tag surround[] = {
|
|
{ "``", 0, "<code>", "</code>" },
|
|
{ "`", 0, "<code>", "</code>" },
|
|
{ "___", 1, "<strong><em>", "</em></strong>" },
|
|
{ "***", 1, "<strong><em>", "</em></strong>" },
|
|
{ "__", 1, "<strong>", "</strong>" },
|
|
{ "**", 1, "<strong>", "</strong>" },
|
|
{ "_", 1, "<em>", "</em>" },
|
|
{ "*", 1, "<em>", "</em>" },
|
|
};
|
|
|
|
/* Backslash escapes handled by doreplace(): { input sequence, output text }.
 * The input sequence is consumed and the output text written in its place. */
static const char *replace[][2] = {
	{ "\\\\", "\\" },
	{ "\\`",  "`" },
	{ "\\*",  "*" },
	{ "\\_",  "_" },
	{ "\\{",  "{" },
	{ "\\}",  "}" },
	{ "\\[",  "[" },
	{ "\\]",  "]" },
	{ "\\(",  "(" },
	{ "\\)",  ")" },
	{ "\\#",  "#" },
	{ "\\+",  "+" },
	{ "\\-",  "-" },
	{ "\\.",  "." },
	{ "\\!",  "!" },
};
|
|
|
|
/* Insertions handled by doreplace(): { input sequence, text to insert }.
 * The text is written when the input sequence is seen; the sequence itself
 * is not consumed by the insertion.
 * NOTE(review): Markdown hard breaks are normally two spaces before the
 * newline - confirm this literal has not lost whitespace. */
static const char *insert[][2] = {
	{ " \n", "<br />" },
};
|
|
|
|
/* Writes a printf-style message to stderr and terminates the program with
 * EXIT_FAILURE.  Does not return. */
void
eprint(const char *format, ...) {
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
	exit(EXIT_FAILURE);
}
|
|
|
|
int
|
|
doamp(const char *begin, const char *end, int newblock) {
|
|
const char *p;
|
|
|
|
if(*begin != '&')
|
|
return 0;
|
|
if(!nohtml) {
|
|
for(p = begin + 1; p != end && !strchr("; \\\n\t", *p); p++);
|
|
if(p == end || *p == ';')
|
|
return 0;
|
|
}
|
|
fputs("&", stdout);
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
dogtlt(const char *begin, const char *end, int newblock) {
|
|
int brpos;
|
|
char c;
|
|
|
|
if(nohtml || begin + 1 >= end)
|
|
return 0;
|
|
brpos = begin[1] == '>';
|
|
if(!brpos && *begin != '<')
|
|
return 0;
|
|
c = begin[brpos ? 0 : 1];
|
|
if(!brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
|
|
fputs("<", stdout);
|
|
return 1;
|
|
}
|
|
else if(brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && !strchr("/\"'",c)) {
|
|
fprintf(stdout, "%c>",c);
|
|
return 2;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
docomment(const char *begin, const char *end, int newblock) {
|
|
char *p;
|
|
|
|
if(nohtml || strncmp("<!--", begin, 4))
|
|
return 0;
|
|
p = strstr(begin, "-->");
|
|
if(!p || p + 3 >= end)
|
|
return 0;
|
|
return (p + 3 - begin) * (newblock ? -1 : 1);
|
|
}
|
|
|
|
int
|
|
dohtml(const char *begin, const char *end, int newblock) {
|
|
const char *p, *tag, *tagend;
|
|
|
|
if(nohtml || begin + 2 >= end)
|
|
return 0;
|
|
p = begin;
|
|
if(p[0] != '<' || !isalpha(p[1]))
|
|
return 0;
|
|
p++;
|
|
tag = p;
|
|
for(; isalnum(*p) && p < end; p++);
|
|
tagend = p;
|
|
if(p > end || tag == tagend)
|
|
return 0;
|
|
while((p = strstr(p, "</")) && p < end) {
|
|
p += 2;
|
|
if(strncmp(p, tag, tagend - tag) == 0 && p[tagend - tag] == '>') {
|
|
p++;
|
|
fwrite(begin, sizeof(char), p - begin + tagend - tag + 1, stdout);
|
|
return p - begin + tagend - tag + 1;
|
|
}
|
|
}
|
|
p = strchr(tagend, '>');
|
|
if(p) {
|
|
fwrite(begin, sizeof(char), p - begin + 2, stdout);
|
|
return p - begin + 2;
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
dolineprefix(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, j, l;
|
|
char *buffer;
|
|
const char *p;
|
|
|
|
if(newblock)
|
|
p = begin;
|
|
else if(*begin == '\n')
|
|
p = begin + 1;
|
|
else
|
|
return 0;
|
|
for(i = 0; i < LENGTH(lineprefix); i++) {
|
|
l = strlen(lineprefix[i].search);
|
|
if(end - p < l)
|
|
continue;
|
|
if(strncmp(lineprefix[i].search, p, l))
|
|
continue;
|
|
if(*begin == '\n')
|
|
fputc('\n', stdout);
|
|
fputs(lineprefix[i].before, stdout);
|
|
if(lineprefix[i].search[l-1] == '\n') {
|
|
fputc('\n', stdout);
|
|
return l;
|
|
}
|
|
if(!(buffer = malloc(BUFSIZ)))
|
|
eprint("Malloc failed.");
|
|
buffer[0] = '\0';
|
|
for(j = 0, p += l; p < end; p++, j++) {
|
|
ADDC(buffer, j) = *p;
|
|
if(*p == '\n' && p + l < end) {
|
|
if(strncmp(lineprefix[i].search, p + 1, l) != 0)
|
|
break;
|
|
p += l;
|
|
}
|
|
}
|
|
|
|
/* Skip empty lines in block */
|
|
while(*(buffer + j - 1) == '\n') {
|
|
j--;
|
|
}
|
|
|
|
ADDC(buffer, j) = '\0';
|
|
if(lineprefix[i].process)
|
|
process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
|
|
else
|
|
hprint(buffer, buffer + strlen(buffer));
|
|
puts(lineprefix[i].after);
|
|
free(buffer);
|
|
return -(p - begin);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Parser for links and images.
 * Recognises [description](target "title") and, with a leading '!', the image
 * form.  The title is optional and may be quoted with " or '.  The target may
 * contain balanced parentheses and may be wrapped in <...>.
 * A link is written as <a href="..." title="...">description</a> with the
 * description parsed again by process(); an image is written as
 * <img src="..." alt="..." title="..." /> with the description escaped.
 * Returns the number of bytes consumed, or 0 if no link starts at begin.
 * NOTE(review): the '!' detection and the loop skipping images nested in the
 * description were reconstructed from how img, desc, p and q are used below;
 * confirm against the project history. */
int
dolink(const char *begin, const char *end, int newblock) {
	int img, len, sep, parens_depth = 1;
	const char *desc, *link, *p, *q, *descend, *linkend;
	const char *title = NULL, *titleend = NULL;

	if(*begin == '[')
		img = 0;
	else if(strncmp(begin, "![", 2) == 0)
		img = 1;
	else
		return 0;
	desc = begin + 1 + img;
	if(!(p = strstr(desc, "](")) || p > end)
		return 0;
	/* every image opened inside the description owns one "](":
	 * move on to the next one for each */
	for(q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "![")) {
		if(!(p = strstr(p + 1, "](")) || p > end)
			return 0;
	}
	descend = p;
	link = p + 2;

	/* find end of link while handling nested parens */
	q = link;
	while(parens_depth) {
		if(!(q = strpbrk(q, "()")) || q > end)
			return 0;
		if(*q == '(')
			parens_depth++;
		else
			parens_depth--;
		if(parens_depth && q < end)
			q++;
	}

	if((p = strpbrk(link, "\"'")) && p < end && q > p) {
		sep = p[0]; /* separator: can be " or ' */
		title = p + 1;
		/* strip trailing whitespace */
		for(linkend = p; linkend > link && isspace((unsigned char)*(linkend - 1)); linkend--);
		if(!(p = strchr(title, sep)) || q > end || p > q)
			return 0;
		titleend = p;
	}
	else {
		linkend = q;
	}

	/* Links can be given in angular brackets */
	if(*link == '<' && *(linkend - 1) == '>') {
		link++;
		linkend--;
	}

	len = q + 1 - begin;
	if(img) {
		fputs("<img src=\"", stdout);
		hprint(link, linkend);
		fputs("\" alt=\"", stdout);
		hprint(desc, descend);
		fputs("\" ", stdout);
		if(title && titleend) {
			fputs("title=\"", stdout);
			hprint(title, titleend);
			fputs("\" ", stdout);
		}
		fputs("/>", stdout);
	}
	else {
		fputs("<a href=\"", stdout);
		hprint(link, linkend);
		fputs("\"", stdout);
		if(title && titleend) {
			fputs(" title=\"", stdout);
			hprint(title, titleend);
			fputs("\"", stdout);
		}
		fputs(">", stdout);
		process(desc, descend, 0);
		fputs("</a>", stdout);
	}
	return len;
}
|
|
|
|
int
|
|
dolist(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, j, indent, run, ul, isblock;
|
|
const char *p, *q;
|
|
char *buffer = NULL;
|
|
|
|
isblock = 0;
|
|
if(newblock)
|
|
p = begin;
|
|
else if(*begin == '\n')
|
|
p = begin + 1;
|
|
else
|
|
return 0;
|
|
q = p;
|
|
if(*p == '-' || *p == '*' || *p == '+')
|
|
ul = 1;
|
|
else {
|
|
ul = 0;
|
|
for(; p < end && *p >= '0' && *p <= '9'; p++);
|
|
if(p >= end || *p != '.')
|
|
return 0;
|
|
}
|
|
p++;
|
|
if(p >= end || !(*p == ' ' || *p == '\t'))
|
|
return 0;
|
|
for(p++; p != end && (*p == ' ' || *p == '\t'); p++);
|
|
indent = p - q;
|
|
buffer = ereallocz(buffer, BUFSIZ);
|
|
if(!newblock)
|
|
fputc('\n', stdout);
|
|
fputs(ul ? "<ul>\n" : "<ol>\n", stdout);
|
|
run = 1;
|
|
for(; p < end && run; p++) {
|
|
for(i = 0; p < end && run; p++, i++) {
|
|
if(*p == '\n') {
|
|
if(p + 1 == end)
|
|
break;
|
|
else if(p[1] == '\n') {
|
|
p++;
|
|
ADDC(buffer, i) = '\n';
|
|
i++;
|
|
run = 0;
|
|
isblock++;
|
|
}
|
|
q = p + 1;
|
|
j = 0;
|
|
if(ul && (*q == '-' || *q == '*' || *q == '+'))
|
|
j = 1;
|
|
else if(!ul) {
|
|
for(; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++);
|
|
if(q + j == end)
|
|
break;
|
|
if(j > 0 && q[j] == '.')
|
|
j++;
|
|
else
|
|
j = 0;
|
|
}
|
|
if(q + indent < end)
|
|
for(; (q[j] == ' ' || q[j] == '\t') && j < indent; j++);
|
|
if(j == indent) {
|
|
ADDC(buffer, i) = '\n';
|
|
i++;
|
|
p += indent;
|
|
run = 1;
|
|
if(*q == ' ' || *q == '\t')
|
|
p++;
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
ADDC(buffer, i) = *p;
|
|
}
|
|
ADDC(buffer, i) = '\0';
|
|
fputs("<li>", stdout);
|
|
process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
|
|
fputs("</li>\n", stdout);
|
|
}
|
|
fputs(ul ? "</ul>\n" : "</ol>\n", stdout);
|
|
free(buffer);
|
|
p--;
|
|
while(*(--p) == '\n');
|
|
return -(p - begin + 1);
|
|
}
|
|
|
|
/* Parser for paragraphs.
 * Only applies at the start of a block.  The paragraph extends to the next
 * blank line ("\n\n") or to end, whichever comes first; its content is parsed
 * with process() and wrapped in <p>...</p>.  Ranges of at most one character
 * are ignored.  Returns 0 if nothing was written, otherwise the negated
 * paragraph length. */
int
doparagraph(const char *begin, const char *end, int newblock) {
	const char *stop;

	if(!newblock)
		return 0;
	stop = strstr(begin, "\n\n");
	if(stop == NULL || stop > end)
		stop = end;
	if(stop - begin <= 1)
		return 0;
	fputs("<p>", stdout);
	process(begin, stop, 0);
	fputs("</p>\n", stdout);
	return -(stop - begin);
}
|
|
|
|
int
|
|
doreplace(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, l;
|
|
|
|
for(i = 0; i < LENGTH(insert); i++)
|
|
if(strncmp(insert[i][0], begin, strlen(insert[i][0])) == 0)
|
|
fputs(insert[i][1], stdout);
|
|
for(i = 0; i < LENGTH(replace); i++) {
|
|
l = strlen(replace[i][0]);
|
|
if(end - begin < l)
|
|
continue;
|
|
if(strncmp(replace[i][0], begin, l) == 0) {
|
|
fputs(replace[i][1], stdout);
|
|
return l;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Parser for short links: <target>.
 * The text between '<' and '>' must not contain a space, tab or newline.
 * If it contains '#' or ':' it is written as a link to itself.  Otherwise,
 * if it contains '@', it is written as a mailto: link with every byte
 * encoded as a numeric character reference.  Returns the number of bytes
 * consumed including both brackets, or 0 if this is not a short link. */
int
doshortlink(const char *begin, const char *end, int newblock) {
	const char *p, *c;
	int ismail = 0;	/* 0: undecided, 1: mail address, -1: other link */

	if(*begin != '<')
		return 0;
	for(p = begin + 1; p != end; p++) {
		switch(*p) {
		case ' ':
		case '\t':
		case '\n':
			return 0;
		case '#':
		case ':':
			ismail = -1;
			break;
		case '@':
			if(ismail == 0)
				ismail = 1;
			break;
		case '>':
			if(ismail == 0)
				return 0;
			fputs("<a href=\"", stdout);
			if(ismail == 1) {
				/* mailto: */
				fputs("mailto:", stdout);
				/* convert through unsigned char so bytes >= 0x80
				 * are not sign-extended into huge numbers */
				for(c = begin + 1; c != p; c++)
					fprintf(stdout, "&#%u;", (unsigned int)(unsigned char)*c);
				fputs("\">", stdout);
				for(c = begin + 1; c != p; c++)
					fprintf(stdout, "&#%u;", (unsigned int)(unsigned char)*c);
			}
			else {
				hprint(begin + 1, p);
				fputs("\">", stdout);
				hprint(begin + 1, p);
			}
			fputs("</a>", stdout);
			return p - begin + 1;
		default:
			break;
		}
	}
	return 0;
}
|
|
|
|
int
|
|
dosurround(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, l;
|
|
const char *p, *start, *stop;
|
|
|
|
for(i = 0; i < LENGTH(surround); i++) {
|
|
l = strlen(surround[i].search);
|
|
if(end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
|
|
continue;
|
|
start = begin + l;
|
|
p = start - 1;
|
|
do {
|
|
stop = p;
|
|
p = strstr(p + 1, surround[i].search);
|
|
} while(p && p[-1] == '\\');
|
|
if (p && p[-1] != '\\')
|
|
stop = p;
|
|
if(!stop || stop < start || stop >= end)
|
|
continue;
|
|
fputs(surround[i].before, stdout);
|
|
|
|
/* Single space at start and end are ignored */
|
|
if (*start == ' ' && *(stop - 1) == ' ') {
|
|
start++;
|
|
stop--;
|
|
l++;
|
|
}
|
|
|
|
if(surround[i].process)
|
|
process(start, stop, 0);
|
|
else
|
|
hprint(start, stop);
|
|
fputs(surround[i].after, stdout);
|
|
return stop - begin + l;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
dounderline(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, j, l;
|
|
const char *p;
|
|
|
|
if(!newblock)
|
|
return 0;
|
|
p = begin;
|
|
for(l = 0; p + l != end && p[l] != '\n'; l++);
|
|
p += l + 1;
|
|
if(l == 0)
|
|
return 0;
|
|
for(i = 0; i < LENGTH(underline); i++) {
|
|
for(j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++);
|
|
if(j >= l) {
|
|
fputs(underline[i].before, stdout);
|
|
if(underline[i].process)
|
|
process(begin, begin + l, 0);
|
|
else
|
|
hprint(begin, begin + l);
|
|
fputs(underline[i].after, stdout);
|
|
return -(j + p - begin);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Allocates size bytes, or resizes p to size bytes when p is not NULL.
 * A fresh allocation is zero-filled (calloc); bytes added by a resize are
 * not.  Terminates the program through eprint() on failure, so the result is
 * never NULL. */
void *
ereallocz(void *p, size_t size) {
	void *res;

	if(p)
		res = realloc(p , size);
	else
		res = calloc(1, size);

	if(!res)
		/* size_t has no portable C89 conversion: print as unsigned long */
		eprint("fatal: could not malloc() %lu bytes\n", (unsigned long)size);
	return res;
}
|
|
|
|
/* Writes the bytes in [begin, end) to stdout, replacing the HTML special
 * characters & " > < with their entities.  Writes nothing if begin is not
 * before end. */
void
hprint(const char *begin, const char *end) {
	const char *p;

	for(p = begin; p < end; p++) {
		switch(*p) {
		case '&':
			fputs("&amp;", stdout);
			break;
		case '"':
			fputs("&quot;", stdout);
			break;
		case '>':
			fputs("&gt;", stdout);
			break;
		case '<':
			fputs("&lt;", stdout);
			break;
		default:
			fputc(*p, stdout);
			break;
		}
	}
}
|
|
|
|
void
|
|
process(const char *begin, const char *end, int newblock) {
|
|
const char *p, *q;
|
|
int affected;
|
|
unsigned int i;
|
|
|
|
for(p = begin; p < end;) {
|
|
if(newblock)
|
|
while(*p == '\n')
|
|
if(++p == end)
|
|
return;
|
|
affected = 0;
|
|
for(i = 0; i < LENGTH(parsers) && !affected; i++)
|
|
affected = parsers[i](p, end, newblock);
|
|
p += abs(affected);
|
|
if(!affected) {
|
|
if(nohtml)
|
|
hprint(p, p + 1);
|
|
else
|
|
fputc(*p, stdout);
|
|
p++;
|
|
}
|
|
for(q = p; q != end && *q == '\n'; q++);
|
|
if(q == end)
|
|
return;
|
|
else if(p[0] == '\n' && p + 1 != end && p[1] == '\n')
|
|
newblock = 1;
|
|
else
|
|
newblock = affected < 0;
|
|
}
|
|
}
|
|
|
|
int
|
|
main(int argc, char *argv[]) {
|
|
char *buffer = NULL;
|
|
int s, i;
|
|
unsigned long len, bsize;
|
|
FILE *source = stdin;
|
|
|
|
for(i = 1; i < argc; i++) {
|
|
if(!strcmp("-v", argv[i]))
|
|
eprint("simple markup %s (C) Enno Boland\n",VERSION);
|
|
else if(!strcmp("-n", argv[i]))
|
|
nohtml = 1;
|
|
else if(argv[i][0] != '-')
|
|
break;
|
|
else if(!strcmp("--", argv[i])) {
|
|
i++;
|
|
break;
|
|
}
|
|
else
|
|
eprint("Usage %s [-n] [file]\n -n escape html strictly\n", argv[0]);
|
|
}
|
|
if(i < argc && !(source = fopen(argv[i], "r")))
|
|
eprint("Cannot open file `%s`\n",argv[i]);
|
|
bsize = 2 * BUFSIZ;
|
|
buffer = ereallocz(buffer, bsize);
|
|
len = 0;
|
|
while((s = fread(buffer + len, 1, BUFSIZ, source))) {
|
|
len += s;
|
|
if(BUFSIZ + len + 1 > bsize) {
|
|
bsize += BUFSIZ;
|
|
if(!(buffer = realloc(buffer, bsize)))
|
|
eprint("realloc failed.");
|
|
}
|
|
}
|
|
buffer[len] = '\0';
|
|
process(buffer, buffer + len, 1);
|
|
fclose(source);
|
|
free(buffer);
|
|
return EXIT_SUCCESS;
|
|
}
|