scq/smu.c
Karl Bartel 1793559632 Ignore single space around code span
According to https://spec.commonmark.org/0.29/#code-spans :

> If the resulting string both begins and ends with a space character,
but does not consist entirely of space characters, a single space
character is removed from the front and back. This allows you to include
code that begins or ends with backtick characters, which must be
separated by whitespace from the opening or closing backtick strings.
2020-07-01 08:56:22 +02:00

656 lines
15 KiB
C

/* smu - simple markup
* Copyright (C) <2007, 2008> Enno Boland <g s01 de>
*
* See LICENSE for further information
*/
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Number of elements in a true array (not a pointer). Fully parenthesized so
 * the macro can be embedded in a larger expression such as `n / LENGTH(a)`. */
#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))
/* Yields the lvalue b[i]. Whenever i is a multiple of BUFSIZ the heap buffer b
 * is first resized to i + BUFSIZ bytes (the program exits if that fails).
 * Expands to a bare `if` statement followed by an expression, so it must be
 * used as a complete statement, e.g. `ADDC(buf, n) = c;`, inside braces. */
#define ADDC(b,i) if(i % BUFSIZ == 0) { b = realloc(b, (i + BUFSIZ) * sizeof(char)); if(!b) eprint("Malloc failed."); } b[i]
/* Signature shared by every parser: (begin, end, newblock).
 * As used by process(): a return of 0 means "not handled"; otherwise the
 * absolute value is the number of bytes consumed, and a negative value makes
 * the text that follows be treated as the start of a new block. */
typedef int (*Parser)(const char *, const char *, int);
/* One markup rule for the lineprefix, underline and surround tables. */
typedef struct {
/* markup text to look for */
char *search;
/* 0: emit the content HTML-escaped; non-zero: run it through process()
 * (dolineprefix additionally treats values >= 2 as "process as a new block") */
int process;
/* HTML emitted before and after the content */
char *before, *after;
} Tag;
static int doamp(const char *begin, const char *end, int newblock); /* Parser for & */
static int docomment(const char *begin, const char *end, int newblock); /* Parser for html-comments */
static int dogtlt(const char *begin, const char *end, int newblock); /* Parser for < and > */
static int dohtml(const char *begin, const char *end, int newblock); /* Parser for html */
static int dolineprefix(const char *begin, const char *end, int newblock);/* Parser for line prefix tags */
static int dolink(const char *begin, const char *end, int newblock); /* Parser for links and images */
static int dolist(const char *begin, const char *end, int newblock); /* Parser for lists */
static int doparagraph(const char *begin, const char *end, int newblock); /* Parser for paragraphs */
static int doreplace(const char *begin, const char *end, int newblock); /* Parser for simple replaces */
static int doshortlink(const char *begin, const char *end, int newblock); /* Parser for <url> and <mail@address> shortlinks */
static int dosurround(const char *begin, const char *end, int newblock); /* Parser for surrounding tags */
static int dounderline(const char *begin, const char *end, int newblock); /* Parser for underline tags */
static void *ereallocz(void *p, size_t size); /* realloc (or calloc when p is NULL) that exits on failure */
static void hprint(const char *begin, const char *end); /* escapes HTML and prints it to output */
static void process(const char *begin, const char *end, int isblock); /* Processes range between begin and end. */
/* list of parsers; process() tries them in this order at every position and
 * stops at the first one that returns non-zero */
static Parser parsers[] = { dounderline, docomment, dolineprefix,
dolist, doparagraph, dogtlt, dosurround, dolink,
doshortlink, dohtml, doamp, doreplace };
/* set by the -n command line option: disables the raw-HTML parsers
 * (dogtlt, docomment, dohtml) and makes unhandled characters HTML-escaped */
static int nohtml = 0;
/* Block rules keyed on a prefix repeated at the start of each line.
 * dolineprefix() uses the first entry whose search text matches, so longer
 * prefixes must precede shorter ones that they start with.
 * NOTE(review): the first entry matches a single leading space; Markdown
 * code blocks are conventionally indented by four spaces - confirm this
 * literal is intact. */
static Tag lineprefix[] = {
{ " ", 0, "<pre><code>", "\n</code></pre>" },
{ "\t", 0, "<pre><code>", "\n</code></pre>" },
{ "> ", 2, "<blockquote>", "</blockquote>" },
{ "###### ", 1, "<h6>", "</h6>" },
{ "##### ", 1, "<h5>", "</h5>" },
{ "#### ", 1, "<h4>", "</h4>" },
{ "### ", 1, "<h3>", "</h3>" },
{ "## ", 1, "<h2>", "</h2>" },
{ "# ", 1, "<h1>", "</h1>" },
{ "- - -\n", 1, "<hr />", ""},
};
/* Heading rules for a title line followed by a line made of the search
 * character; dounderline() only looks at the first character of search. */
static Tag underline[] = {
{ "=", 1, "<h1>", "</h1>\n" },
{ "-", 1, "<h2>", "</h2>\n" },
};
/* Inline rules for text enclosed by the same delimiter on both sides.
 * dosurround() takes the first entry that matches, so longer delimiters are
 * listed before the shorter ones they start with. Entries with process == 0
 * (code spans) have their content escaped instead of parsed. */
static Tag surround[] = {
{ "``", 0, "<code>", "</code>" },
{ "`", 0, "<code>", "</code>" },
{ "___", 1, "<strong><em>", "</em></strong>" },
{ "***", 1, "<strong><em>", "</em></strong>" },
{ "__", 1, "<strong>", "</strong>" },
{ "**", 1, "<strong>", "</strong>" },
{ "_", 1, "<em>", "</em>" },
{ "*", 1, "<em>", "</em>" },
};
/* Backslash escapes: { input sequence, text emitted in its place }.
 * doreplace() consumes the input sequence when it matches. */
static const char *replace[][2] = {
{ "\\\\", "\\" },
{ "\\`", "`" },
{ "\\*", "*" },
{ "\\_", "_" },
{ "\\{", "{" },
{ "\\}", "}" },
{ "\\[", "[" },
{ "\\]", "]" },
{ "\\(", "(" },
{ "\\)", ")" },
{ "\\#", "#" },
{ "\\+", "+" },
{ "\\-", "-" },
{ "\\.", "." },
{ "\\!", "!" },
};
/* Insertions: { input sequence, text emitted in front of it }.
 * doreplace() prints the second string when the input starts with the first
 * one but does not consume any input for it.
 * NOTE(review): Markdown's hard line break is conventionally two spaces
 * before the newline - confirm the search literal is intact. */
static const char *insert[][2] = {
{ " \n", "<br />" },
};
/* Write a printf-style formatted message to stderr, then terminate the
 * program with EXIT_FAILURE. This function does not return. */
void
eprint(const char *format, ...) {
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
	exit(EXIT_FAILURE);
}
/* Parser for '&'.
 * Emits "&amp;" and returns 1 when the ampersand at begin needs escaping.
 * Unless nohtml is set, an ampersand whose following characters reach a ';'
 * (or the end of the range) before any of space, backslash, newline or tab
 * is left alone (returns 0). Returns 0 as well when *begin is not '&'. */
int
doamp(const char *begin, const char *end, int newblock) {
	static const char stoppers[] = "; \\\n\t";
	const char *scan;

	if(begin[0] != '&')
		return 0;
	if(!nohtml) {
		scan = begin + 1;
		while(scan != end && strchr(stoppers, *scan) == NULL)
			scan++;
		/* ran into ';' or the end first: do not escape */
		if(scan == end || *scan == ';')
			return 0;
	}
	fputs("&amp;", stdout);
	return 1;
}
/* Parser for stray '<' and '>'.
 * - "X>" where X is neither an ASCII letter nor one of / " ' : prints X
 *   followed by "&gt;" and returns 2.
 * - "<X" where X is not an ASCII letter: prints "&lt;" and returns 1.
 * Returns 0 in every other case, when nohtml is set, or when fewer than two
 * bytes remain. */
int
dogtlt(const char *begin, const char *end, int newblock) {
	int second_is_gt, is_letter;
	char ch;

	if(nohtml || begin + 1 >= end)
		return 0;
	second_is_gt = (begin[1] == '>');
	if(second_is_gt) {
		/* the character in front of '>' decides */
		ch = begin[0];
		is_letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
		if(!is_letter && !strchr("/\"'",ch)) {
			fprintf(stdout, "%c&gt;",ch);
			return 2;
		}
		return 0;
	}
	if(begin[0] != '<')
		return 0;
	/* the character after '<' decides */
	ch = begin[1];
	is_letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
	if(!is_letter) {
		fputs("&lt;", stdout);
		return 1;
	}
	return 0;
}
/* Parser for html comments.
 * Recognizes "<!--" at begin and looks for the next "-->". Nothing is
 * printed here; the function only reports the comment's length as consumed.
 * The result is negative when newblock is set. Returns 0 when nohtml is set,
 * when there is no comment at begin, or when no terminator is found before
 * end.
 * NOTE(review): a comment whose "-->" ends exactly at end is rejected by the
 * >= comparison - confirm that is intended. */
int
docomment(const char *begin, const char *end, int newblock) {
	const char *close;
	int consumed;

	if(nohtml)
		return 0;
	if(strncmp("<!--", begin, 4) != 0)
		return 0;
	close = strstr(begin, "-->");
	if(close == NULL)
		return 0;
	close += 3;
	if(close >= end)
		return 0;
	consumed = close - begin;
	return newblock ? -consumed : consumed;
}
/* Parser for raw html.
 * Recognizes "<tag" at begin (tag = a letter followed by alphanumerics) and
 * copies the markup to stdout unchanged: up to and including the matching
 * "</tag>" if one exists inside the range, otherwise up to the first '>'.
 * As before, the byte following the final '>' is copied and consumed too,
 * but the result is now clamped so nothing at or beyond end is ever written
 * or counted.
 * Returns the number of bytes consumed, or 0 when nohtml is set or the text
 * at begin is not html. */
int
dohtml(const char *begin, const char *end, int newblock) {
	const char *p, *tag, *tagend;
	size_t taglen, len;

	if(nohtml || begin + 2 >= end)
		return 0;
	/* <ctype.h> functions need an unsigned char value; plain char may be
	 * negative for non-ASCII (e.g. UTF-8) bytes */
	if(begin[0] != '<' || !isalpha((unsigned char)begin[1]))
		return 0;
	tag = begin + 1;
	/* check the bound before looking at the byte */
	for(p = tag; p < end && isalnum((unsigned char)*p); p++);
	tagend = p;
	if(tag == tagend)
		return 0;
	taglen = tagend - tag;
	/* look for the matching closing tag inside the range */
	while((p = strstr(p, "</")) && p < end) {
		p += 2;
		if(p < end && (size_t)(end - p) > taglen &&
		   strncmp(p, tag, taglen) == 0 && p[taglen] == '>') {
			len = (p - begin) + taglen + 2;
			if(len > (size_t)(end - begin))
				len = end - begin;
			fwrite(begin, sizeof(char), len, stdout);
			return len;
		}
	}
	/* no closing tag: pass the opening tag through on its own */
	p = strchr(tagend, '>');
	if(!p || p >= end)
		return 0;
	len = p - begin + 2;
	if(len > (size_t)(end - begin))
		len = end - begin;
	fwrite(begin, sizeof(char), len, stdout);
	return len;
}
/* Parser for line prefix tags (code blocks, block quotes, headings, rules).
 * Only applies at the start of a block or directly after a newline. The
 * first entry of lineprefix whose search text matches is used: the prefix is
 * stripped from every consecutive line carrying it, the collected text is
 * emitted between the entry's before/after strings, either parsed via
 * process() or HTML-escaped via hprint() depending on the entry's process
 * field.
 * Returns 0 if no prefix matches, the prefix length for entries whose search
 * text ends in a newline, and otherwise the negated number of bytes
 * consumed. */
int
dolineprefix(const char *begin, const char *end, int newblock) {
	unsigned int i, j, l;
	char *buffer;
	const char *p;

	if(newblock)
		p = begin;
	else if(*begin == '\n')
		p = begin + 1;
	else
		return 0;
	for(i = 0; i < LENGTH(lineprefix); i++) {
		l = strlen(lineprefix[i].search);
		if(end - p < l)
			continue;
		if(strncmp(lineprefix[i].search, p, l))
			continue;
		if(*begin == '\n')
			fputc('\n', stdout);
		fputs(lineprefix[i].before, stdout);
		/* prefixes that include their own newline have no content */
		if(lineprefix[i].search[l-1] == '\n') {
			fputc('\n', stdout);
			return l;
		}
		if(!(buffer = malloc(BUFSIZ)))
			eprint("Malloc failed.");
		buffer[0] = '\0';
		/* copy the block, dropping the prefix of each continuation line */
		for(j = 0, p += l; p < end; p++, j++) {
			ADDC(buffer, j) = *p;
			if(*p == '\n' && p + l < end) {
				if(strncmp(lineprefix[i].search, p + 1, l) != 0)
					break;
				p += l;
			}
		}
		/* Skip empty lines in block; stop at the start of the buffer so an
		 * empty or newline-only block never reads buffer[-1] */
		while(j > 0 && buffer[j - 1] == '\n') {
			j--;
		}
		ADDC(buffer, j) = '\0';
		if(lineprefix[i].process)
			process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
		else
			hprint(buffer, buffer + strlen(buffer));
		puts(lineprefix[i].after);
		free(buffer);
		return -(p - begin);
	}
	return 0;
}
/* Parser for links and images: [desc](url "title") and ![alt](url "title").
 * The url may contain balanced parentheses and may be wrapped in <...>; the
 * optional title is delimited by " or '. Emits an <a> element (description
 * parsed via process()) or an <img> element (alt text escaped).
 * Returns the number of bytes consumed, or 0 if the text at begin is not a
 * complete link inside the range. */
int
dolink(const char *begin, const char *end, int newblock) {
	int img, len, sep, parens_depth = 1;
	const char *desc, *link, *p, *q, *descend, *linkend;
	const char *title = NULL, *titleend = NULL;

	if(*begin == '[')
		img = 0;
	else if(strncmp(begin, "![", 2) == 0)
		img = 1;
	else
		return 0;
	p = desc = begin + 1 + img;
	if(!(p = strstr(desc, "](")) || p > end)
		return 0;
	/* each image nested in the description has its own "](" - skip those */
	for(q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "!["))
		if(!(p = strstr(p + 1, "](")) || p > end)
			return 0;
	descend = p;
	link = p + 2;

	/* find end of link while handling nested parens; a parenthesis at or
	 * beyond end does not belong to this range, and accepting one could
	 * overshoot end or loop without advancing */
	q = link;
	while(parens_depth) {
		if(!(q = strpbrk(q, "()")) || q >= end)
			return 0;
		if(*q == '(')
			parens_depth++;
		else
			parens_depth--;
		if(parens_depth)
			q++;
	}

	if((p = strpbrk(link, "\"'")) && p < end && q > p) {
		sep = p[0]; /* separator: can be " or ' */
		title = p + 1;
		/* strip trailing whitespace; isspace() needs an unsigned char value */
		for(linkend = p; linkend > link && isspace((unsigned char)*(linkend - 1)); linkend--);
		if(!(p = strchr(title, sep)) || q > end || p > q)
			return 0;
		titleend = p;
	}
	else {
		linkend = q;
	}

	/* Links can be given in angular brackets */
	if(*link == '<' && *(linkend - 1) == '>') {
		link++;
		linkend--;
	}

	len = q + 1 - begin;
	if(img) {
		fputs("<img src=\"", stdout);
		hprint(link, linkend);
		fputs("\" alt=\"", stdout);
		hprint(desc, descend);
		fputs("\" ", stdout);
		if(title && titleend) {
			fputs("title=\"", stdout);
			hprint(title, titleend);
			fputs("\" ", stdout);
		}
		fputs("/>", stdout);
	}
	else {
		fputs("<a href=\"", stdout);
		hprint(link, linkend);
		fputs("\"", stdout);
		if(title && titleend) {
			fputs(" title=\"", stdout);
			hprint(title, titleend);
			fputs("\"", stdout);
		}
		fputs(">", stdout);
		process(desc, descend, 0);
		fputs("</a>", stdout);
	}
	return len;
}
/* Parser for lists.
 * Recognizes an unordered list (marker '-', '*' or '+') or an ordered list
 * (digits followed by '.') at the start of a block or right after a newline.
 * Each item's text is collected into a temporary buffer, with the marker and
 * indentation removed, and handed to process() between <li> and </li>.
 * Returns 0 if there is no list at begin, otherwise the negated number of
 * bytes consumed. */
int
dolist(const char *begin, const char *end, int newblock) {
unsigned int i, j, indent, run, ul, isblock;
const char *p, *q;
char *buffer = NULL;
isblock = 0;
/* a list may only start a block or follow a newline */
if(newblock)
p = begin;
else if(*begin == '\n')
p = begin + 1;
else
return 0;
/* q remembers where the marker starts */
q = p;
if(*p == '-' || *p == '*' || *p == '+')
ul = 1;
else {
ul = 0;
/* ordered list: skip the digits, then require a '.' */
for(; p < end && *p >= '0' && *p <= '9'; p++);
if(p >= end || *p != '.')
return 0;
}
p++;
/* the marker must be followed by at least one space or tab */
if(p >= end || !(*p == ' ' || *p == '\t'))
return 0;
for(p++; p != end && (*p == ' ' || *p == '\t'); p++);
/* width of the marker plus the whitespace after it; following lines must
 * match this width to belong to the list */
indent = p - q;
buffer = ereallocz(buffer, BUFSIZ);
if(!newblock)
fputc('\n', stdout);
fputs(ul ? "<ul>\n" : "<ol>\n", stdout);
run = 1;
/* outer loop: one iteration per list item;
 * inner loop: copies the text of the current item into buffer */
for(; p < end && run; p++) {
for(i = 0; p < end && run; p++, i++) {
if(*p == '\n') {
if(p + 1 == end)
break;
else if(p[1] == '\n') {
/* blank line: tentatively ends the list (run = 0) unless the next
 * line matches the indentation below; counted in isblock */
p++;
ADDC(buffer, i) = '\n';
i++;
run = 0;
isblock++;
}
/* q = start of the next line; j = how many of its leading bytes look
 * like a marker and/or indentation */
q = p + 1;
j = 0;
if(ul && (*q == '-' || *q == '*' || *q == '+'))
j = 1;
else if(!ul) {
for(; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++);
if(q + j == end)
break;
if(j > 0 && q[j] == '.')
j++;
else
j = 0;
}
if(q + indent < end)
for(; (q[j] == ' ' || q[j] == '\t') && j < indent; j++);
if(j == indent) {
/* the next line belongs to the list: skip its marker/indentation */
ADDC(buffer, i) = '\n';
i++;
p += indent;
run = 1;
/* a line starting with whitespace continues the current item;
 * anything else (a marker) starts the next item */
if(*q == ' ' || *q == '\t')
p++;
else
break;
}
}
ADDC(buffer, i) = *p;
}
ADDC(buffer, i) = '\0';
fputs("<li>", stdout);
/* the item is processed as a new block depending on how many blank
 * lines were seen so far and on whether the list continues */
process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
fputs("</li>\n", stdout);
}
fputs(ul ? "</ul>\n" : "</ol>\n", stdout);
free(buffer);
/* step back over trailing newlines so they are not counted as consumed */
p--;
while(*(--p) == '\n');
return -(p - begin + 1);
}
/* Parser for paragraphs.
 * Only applies at the start of a block. The paragraph extends to the next
 * blank line ("\n\n") or, if there is none inside the range, to end. Its
 * content is parsed via process() and wrapped in <p>...</p>.
 * Returns 0 when not at a block start or when the paragraph would be at most
 * one byte long, otherwise the negated number of bytes consumed. */
int
doparagraph(const char *begin, const char *end, int newblock) {
	const char *blank, *stop;

	if(!newblock)
		return 0;
	blank = strstr(begin, "\n\n");
	stop = (blank != NULL && blank <= end) ? blank : end;
	if(stop - begin <= 1)
		return 0;
	fputs("<p>", stdout);
	process(begin, stop, 0);
	fputs("</p>\n", stdout);
	return -(stop - begin);
}
/* Parser for simple replaces.
 * First prints the insertion text of every insert entry whose search text
 * starts at begin (no input is consumed for these). Then, for the first
 * replace entry that fits into the range and matches at begin, prints its
 * replacement and returns the length of the matched text.
 * Returns 0 if no replace entry matches. */
int
doreplace(const char *begin, const char *end, int newblock) {
	unsigned int idx, len;

	for(idx = 0; idx < LENGTH(insert); idx++) {
		if(strncmp(insert[idx][0], begin, strlen(insert[idx][0])) != 0)
			continue;
		fputs(insert[idx][1], stdout);
	}
	for(idx = 0; idx < LENGTH(replace); idx++) {
		len = strlen(replace[idx][0]);
		if(end - begin >= len && strncmp(replace[idx][0], begin, len) == 0) {
			fputs(replace[idx][1], stdout);
			return len;
		}
	}
	return 0;
}
/* Parser for shortlinks: <scheme:...> / <...#...> urls and <user@host> mail
 * addresses. The text between '<' and the first '>' must not contain a
 * space, tab or newline. It is treated as a url when it contains ':' or '#'
 * and as a mail address when it contains '@' before any of those. Mail
 * addresses are written as numeric character references, byte by byte.
 * Returns the number of bytes consumed including both brackets, or 0 if the
 * text at begin is not a shortlink. */
int
doshortlink(const char *begin, const char *end, int newblock) {
	const char *p, *c;
	int ismail = 0;

	if(*begin != '<')
		return 0;
	for(p = begin + 1; p != end; p++) {
		switch(*p) {
		case ' ':
		case '\t':
		case '\n':
			return 0;
		case '#':
		case ':':
			ismail = -1;
			break;
		case '@':
			if(ismail == 0)
				ismail = 1;
			break;
		case '>':
			if(ismail == 0)
				return 0;
			fputs("<a href=\"", stdout);
			if(ismail == 1) {
				/* mailto: */
				fputs("&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:", stdout);
				/* p is the first '>' after begin. Convert through
				 * unsigned char so bytes with the high bit set are
				 * printed as 128..255 rather than as a sign-extended
				 * value */
				for(c = begin + 1; c != p; c++)
					fprintf(stdout, "&#%u;", (unsigned int)(unsigned char)*c);
				fputs("\">", stdout);
				for(c = begin + 1; c != p; c++)
					fprintf(stdout, "&#%u;", (unsigned int)(unsigned char)*c);
			}
			else {
				hprint(begin + 1, p);
				fputs("\">", stdout);
				hprint(begin + 1, p);
			}
			fputs("</a>", stdout);
			return p - begin + 1;
		default:
			break;
		}
	}
	return 0;
}
/* Parser for surrounding tags (code spans, emphasis, strong).
 * Uses the first entry of surround whose delimiter starts at begin and has a
 * closing occurrence inside the range; occurrences preceded by a backslash
 * are skipped while searching. The enclosed text is emitted between the
 * entry's before/after strings, parsed via process() or HTML-escaped via
 * hprint() depending on the entry's process field.
 * One space is removed from both ends of the enclosed text when it begins
 * and ends with a space and does not consist of spaces only.
 * Returns the number of bytes consumed including both delimiters, or 0 if
 * no entry applies. */
int
dosurround(const char *begin, const char *end, int newblock) {
	unsigned int i, l;
	const char *p, *q, *start, *stop;

	for(i = 0; i < LENGTH(surround); i++) {
		l = strlen(surround[i].search);
		if(end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
			continue;
		start = begin + l;
		/* find the closing delimiter, skipping escaped occurrences */
		p = start - 1;
		do {
			stop = p;
			p = strstr(p + 1, surround[i].search);
		} while(p && p[-1] == '\\');
		if (p && p[-1] != '\\')
			stop = p;
		if(!stop || stop < start || stop >= end)
			continue;
		fputs(surround[i].before, stdout);

		/* Single space at start and end are ignored, but only when the
		 * content is not made of spaces alone. Without this guard a span
		 * holding one space ends up with start > stop, which sends
		 * hprint() past the end of the buffer. */
		if(stop - start >= 2 && *start == ' ' && *(stop - 1) == ' ') {
			for(q = start; q < stop && *q == ' '; q++);
			if(q < stop) {
				start++;
				stop--;
				l++;
			}
		}

		if(surround[i].process)
			process(start, stop, 0);
		else
			hprint(start, stop);
		fputs(surround[i].after, stdout);
		return stop - begin + l;
	}
	return 0;
}
/* Parser for underline tags (setext-style headings).
 * Only applies at the start of a block. The first line is the title; the
 * second line must consist of at least as many underline characters as the
 * title has bytes. The title is emitted between the matching entry's
 * before/after strings, parsed via process() or HTML-escaped via hprint().
 * Returns 0 if there is no heading at begin, otherwise the negated number of
 * bytes consumed (title, newline and underline characters). */
int
dounderline(const char *begin, const char *end, int newblock) {
	unsigned int i, j, l;
	const char *p;

	if(!newblock)
		return 0;
	p = begin;
	for(l = 0; p + l != end && p[l] != '\n'; l++);
	/* An empty title cannot be a heading. If the first line runs to end
	 * there is no second line at all; stepping over the missing newline
	 * would place p beyond end and make the scan below read outside the
	 * range. */
	if(l == 0 || p + l == end)
		return 0;
	p += l + 1;
	for(i = 0; i < LENGTH(underline); i++) {
		for(j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++);
		if(j >= l) {
			fputs(underline[i].before, stdout);
			if(underline[i].process)
				process(begin, begin + l, 0);
			else
				hprint(begin, begin + l);
			fputs(underline[i].after, stdout);
			return -(j + p - begin);
		}
	}
	return 0;
}
/* Allocate or resize a buffer, exiting the program on failure.
 * When p is NULL a new zero-filled block of size bytes is allocated with
 * calloc(); otherwise p is resized with realloc() (any added bytes are not
 * zeroed). Returns the resulting pointer; does not return on failure. */
void *
ereallocz(void *p, size_t size) {
	void *res;

	if(p)
		res = realloc(p, size);
	else
		res = calloc(1, size);
	if(!res)
		/* size is a size_t, which needs the z length modifier */
		eprint("fatal: could not malloc() %zu bytes\n", size);
	return res;
}
/* Write the bytes in [begin, end) to stdout, replacing the characters
 * & " > < with their HTML entities. */
void
hprint(const char *begin, const char *end) {
	const char *cur;
	const char *entity;

	for(cur = begin; cur != end; cur++) {
		switch(*cur) {
		case '&':
			entity = "&amp;";
			break;
		case '"':
			entity = "&quot;";
			break;
		case '>':
			entity = "&gt;";
			break;
		case '<':
			entity = "&lt;";
			break;
		default:
			entity = NULL;
			break;
		}
		if(entity != NULL)
			fputs(entity, stdout);
		else
			fputc(*cur, stdout);
	}
}
/* Processes the range between begin and end and writes the result to stdout.
 * At every position the parsers are tried in table order until one reports
 * that it handled the input (non-zero return); the absolute value of that
 * return is the number of bytes to skip. If no parser applies, the current
 * byte is copied to the output (HTML-escaped when nohtml is set).
 * newblock tells the parsers whether the position starts a new block; it is
 * re-evaluated after every step: set after a blank line or when the last
 * parser returned a negative value. */
void
process(const char *begin, const char *end, int newblock) {
	const char *p, *q;
	int affected;
	unsigned int i;

	for(p = begin; p < end;) {
		/* leading newlines of a block carry no content */
		if(newblock)
			while(*p == '\n')
				if(++p == end)
					return;
		affected = 0;
		for(i = 0; i < LENGTH(parsers) && !affected; i++)
			affected = parsers[i](p, end, newblock);
		p += abs(affected);
		if(!affected) {
			if(nohtml)
				hprint(p, p + 1);
			else
				fputc(*p, stdout);
			p++;
		}
		/* a parser may report more bytes than were left in the range;
		 * stop here rather than scanning memory beyond end */
		if(p >= end)
			return;
		/* nothing but newlines left: done */
		for(q = p; q < end && *q == '\n'; q++);
		if(q == end)
			return;
		else if(p[0] == '\n' && p + 1 != end && p[1] == '\n')
			newblock = 1;
		else
			newblock = affected < 0;
	}
}
/* Entry point.
 * Options: -v prints the version and exits, -n enables strict HTML escaping,
 * -- ends option parsing. The first non-option argument names the input
 * file; without one, stdin is read. The whole input is loaded into memory,
 * NUL-terminated and converted by process(). */
int
main(int argc, char *argv[]) {
	FILE *source = stdin;
	char *buffer = NULL;
	unsigned long len = 0, bsize = 2 * BUFSIZ;
	int nread, argi;

	for(argi = 1; argi < argc; argi++) {
		const char *arg = argv[argi];

		if(strcmp("-v", arg) == 0) {
			eprint("simple markup %s (C) Enno Boland\n",VERSION);
		} else if(strcmp("-n", arg) == 0) {
			nohtml = 1;
		} else if(arg[0] != '-') {
			/* first non-option argument: the input file */
			break;
		} else if(strcmp("--", arg) == 0) {
			argi++;
			break;
		} else {
			eprint("Usage %s [-n] [file]\n -n escape html strictly\n", argv[0]);
		}
	}
	if(argi < argc) {
		source = fopen(argv[argi], "r");
		if(source == NULL)
			eprint("Cannot open file `%s`\n",argv[argi]);
	}

	buffer = ereallocz(buffer, bsize);
	for(;;) {
		nread = fread(buffer + len, 1, BUFSIZ, source);
		if(nread == 0)
			break;
		len += nread;
		/* keep room for the next BUFSIZ chunk plus the terminator */
		if(BUFSIZ + len + 1 > bsize) {
			bsize += BUFSIZ;
			buffer = realloc(buffer, bsize);
			if(buffer == NULL)
				eprint("realloc failed.");
		}
	}
	buffer[len] = '\0';
	process(buffer, buffer + len, 1);
	fclose(source);
	free(buffer);
	return EXIT_SUCCESS;
}