/* smu - simple markup
 * Copyright (C) <2007, 2008> Enno Boland <g s01 de>
 *
 * See LICENSE for further information
 */

#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>

/* Growth step, in elements, of the dynamically sized buffers in this file. */
#define BUFFERSIZE 512

/* Number of elements of a real array; must not be applied to a pointer. */
#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Usage: ADDC(buf, idx) = value;
 *
 * Yields the lvalue buf[idx]. Whenever idx is a multiple of BUFFERSIZE the
 * buffer is first resized to idx + BUFFERSIZE elements, so indices must be
 * written in ascending order without gaps. eprint() is called when realloc
 * fails.
 *
 * The expansion is an if-statement followed by an expression, so it is only
 * safe as a complete statement inside a braced body, and both arguments are
 * evaluated more than once (no side effects in them).
 */
#define ADDC(b,i) if((i) % BUFFERSIZE == 0) \
	{ (b) = realloc((b), ((i) + BUFFERSIZE) * sizeof(*(b))); if(!(b)) eprint("Malloc failed."); } (b)[i]

/*
 * Signature shared by all parsers: (begin, end, newblock).
 * A parser looks at the text starting at begin (end marks the end of the
 * range) and returns 0 when it does not handle it. A non-zero return is the
 * number of characters consumed; process() advances by its absolute value and
 * treats a negative value as "the following text starts a new block".
 */
typedef int (*Parser)(const char *, const char *, int);

/* One markup rule: what to look for and what to print around the content. */
struct Tag {
	char *search;         /* markup string to match in the input */
	int process;          /* 0: content is printed via hprint();
	                       * non-zero: content is run through process();
	                       * dolineprefix() additionally treats >= 2 as
	                       * "process the content as a new block" */
	char *before, *after; /* output written before / after the content */
};

void eprint(const char *format, ...);			/* Prints error and exits */
int doamp(const char *begin, const char *end, int newblock);	/* Parser for & */
int dogtlt(const char *begin, const char *end, int newblock);	/* Parser for < and > */
int dohtml(const char *begin, const char *end, int newblock);	/* Parser for html */
int dolineprefix(const char *begin, const char *end, int newblock);/* Parser for line prefix tags */
int dolink(const char *begin, const char *end, int newblock);	/* Parser for links and images */
int dolist(const char *begin, const char *end, int newblock);	/* Parser for lists */
int doparagraph(const char *begin, const char *end, int newblock); /* Parser for paragraphs */
int doreplace(const char *begin, const char *end, int newblock);	/* Parser for simple replaces */
int doshortlink(const char *begin, const char *end, int newblock);	/* Parser for <url> and <mail@address> short links */
int dosurround(const char *begin, const char *end, int newblock);	/* Parser for surrounding tags */
int dounderline(const char *begin, const char *end, int newblock);	/* Parser for underline tags */
void hprint(const char *begin, const char *end);	/* Escapes HTML and prints it to stdout */
void process(const char *begin, const char *end, int isblock);	/* Processes range between begin and end. */

Parser parsers[] = { dounderline, dohtml, dolineprefix, dolist, doparagraph,
|
|
dogtlt, dosurround, dolink, doshortlink, doamp, doreplace };
|
|
/* list of parsers */
|
|
FILE *source;
|
|
unsigned int nohtml = 0;
|
|
struct Tag lineprefix[] = {
|
|
{ " ", 0, "<pre><code>", "</code></pre>" },
|
|
{ "\t", 0, "<pre><code>", "</code></pre>" },
|
|
{ "> ", 2, "<blockquote>", "</blockquote>" },
|
|
{ "###### ", 1, "<h6>", "</h6>" },
|
|
{ "##### ", 1, "<h5>", "</h5>" },
|
|
{ "#### ", 1, "<h4>", "</h4>" },
|
|
{ "### ", 1, "<h3>", "</h3>" },
|
|
{ "## ", 1, "<h2>", "</h2>" },
|
|
{ "# ", 1, "<h1>", "</h1>" },
|
|
{ "- - -\n", 1, "<hr />", ""},
|
|
};
|
|
struct Tag underline[] = {
|
|
{ "=", 1, "<h1>", "</h1>\n" },
|
|
{ "-", 1, "<h2>", "</h2>\n" },
|
|
};
|
|
struct Tag surround[] = {
|
|
{ "``", 0, "<code>", "</code>" },
|
|
{ "`", 0, "<code>", "</code>" },
|
|
{ "___", 1, "<strong><em>", "</em></strong>" },
|
|
{ "***", 1, "<strong><em>", "</em></strong>" },
|
|
{ "__", 1, "<strong>", "</strong>" },
|
|
{ "**", 1, "<strong>", "</strong>" },
|
|
{ "_", 1, "<em>", "</em>" },
|
|
{ "*", 1, "<em>", "</em>" },
|
|
};
|
|
char * replace[][2] = {
|
|
{ "\\\\", "\\" },
|
|
{ "\\`", "`" },
|
|
{ "\\*", "*" },
|
|
{ "\\_", "_" },
|
|
{ "\\{", "{" },
|
|
{ "\\}", "}" },
|
|
{ "\\[", "[" },
|
|
{ "\\]", "]" },
|
|
{ "\\(", "(" },
|
|
{ "\\)", ")" },
|
|
{ "\\#", "#" },
|
|
{ "\\+", "+" },
|
|
{ "\\-", "-" },
|
|
{ "\\.", "." },
|
|
{ "\\!", "!" },
|
|
};
|
|
char * insert[][2] = {
|
|
{ " \n", "<br />" },
|
|
};
|
|
|
|
/*
 * Prints a printf-style formatted message to stderr and terminates the
 * program with EXIT_FAILURE. Never returns.
 */
void
eprint(const char *format, ...) {
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
	exit(EXIT_FAILURE);
}

int
|
|
doamp(const char *begin, const char *end, int newblock) {
|
|
const char *p;
|
|
|
|
if(*begin != '&')
|
|
return 0;
|
|
if(!nohtml) {
|
|
for(p = begin + 1; p != end && !strchr("; \\\n\t", *p); p++);
|
|
if(p == end || *p == ';')
|
|
return 0;
|
|
}
|
|
fputs("&", stdout);
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
dogtlt(const char *begin, const char *end, int newblock) {
|
|
int brpos;
|
|
char c;
|
|
|
|
if(nohtml || begin + 1 >= end)
|
|
return 0;
|
|
brpos = begin[1] == '>';
|
|
if(!brpos && *begin != '<')
|
|
return 0;
|
|
c = begin[brpos ? 0 : 1];
|
|
if(!brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
|
|
fputs("<",stdout);
|
|
return 1;
|
|
}
|
|
else if(brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && !strchr("/\"'",c)) {
|
|
printf("%c>",c);
|
|
return 2;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
dohtml(const char *begin, const char *end, int newblock) {
|
|
const char *p, *tag, *tagend;
|
|
|
|
if(nohtml || !newblock || *begin == '\n' || begin + 2 >= end)
|
|
return 0;
|
|
p = begin;
|
|
if(p[1] == '\n')
|
|
p++;
|
|
if(p[1] != '<' || strchr(" /\n\t\\", p[2]))
|
|
return 0;
|
|
tag = p + 2;
|
|
p += 2;
|
|
for(; !strchr(" >", *p); p++);
|
|
tagend = p;
|
|
while((p = strstr(p, "\n</")) && p < end) {
|
|
p += 3;
|
|
if(strncmp(p, tag, tagend - tag) == 0 && p[tagend - tag] == '>') {
|
|
p++;
|
|
fwrite(begin, sizeof(char), p - begin + tagend - tag, stdout);
|
|
puts("\n");
|
|
return -(p - begin + tagend - tag);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
dolineprefix(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, j, l;
|
|
char *buffer;
|
|
const char *p;
|
|
|
|
if(newblock)
|
|
p = begin;
|
|
else if(*begin == '\n')
|
|
p = begin + 1;
|
|
else
|
|
return 0;
|
|
for(i = 0; i < LENGTH(lineprefix); i++) {
|
|
l = strlen(lineprefix[i].search);
|
|
if(end - p < l)
|
|
continue;
|
|
if(strncmp(lineprefix[i].search, p, l))
|
|
continue;
|
|
if(*begin == '\n')
|
|
fputc('\n', stdout);
|
|
fputs(lineprefix[i].before, stdout);
|
|
if(lineprefix[i].search[l-1] == '\n') {
|
|
fputc('\n', stdout);
|
|
return l;
|
|
}
|
|
if(!(buffer = malloc(BUFFERSIZE)))
|
|
eprint("Malloc failed.");
|
|
buffer[0] = '\0';
|
|
for(j = 0, p += l; p < end; p++, j++) {
|
|
ADDC(buffer, j) = *p;
|
|
if(*p == '\n' && p + l < end) {
|
|
if(strncmp(lineprefix[i].search, p + 1, l) != 0)
|
|
break;
|
|
p += l;
|
|
}
|
|
}
|
|
ADDC(buffer, j) = '\0';
|
|
if(lineprefix[i].process)
|
|
process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
|
|
else
|
|
hprint(buffer, buffer + strlen(buffer));
|
|
puts(lineprefix[i].after);
|
|
free(buffer);
|
|
return -(p - begin);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Parser for links "[desc](url)" and images "![alt](url)".
 *
 * Returns 0 when begin does not start a complete link or image inside the
 * range. Otherwise prints an <a> element (description run through process())
 * or an <img> element (alt text and URL escaped with hprint()) and returns
 * the number of characters consumed, including the closing ')'.
 * newblock is unused.
 *
 * NOTE(review): the "![" and "](" string literals and the statements between
 * them were missing from the text this block was recovered from (they look
 * like Markdown image syntax and were swallowed by a renderer). They are
 * reconstructed from the surviving declarations and uses of img, desc, p and
 * q - verify against the upstream source.
 */
int
dolink(const char *begin, const char *end, int newblock) {
	int img;
	const char *desc, *link, *p, *q, *descend, *linkend;

	if(*begin == '[')
		img = 0;
	else if(strncmp(begin, "![", 2) == 0)
		img = 1;
	else
		return 0;
	desc = begin + 1 + img;
	if(!(p = strstr(desc, "](")) || p > end)
		return 0;
	/* every image nested in the description owns one "](": skip past it */
	for(q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "!["))
		if(!(p = strstr(p + 1, "](")) || p > end)
			return 0;
	descend = p;
	link = p + 2;
	if(!(p = strstr(link, ")")) || p > end)
		return 0;
	linkend = p;
	if(img) {
		fputs("<img src=\"", stdout);
		hprint(link, linkend);
		fputs("\" alt=\"", stdout);
		hprint(desc, descend);
		fputs("\" />", stdout);
	}
	else {
		fputs("<a href=\"", stdout);
		hprint(link, linkend);
		fputs("\">", stdout);
		process(desc, descend, 0);
		fputs("</a>", stdout);
	}
	return p + 1 - begin;
}

int
|
|
dolist(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, j, indent, run, ul, isblock;
|
|
const char *p, *q;
|
|
char *buffer;
|
|
|
|
isblock = 0;
|
|
if(newblock)
|
|
p = begin;
|
|
else if(*begin == '\n')
|
|
p = begin + 1;
|
|
else
|
|
return 0;
|
|
q = p;
|
|
if(*p == '-' || *p == '*' || *p == '+')
|
|
ul = 1;
|
|
else {
|
|
ul = 0;
|
|
for(; p < end && *p >= '0' && *p <= '9'; p++);
|
|
if(p >= end || *p != '.')
|
|
return 0;
|
|
}
|
|
p++;
|
|
if(p >= end || !(*p == ' ' || *p == '\t'))
|
|
return 0;
|
|
for(p++; p != end && (*p == ' ' || *p == '\t'); p++);
|
|
indent = p - q;
|
|
if(!(buffer = malloc(BUFFERSIZE)))
|
|
eprint("Malloc failed.");
|
|
if(!newblock)
|
|
putchar('\n');
|
|
fputs(ul ? "<ul>\n" : "<ol>\n", stdout);
|
|
run = 1;
|
|
for(; p < end && run; p++) {
|
|
for(i = 0; p < end && run; p++, i++) {
|
|
if(*p == '\n') {
|
|
if(p + 1 == end)
|
|
break;
|
|
else if(p[1] == '\n') {
|
|
p++;
|
|
ADDC(buffer, i) = '\n';
|
|
i++;
|
|
run = 0;
|
|
isblock++;
|
|
}
|
|
q = p + 1;
|
|
j = 0;
|
|
if(ul && (*q == '-' || *q == '*' || *q == '+'))
|
|
j = 1;
|
|
else if(!ul) {
|
|
for(; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++);
|
|
if(q + j == end)
|
|
break;
|
|
if(j > 0 && q[j] == '.')
|
|
j++;
|
|
else
|
|
j = 0;
|
|
}
|
|
if(q + indent < end)
|
|
for(; (q[j] == ' ' || q[j] == '\t') && j < indent; j++);
|
|
if(j == indent) {
|
|
ADDC(buffer, i) = '\n';
|
|
i++;
|
|
p += indent;
|
|
run = 1;
|
|
if(*q == ' ' || *q == '\t')
|
|
p++;
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
ADDC(buffer, i) = *p;
|
|
}
|
|
ADDC(buffer, i) = '\0';
|
|
fputs("<li>", stdout);
|
|
process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
|
|
fputs("</li>\n", stdout);
|
|
}
|
|
fputs(ul ? "</ul>\n" : "</ol>\n", stdout);
|
|
free(buffer);
|
|
p--;
|
|
while(*(--p) == '\n');
|
|
return -(p - begin + 1);
|
|
}
|
|
|
|
/*
 * Parser for paragraphs.
 *
 * Only applies at the start of a block. The paragraph extends to the next
 * empty line ("\n\n") or, if there is none inside the range, to end. A
 * paragraph of at most one character is ignored (returns 0). Otherwise the
 * text is run through process() between "<p>\n" and "</p>\n" and the negated
 * paragraph length is returned.
 */
int
doparagraph(const char *begin, const char *end, int newblock) {
	const char *p;

	if(!newblock)
		return 0;
	p = strstr(begin, "\n\n");
	if(!p || p > end)
		p = end;
	if(p - begin <= 1)
		return 0;
	fputs("<p>\n", stdout);
	process(begin, p, 0);
	fputs("</p>\n", stdout);
	return -(p - begin);
}

int
|
|
doreplace(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, l;
|
|
|
|
for(i = 0; i < LENGTH(insert); i++)
|
|
if(strncmp(insert[i][0], begin, strlen(insert[i][0])) == 0)
|
|
fputs(insert[i][1], stdout);
|
|
for(i = 0; i < LENGTH(replace); i++) {
|
|
l = strlen(replace[i][0]);
|
|
if(end - begin < l)
|
|
continue;
|
|
if(strncmp(replace[i][0], begin, l) == 0) {
|
|
fputs(replace[i][1], stdout);
|
|
return l;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Parser for short links: "<http://example.org>" and "<user@example.org>".
 *
 * The text between '<' and the first '>' must not contain a space, tab or
 * newline. It is treated as a URL when it contains '#' or ':' and as a mail
 * address when it contains '@' before any of those; anything else is not a
 * short link. URLs are printed as <a href="url">url</a>, escaped with
 * hprint(). Mail addresses get a "mailto:" href and every byte of the
 * address is written as a numeric character reference.
 *
 * Returns the number of characters consumed including both brackets, or 0 if
 * begin is not a short link. newblock is unused.
 */
int
doshortlink(const char *begin, const char *end, int newblock) {
	const char *p, *c;
	int ismail = 0;		/* 0: undecided, 1: mail address, -1: URL */

	if(*begin != '<')
		return 0;
	for(p = begin + 1; p != end; p++) {
		switch(*p) {
		case ' ':
		case '\t':
		case '\n':
			return 0;
		case '#':
		case ':':
			ismail = -1;
			break;
		case '@':
			if(ismail == 0)
				ismail = 1;
			break;
		case '>':
			if(ismail == 0)
				return 0;
			fputs("<a href=\"", stdout);
			if(ismail == 1) {
				/* mailto: */
				fputs("mailto:", stdout);
				/* go through unsigned char so bytes >= 0x80 are not
				 * sign-extended before being printed with %u */
				for(c = begin + 1; c != p; c++)
					printf("&#%u;", (unsigned int)(unsigned char)*c);
				fputs("\">", stdout);
				for(c = begin + 1; c != p; c++)
					printf("&#%u;", (unsigned int)(unsigned char)*c);
			}
			else {
				hprint(begin + 1, p);
				fputs("\">", stdout);
				hprint(begin + 1, p);
			}
			fputs("</a>", stdout);
			return p - begin + 1;
		default:
			break;
		}
	}
	return 0;
}

int
|
|
dosurround(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, l;
|
|
const char *p, *start, *stop;
|
|
|
|
for(i = 0; i < LENGTH(surround); i++) {
|
|
l = strlen(surround[i].search);
|
|
if(end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
|
|
continue;
|
|
start = begin + l;
|
|
p = start - 1;
|
|
do {
|
|
p = strstr(p + 1, surround[i].search);
|
|
} while(p && p[-1] == '\\');
|
|
if(!p || p >= end ||
|
|
!(stop = strstr(start, surround[i].search)) || stop >= end)
|
|
continue;
|
|
fputs(surround[i].before, stdout);
|
|
if(surround[i].process)
|
|
process(start, stop, 0);
|
|
else
|
|
hprint(start, stop);
|
|
fputs(surround[i].after, stdout);
|
|
return stop - begin + l;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
dounderline(const char *begin, const char *end, int newblock) {
|
|
unsigned int i, j, l;
|
|
const char *p;
|
|
|
|
if(!newblock)
|
|
return 0;
|
|
p = begin;
|
|
for(l = 0; p + l != end && p[l] != '\n'; l++);
|
|
p += l + 1;
|
|
if(l == 0)
|
|
return 0;
|
|
for(i = 0; i < LENGTH(underline); i++) {
|
|
for(j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++);
|
|
if(j >= l) {
|
|
fputs(underline[i].before, stdout);
|
|
if(underline[i].process)
|
|
process(begin, begin + l, 0);
|
|
else
|
|
hprint(begin, begin + l);
|
|
fputs(underline[i].after, stdout);
|
|
return -(j + p - begin);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Writes the characters in [begin, end) to stdout, replacing the HTML
 * special characters & " > < with the entities &amp; &quot; &gt; &lt;.
 * All other characters are written unchanged.
 */
void
hprint(const char *begin, const char *end) {
	const char *p;

	for(p = begin; p != end; p++) {
		if(*p == '&')
			fputs("&amp;", stdout);
		else if(*p == '"')
			fputs("&quot;", stdout);
		else if(*p == '>')
			fputs("&gt;", stdout);
		else if(*p == '<')
			fputs("&lt;", stdout);
		else
			putchar(*p);
	}
}

void
|
|
process(const char *begin, const char *end, int newblock) {
|
|
const char *p, *q;
|
|
int affected;
|
|
unsigned int i;
|
|
|
|
for(p = begin; p != end;) {
|
|
if(newblock)
|
|
while(*p == '\n')
|
|
if (++p == end)
|
|
return;
|
|
affected = 0;
|
|
for(i = 0; i < LENGTH(parsers) && affected == 0; i++)
|
|
affected = parsers[i](p, end, newblock);
|
|
p += abs(affected);
|
|
if(!affected) {
|
|
if(nohtml)
|
|
hprint(p, p + 1);
|
|
else
|
|
putchar(*p);
|
|
p++;
|
|
}
|
|
for(q = p; q != end && *q == '\n'; q++);
|
|
if(q == end)
|
|
return;
|
|
else if(p[0] == '\n' && p + 1 != end && p[1] == '\n')
|
|
newblock = 1;
|
|
else
|
|
newblock = affected < 0;
|
|
}
|
|
}
|
|
|
|
int
|
|
main(int argc, char *argv[]) {
|
|
char *buffer;
|
|
int s;
|
|
unsigned long len, bsize;
|
|
|
|
source = stdin;
|
|
if(argc > 1 && strcmp("-v", argv[1]) == 0)
|
|
eprint("simple markup %s (C) Enno Boland\n",VERSION);
|
|
else if(argc > 1 && strcmp("-h", argv[1]) == 0)
|
|
eprint("Usage %s [-n] [file]\n -n escape html strictly\n",argv[0]);
|
|
if(argc > 1 && strcmp("-n", argv[1]) == 0)
|
|
nohtml = 1;
|
|
if(argc > 1 + nohtml && strcmp("-", argv[1 + nohtml]) != 0
|
|
&& !(source = fopen(argv[1 + nohtml],"r")))
|
|
eprint("Cannot open file `%s`\n",argv[1 + nohtml]);
|
|
bsize = 2 * BUFFERSIZE;
|
|
if(!(buffer = malloc(bsize)))
|
|
eprint("Malloc failed.");
|
|
len = 0;
|
|
while((s = fread(buffer + len, 1, BUFFERSIZE, source))) {
|
|
len += s;
|
|
if(BUFFERSIZE + len + 1 > bsize) {
|
|
bsize += BUFFERSIZE;
|
|
if(!(buffer = realloc(buffer, bsize)))
|
|
eprint("Malloc failed.");
|
|
}
|
|
}
|
|
buffer[len] = '\0';
|
|
process(buffer, buffer + len, 1);
|
|
fclose(source);
|
|
free(buffer);
|
|
return EXIT_SUCCESS;
|
|
}
|