smu: Change file reading

This commit is contained in:
Enno Tensing 2025-01-21 19:51:56 +01:00
parent c702617acd
commit 29d314255d
Signed by: tenno
GPG key ID: 95265603BD36E66C

157
smu.c
View file

@ -1,60 +1,61 @@
/* smu - simple markup /* smu - simple markup
* Copyright (C) <2007, 2008> Enno Boland <g s01 de> * Copyright (C) <2007, 2008> Enno Boland <g s01 de>
* Copyright (C) 2025 Enno Tensing <tenno+smu@suij.in>
* *
* See LICENSE for further informations * See LICENSE for further informations
*/ */
#define _LARGEFILE64_SOURCE
#include <stdarg.h> #include <stdarg.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
#define LENGTH(x) sizeof(x) / sizeof(x[0]) #define LENGTH(x) sizeof(x) / sizeof(x[0])
#define ADDC(b, i) \ #define ADDC(b, i, a) \
do { \
if (i % BUFSIZ == 0) { \ if (i % BUFSIZ == 0) { \
b = realloc(b, (i + BUFSIZ) * sizeof(char)); \ b = realloc(b, (i + BUFSIZ) * sizeof(char)); \
if (!b) \ if (!b) { \
eprint("Malloc failed."); \ eprint("Malloc failed."); \
return -1; \
} \ } \
b[i] } \
b[i] = a; \
} while (0)
typedef int (*Parser)(const char *, const char *, int); typedef int (*Parser)(const char *, const char *, int);
typedef struct { struct tag {
char *search; char *search;
int process; int process;
char *before, *after; char *before;
} Tag; char *after;
};
static int doamp(const char *begin, const char *end, off64_t get_file_size(const char *);
int newblock); /* Parser for & */ char *read_file(const char *, off64_t);
static int docomment(const char *begin, const char *end,
int newblock); /* Parser for html-comments */ static int doamp(const char *begin, const char *end, int newblock);
static int dogtlt(const char *begin, const char *end, static int docomment(const char *begin, const char *end, int newblock);
int newblock); /* Parser for < and > */ static int dogtlt(const char *begin, const char *end, int newblock);
static int dohtml(const char *begin, const char *end, static int dohtml(const char *begin, const char *end, int newblock);
int newblock); /* Parser for html */ static int dolineprefix(const char *begin, const char *end, int newblock);
static int dolineprefix(const char *begin, const char *end, static int dolink(const char *begin, const char *end, int newblock);
int newblock); /* Parser for line prefix tags */ static int dolist(const char *begin, const char *end, int newblock);
static int dolink(const char *begin, const char *end, static int doparagraph(const char *begin, const char *end, int newblock);
int newblock); /* Parser for links and images */ static int doreplace(const char *begin, const char *end, int newblock);
static int dolist(const char *begin, const char *end, static int doshortlink(const char *begin, const char *end, int newblock);
int newblock); /* Parser for lists */ static int dosurround(const char *begin, const char *end, int newblock);
static int doparagraph(const char *begin, const char *end, static int dounderline(const char *begin, const char *end, int newblock);
int newblock); /* Parser for paragraphs */
static int doreplace(const char *begin, const char *end,
int newblock); /* Parser for simple replaces */
static int doshortlink(const char *begin, const char *end,
int newblock); /* Parser for links and images */
static int dosurround(const char *begin, const char *end,
int newblock); /* Parser for surrounding tags */
static int dounderline(const char *begin, const char *end,
int newblock); /* Parser for underline tags */
static void *ereallocz(void *p, size_t size); static void *ereallocz(void *p, size_t size);
static void eprint(const char *format, ...); static void eprint(const char *format, ...);
static void hprint(const char *begin, static void hprint(const char *begin, const char *end);
const char *end); /* escapes HTML and prints it to output */ static void process(const char *begin, const char *end, int isblock);
static void process(const char *begin, const char *end,
int isblock); /* Processes range between begin and end. */
/* list of parsers */ /* list of parsers */
static Parser parsers[] = { dounderline, docomment, dolineprefix, dolist, static Parser parsers[] = { dounderline, docomment, dolineprefix, dolist,
@ -62,7 +63,7 @@ static Parser parsers[] = { dounderline, docomment, dolineprefix, dolist,
doshortlink, dohtml, doamp, doreplace }; doshortlink, dohtml, doamp, doreplace };
static int nohtml = 0; static int nohtml = 0;
static Tag lineprefix[] = { static struct tag lineprefix[] = {
{ " ", 0, "<pre><code>", "\n</code></pre>" }, { " ", 0, "<pre><code>", "\n</code></pre>" },
{ "\t", 0, "<pre><code>", "\n</code></pre>" }, { "\t", 0, "<pre><code>", "\n</code></pre>" },
{ ">", 2, "<blockquote>", "</blockquote>" }, { ">", 2, "<blockquote>", "</blockquote>" },
@ -75,12 +76,13 @@ static Tag lineprefix[] = {
{ "- - -\n", 1, "<hr />", "" }, { "- - -\n", 1, "<hr />", "" },
}; };
static Tag underline[] = { static struct tag underline[] = {
{ "=", 1, "<h1>", "</h1>\n" }, { "=", 1, "<h1>", "</h1>\n" },
{ "-", 1, "<h2>", "</h2>\n" }, { "-", 1, "<h2>", "</h2>\n" },
}; };
static Tag surround[] = { static struct tag surround[] = {
{ "```", 0, "<code>", "</code>" },
{ "``", 0, "<code>", "</code>" }, { "``", 0, "<code>", "</code>" },
{ "`", 0, "<code>", "</code>" }, { "`", 0, "<code>", "</code>" },
{ "___", 1, "<strong><em>", "</em></strong>" }, { "___", 1, "<strong><em>", "</em></strong>" },
@ -102,6 +104,42 @@ static const char *insert[][2] = {
{ " \n", "<br />" }, { " \n", "<br />" },
}; };
/*
 * Return the size in bytes of the regular file named by path.
 *
 * Returns -1 with errno set on failure:
 *   - whatever stat() reported, if the path cannot be examined;
 *   - EISDIR if path names a directory;
 *   - EINVAL if path names any other non-regular file.
 *
 * Non-regular files are rejected because st_size is only meaningful for
 * regular files; handing back that value would make the caller allocate and
 * read a bogus number of bytes.
 */
off64_t get_file_size(const char *path)
{
	struct stat st;

	if (stat(path, &st) != 0)
		return -1;

	if (!S_ISREG(st.st_mode)) {
		errno = S_ISDIR(st.st_mode) ? EISDIR : EINVAL;
		return -1;
	}

	return (off64_t)st.st_size;
}
/*
 * Read exactly file_size bytes from the file named by path into a newly
 * allocated buffer.
 *
 * The buffer is file_size + 4 bytes long and zero-filled by calloc(), so the
 * data is always followed by NUL bytes. Ownership passes to the caller, who
 * must free() it.
 *
 * Returns NULL after printing a diagnostic to stderr when:
 *   - path is NULL or file_size is negative (EINVAL);
 *   - file_size + 4 does not fit in size_t (ENOMEM);
 *   - the file cannot be opened, the allocation fails, or read() fails;
 *   - the file ends before file_size bytes have been read.
 */
char *read_file(const char *path, off64_t file_size)
{
	char *buf;
	size_t want;
	size_t got = 0;
	int fd;

	/* A negative size would under-allocate and then over-read. */
	if (!path || file_size < 0) {
		errno = EINVAL;
		perror("read_file");
		return NULL;
	}

	/* off64_t may be wider than size_t; refuse sizes we cannot allocate. */
	if ((unsigned long long)file_size > (size_t)-1 - 4) {
		errno = ENOMEM;
		perror(path);
		return NULL;
	}
	want = (size_t)file_size;

	/* State the access mode explicitly; the other flags are unchanged. */
	fd = open(path, O_RDONLY | O_LARGEFILE | O_NONBLOCK);
	if (fd == -1) {
		perror(path);
		return NULL;
	}

	buf = calloc(want + 4, sizeof(char));
	if (!buf) {
		/* Report before close() so errno still belongs to calloc(). */
		perror(path);
		close(fd);
		return NULL;
	}

	/* read() may legitimately return fewer bytes than asked for. */
	while (got < want) {
		ssize_t n = read(fd, buf + got, want - got);

		if (n < 0) {
			if (errno == EINTR)
				continue;
			perror(path);
			goto fail;
		}
		if (n == 0) {
			/* errno is meaningless at EOF, so do not use perror(). */
			fprintf(stderr, "%s: unexpected end of file\n", path);
			goto fail;
		}
		got += (size_t)n;
	}

	close(fd);
	return buf;

fail:
	close(fd);
	free(buf);
	return NULL;
}
void eprint(const char *format, ...) void eprint(const char *format, ...)
{ {
va_list ap; va_list ap;
@ -109,7 +147,6 @@ void eprint(const char *format, ...)
va_start(ap, format); va_start(ap, format);
vfprintf(stderr, format, ap); vfprintf(stderr, format, ap);
va_end(ap); va_end(ap);
exit(EXIT_FAILURE);
} }
int doamp(const char *begin, const char *end, int newblock) int doamp(const char *begin, const char *end, int newblock)
@ -238,7 +275,7 @@ int dolineprefix(const char *begin, const char *end, int newblock)
} }
while (p < end) { while (p < end) {
ADDC(buffer, j) = *p; ADDC(buffer, j, *p);
j++; j++;
if (*(p++) == '\n') if (*(p++) == '\n')
break; break;
@ -250,7 +287,7 @@ int dolineprefix(const char *begin, const char *end, int newblock)
j--; j--;
} }
ADDC(buffer, j) = '\0'; ADDC(buffer, j, '\0');
if (lineprefix[i].process) if (lineprefix[i].process)
process(buffer, buffer + strlen(buffer), process(buffer, buffer + strlen(buffer),
lineprefix[i].process >= 2); lineprefix[i].process >= 2);
@ -399,7 +436,7 @@ int dolist(const char *begin, const char *end, int newblock)
q++) q++)
; ;
if (*q == '\n') { if (*q == '\n') {
ADDC(buffer, i) = '\n'; ADDC(buffer, i, '\0');
i++; i++;
run = 0; run = 0;
isblock++; isblock++;
@ -428,7 +465,7 @@ int dolist(const char *begin, const char *end, int newblock)
j++) j++)
; ;
if (j == indent) { if (j == indent) {
ADDC(buffer, i) = '\n'; ADDC(buffer, i, '\n');
i++; i++;
p += indent; p += indent;
run = 1; run = 1;
@ -439,9 +476,9 @@ int dolist(const char *begin, const char *end, int newblock)
} else if (j < indent) } else if (j < indent)
run = 0; run = 0;
} }
ADDC(buffer, i) = *p; ADDC(buffer, i, *p);
} }
ADDC(buffer, i) = '\0'; ADDC(buffer, i, '\0');
fputs("<li>", stdout); fputs("<li>", stdout);
process(buffer, buffer + i, process(buffer, buffer + i,
isblock > 1 || (isblock == 1 && run)); isblock > 1 || (isblock == 1 && run));
@ -640,7 +677,8 @@ void hprint(const char *begin, const char *end)
void process(const char *begin, const char *end, int newblock) void process(const char *begin, const char *end, int newblock)
{ {
const char *p, *q; const char *q;
const char *p;
int affected; int affected;
unsigned int i; unsigned int i;
@ -674,9 +712,8 @@ void process(const char *begin, const char *end, int newblock)
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
char *buffer = NULL; char *buffer = NULL;
int s, i; const char *path = "STDIN";
unsigned long len, bsize; int i;
FILE *source = stdin;
for (i = 1; i < argc; i++) { for (i = 1; i < argc; i++) {
if (!strcmp("-v", argv[i])) if (!strcmp("-v", argv[i]))
@ -692,22 +729,20 @@ int main(int argc, char *argv[])
eprint("Usage %s [-n] [file]\n -n escape html strictly\n", eprint("Usage %s [-n] [file]\n -n escape html strictly\n",
argv[0]); argv[0]);
} }
if (i < argc && !(source = fopen(argv[i], "r")))
eprint("Cannot open file `%s`\n", argv[i]); if (i < argc)
bsize = 2 * BUFSIZ; path = argv[i];
buffer = ereallocz(buffer, bsize);
len = 0; off64_t len = get_file_size(path);
while ((s = fread(buffer + len, 1, BUFSIZ, source))) { if (len == -1) {
len += s; eprint("%s: %s: %s\n", argv[0], path, strerror(errno));
if (BUFSIZ + len + 1 > bsize) { return EXIT_FAILURE;
bsize += BUFSIZ;
if (!(buffer = realloc(buffer, bsize)))
eprint("realloc failed.");
}
} }
buffer = read_file(path, len);
if (!buffer)
return EXIT_FAILURE;
buffer[len] = '\0'; buffer[len] = '\0';
process(buffer, buffer + len, 1); process(buffer, buffer + len, 1);
fclose(source);
free(buffer); free(buffer);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }