From: Michael Göhler Date: Wed, 20 Aug 2014 22:32:52 +0000 (+0200) Subject: add line length and make it utf-8 compatible X-Git-Url: https://git.danieliu.xyz/?a=commitdiff_plain;h=f2bc670605162224119269271f7c07459c9de0a8;p=smdp.git add line length and make it utf-8 compatible --- diff --git a/include/markdown.h b/include/markdown.h index b26765e..dc759e0 100644 --- a/include/markdown.h +++ b/include/markdown.h @@ -23,6 +23,7 @@ typedef struct _line_t { struct _line_t *prev; struct _line_t *next; int bits; + int length; int offset; } line_t; diff --git a/include/parser.h b/include/parser.h index 7ade17c..091c3d5 100644 --- a/include/parser.h +++ b/include/parser.h @@ -6,6 +6,7 @@ document_t *markdown_load(FILE *input); int markdown_analyse(cstring_t *text); int is_utf8(char ch); +int length_utf8(char ch); int next_nonblank(cstring_t *text, int i); int next_blank(cstring_t *text, int i); diff --git a/markdown.c b/markdown.c index 90219e5..0d42005 100644 --- a/markdown.c +++ b/markdown.c @@ -8,7 +8,7 @@ line_t *new_line() { line_t *x = malloc(sizeof(line_t)); x->text = (void*)0; x->prev = x->next = (void*)0; - x->bits = x->offset = 0; + x->bits = x->length = x->offset = 0; return x; } diff --git a/parser.c b/parser.c index 0343509..fddd8d8 100644 --- a/parser.c +++ b/parser.c @@ -5,7 +5,7 @@ document_t *markdown_load(FILE *input) { - int c = 0, i = 0, bits = 0; + int c = 0, i = 0, l = 0, bits = 0; document_t *doc; page_t *page; @@ -29,6 +29,10 @@ document_t *markdown_load(FILE *input) { // clear text (text->reset)(text); + + // reset line length + l = 0; + // create next page page = next_page(page); @@ -54,23 +58,48 @@ document_t *markdown_load(FILE *input) { // add bits to line line->bits = bits; + // add length to line + line->length = l; + // calc offset line->offset = next_nonblank(text, 0); // new text text = cstring_init(); + + // reset line length + l = 0; } } else if(c == '\t') { // expand tab to spaces - for (i = 0; i <= 4; i++) + for (i = 0; i <= 4; i++) { 
(text->expand)(text, ' '); + l++; + } - } else if(isprint(c) || isspace(c) || is_utf8(c)) { + } else if(isprint(c) || isspace(c)) { // add char to line (text->expand)(text, c); + + // increase line length + l++; + + } else if(is_utf8(c)) { + + // add char to line + (text->expand)(text, c); + + // if utf-8 char > 1 byte add remaining to line + for(i = 0; i < length_utf8(c) - 1; i++) { + c = fgetc(input); + (text->expand)(text, c); + } + + // increase line length + l++; } } @@ -207,6 +236,15 @@ int is_utf8(char ch) { return (ch & 0x80); } +int length_utf8(char ch) { + int i = 0; + while(ch & 0x80) { + i++; + ch <<= 1; + } + return i; +} + int next_nonblank(cstring_t *text, int i) { while ((i < text->size) && isspace((text->text)[i])) ++i; diff --git a/tmp.c b/tmp.c index eeec4a4..33b8603 100644 --- a/tmp.c +++ b/tmp.c @@ -69,7 +69,7 @@ int main(int argc, char *argv[]) { if(doc->header) { header = doc->header; while(header && - header->text->size > 0 && + header->length > 0 && header->text->text[0] == '%') { offset = next_blank(header->text, 0) + 1; @@ -92,7 +92,7 @@ int main(int argc, char *argv[]) { while(line) { cl++; if(debug > 1) { - fprintf(stderr, " line %i: bits = %i, length = %i\n", cl, line->bits, line->text->size); + fprintf(stderr, " line %i: bits = %i, length = %i\n", cl, line->bits, line->length); } line = line->next; }