From: Paul Hänsch Date: Fri, 30 Aug 2024 20:02:10 +0000 (+0200) Subject: Squashed 'cgilite/' changes from 67422ab..898d470 X-Git-Url: https://git.plutz.net/?a=commitdiff_plain;h=4c308f0e436880f762451b9fbdbda38a85644b6e;p=shellwiki Squashed 'cgilite/' changes from 67422ab..898d470 898d470 optimized emphasis regex for performance in mawk 42028f5 improved tests for nested emphasis 98df52b modify regexes for use in mawk b6f82bc avoid recursion in _block function to increase compatibility c46fa28 avoid some recursion vor better compatibility and performance 1744198 test wiki links and some full pages b329161 bugfix Wiki Links, bugfix regex syntax 1653669 compatibility changes: no gensub function 7ba97e6 include mawk tests 61d13e2 tests for references and macros fd3f517 additional tests d8f6cff additional checks, check different awk implementations ccc93a0 minor fix: in fenced divs and fenced code blocks, omit empty class attribute 71c7ca6 additional checks, minor "bug"fix: no newline befor definition lists (for consistency) 3d649b9 more tests 798cbf8 test cases for markdown processor git-subtree-dir: cgilite git-subtree-split: 898d470f90e4055d0bcfe616bc009cca8d7f5692 --- diff --git a/markdown.awk b/markdown.awk index cb2a8f6..6e1440c 100755 --- a/markdown.awk +++ b/markdown.awk @@ -121,213 +121,265 @@ function URL ( text, sharp ) { return text; } -function inline( line, LOCAL, len, text, code, href, guard ) { - if ( line ~ /^$/ ) { # Recursion End - return ""; - - # omit processing of escaped characters - } else if ( line ~ /^\\./) { - return HTML(substr(line, 2, 1)) inline( substr(line, 3) ); - - # hard brakes - } else if ( match(line, /^ \n/) ) { - return "
\n" inline( substr(line, RLENGTH + 1) ); - - # ``code spans`` - } else if ( match( line, /^`+/) ) { - len = RLENGTH - guard = substr( line, 1, len ) - if ( match(line, guard ".*" guard) ) { - code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1) - len = 2 * length(guard) + length(code) - # strip single surrounding white spaces - gsub( /^ | $/, "", code) - # escape HTML within code span - gsub( /&/, "\\&", code ); gsub( //, "\\>", code ); - return "" code "" inline( substr( line, len + 1 ) ) - } +function inline( line, LOCAL, len, text, code, href, guard, ret ) { + ret = ""; + while (line !~ /^$/) { + # omit processing of escaped characters + if ( line ~ /^\\./) { + ret = ret HTML(substr(line, 2, 1)); line = substr(line, 3); + continue; + + # hard brakes + } else if ( match(line, /^ \n/) ) { + ret = ret "
\n"; line = substr(line, RLENGTH + 1); + continue; + + # ``code spans`` + } else if ( match( line, /^`+/) ) { + len = RLENGTH + guard = substr( line, 1, len ) + if ( match(line, guard ".*" guard) ) { + code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1) + len = 2 * length(guard) + length(code) + # strip single surrounding white spaces + gsub( /^ | $/, "", code) + # escape HTML within code span + gsub( /&/, "\\&", code ); gsub( //, "\\>", code ); + ret = ret "" code ""; line = substr( line, len + 1 ); + continue; + } - # Macros - } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) { - len = RLENGTH; - return "" HTML( substr( line, 3, len - 4 ) ) "" inline(substr(line, len + 1)); - - # Wiki style links - } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) { - len = RLENGTH; - href = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", 1, substr(line, 1, len) ); - text = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", 1, substr(line, 1, len) ); - if ( ! text ) text = href; - return "" HTML(text) "" inline( substr( line, len + 1) ); - - # quick links ("automatic links" in md doc) - } else if ( match( line, /^<[a-zA-Z]+:\/\/([-\.[:alnum:]]+)(:[0-9]*)?(\/[^>]*)?>/ ) ) { - len = RLENGTH; - href = HTML( substr( line, 2, len - 2) ); - return "" href "" inline( substr( line, len + 1) ); - - # quick link email - } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) { - len = RLENGTH; - href = HTML( substr( line, 2, len - 2) ); - return "" href "" inline( substr( line, len + 1) ); - - # Verbatim inline HTML - } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { - len = RLENGTH; - return substr( line, 1, len) inline(substr(line, len + 1)); - - 
# inline links - } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) { - len = RLENGTH; - text = href = title = substr( line, 1, len); - sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text); - sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href); - sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title); - - if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } - if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } - else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } - else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } - - gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); - - return "" \ - inline( text ) "" inline( substr( line, len + 1) ); - - # reference style links - } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) { - len = RLENGTH; - text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); - id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); - if ( ! 
id ) id = text; - if ( rl_href[id] && rl_title[id] ) { - return "" inline(text) "" inline( substr( line, len + 1) ); - } else if ( rl_href[id] ) { - return "" inline(text) "" inline( substr( line, len + 1) ); - } else { - return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); - } + # Macros + } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) { + len = RLENGTH; + ret = ret "" HTML( substr( line, 3, len - 4 ) ) ""; line = substr(line, len + 1); + continue; + + # Wiki style links + } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) { + len = RLENGTH; href = text = substr(line, 1, len); + sub(/^\[\[/, "", href); sub(/(\|([^]]+))?\]\].*$/, "", href); + sub(/^\[\[([^]|]+)/, "", text); sub(/\]\].*$/, "", text); sub(/^\|/, "", text); + # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", href ); + # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", text ); + if ( ! text ) text = href; + ret = ret "" HTML(text) ""; line = substr( line, len + 1); + continue; + + # quick links ("automatic links" in md doc) + } else if ( match( line, /^<[a-zA-Z]+:\/\/([-\.[:alnum:]]+)(:[0-9]*)?(\/[^>]*)?>/ ) ) { + len = RLENGTH; + href = HTML( substr( line, 2, len - 2) ); + ret = ret "" href ""; line = substr( line, len + 1); + continue; + + # quick link email + # } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) { + } else if ( match( line, 
/^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9])?)*>/ ) ) { + len = RLENGTH; + href = HTML( substr( line, 2, len - 2) ); + ret = ret "" href ""; line = substr( line, len + 1); + continue; + + # Verbatim inline HTML + } else if ( AllowHTML && match( line, 
/^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { + len = RLENGTH; + ret = ret substr( line, 1, len); line =substr(line, len + 1); + continue; + + # inline links + } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) { + len = RLENGTH; + text = href = title = substr( line, 1, len); + sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text); + sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href); + sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + ret = ret "" \ + inline( text ) ""; + line = substr( line, len + 1); + continue; + + # reference style links + } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) { + len = RLENGTH; text = id = substr(line, 1, len); + sub(/\n.*$/, "", text); sub(/^\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text); + sub(/\n.*$/, "", id); sub(/^\[([^]]+)\] ?\[/, "", id); sub(/\].*$/, "", id); + # text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, text ); + # id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, id ); + if ( ! 
id ) id = text; + + if ( rl_href[id] && rl_title[id] ) { + ret = ret "" inline(text) ""; + line = substr( line, len + 1); + continue; + + } else if ( rl_href[id] ) { + ret = ret "" inline(text) ""; line = substr( line, len + 1); + continue; + + } else { + ret = ret "" HTML(substr(line, 1, len)); line = substr(line, len + 1); + continue; + } + + # inline images + } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) { + len = RLENGTH; text = href = title = attrib = substr( line, 1, len); + + sub("^!\\[", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text); + + sub("^!" lix "\\([\n\t ]*", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href); + + sub("^!" lix "\\([\n\t ]*" lid, "", title); + sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title); + sub("^[\n\t ]+", "", title); + + sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); + sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); + gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + ret = ret "\"""; + line = substr( line, len + 1); + continue; + + # reference style images + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) { + len = RLENGTH; text = id = substr(line, 1, len); + sub(/\n.*$/, "", text); sub(/^!\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text); + sub(/\n.*$/, "", id); sub(/^!\[([^]]+)\] ?\[/, "", id); sub(/\].*$/, "", id); + # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + # id = 
gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); + if ( ! id ) id = text; + if ( rl_href[id] && rl_title[id] ) { + ret = ret "\"""; + line = substr( line, len + 1); + continue; + + } else if ( rl_href[id] ) { + ret = ret "\"""; + line = substr( line, len + 1); + continue; + + } else { + ret = ret "" HTML(substr(line, 1, len)); line = substr(line, len + 1); + continue; + } - # inline images - } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) { - len = RLENGTH; text = href = title = attrib = substr( line, 1, len); - - sub("^!\\[", "", text); - sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text); - - sub("^!" lix "\\([\n\t ]*", "", href); - sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href); - - sub("^!" lix "\\([\n\t ]*" lid, "", title); - sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title); - sub("^[\n\t ]+", "", title); - - sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); - sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); - - if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } - if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } - else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } - else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } - - gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); - gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); - - return "\""" inline( substr( line, len + 1) ); - - # reference style images - } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) { - len = RLENGTH; - text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); - id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); - if ( ! 
id ) id = text; - if ( rl_href[id] && rl_title[id] ) { - return "\""" \ - inline( substr( line, len + 1) ); - } else if ( rl_href[id] ) { - return "\""" \ - inline( substr( line, len + 1) ); + # ~~strikeout~~ (pandoc) + } else if ( match(line, /^~~([[:graph:]]|[[:graph:]]([^~]|~[^~])*[[:graph:]])~~/) ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 3, len - 4 ) ) ""; line = substr( line, len + 1 ); + continue; + + # ^superscript^ (pandoc) + } else if ( match(line, /^\^([^[:space:]^]|\\[ ^])+\^/) ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 2 ) ) ""; line = substr( line, len + 1 ); + continue; + + # ~subscript~ (pandoc) + } else if ( match(line, /^~([^[:space:]~]|\\[ ~])+~/) ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 2 ) ) ""; line = substr( line, len + 1 ); + continue; + + # ignore embedded underscores (pandoc, php md) + } else if ( match(line, "^[[:alnum:]](__|_)") ) { + ret = ret HTML(substr( line, 1, RLENGTH)); line = substr(line, RLENGTH + 1); + continue; + + # strong / em matchers use pre match pattern to make processing cheaper + # __strong__$ + } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__$") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 3, len - 4 ) ) ""; line = substr( line, len + 1 ); + continue; + + # __strong__ + } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__[[:space:][:punct:]]") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 3, len - 5 ) ) ""; line = substr( line, len); + continue; + + # **strong** + } else if ( match(line, "^\\*\\*(([^*[:space:]]|" iea ")|([^*[:space:]]|" iea ")(" na "|" iea ")*([^*[:space:]]|" iea "))\\*\\*") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 3, len - 4 ) ) ""; line = substr( line, len + 1 ); + continue; + + # _em_$ + } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" 
isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_$") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 2 ) ) ""; line = substr( line, len + 1 ); + continue; + + # _em_ + } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_[[:space:][:punct:]]") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 3 ) ) ""; line = substr( line, len ); + continue; + + # *em* + } else if ( match(line, "^\\*(([^*[:space:]]|" isa ")|([^*[:space:]]|" isa ")(" na "|" isa ")*([^*[:space:]]|" isa "))\\*") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 2 ) ) ""; line = substr( line, len + 1 ); + continue; + + # Literal HTML entities + # } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) { + # mawk does not support repitition ranges + } else if ( match( line, /^&([a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?|#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) { + len = RLENGTH; + ret = ret substr( line, 1, len ); line = substr(line, len + 1); + continue; + + # Arrows + } else if ( line ~ /^-->( |$)/) { # ignore multidash-arrow + ret = ret "-->"; line = substr(line, 4); + continue; + } else if ( line ~ /^<-( |$)/) { + ret = ret "←"; line = substr(line, 3); + continue; + } else if ( line ~ /^->( |$)/) { + ret = ret "→"; line = substr(line, 3); + continue; + + # Escape lone HTML character + } else if ( match( line, /^[&<>"']/) ) { + ret = ret HTML(substr(line, 1, 1)); line = substr(line, 2); + continue; + + # continue walk over string } else { - return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); + ret = ret substr(line, 1, 
1); line = substr(line, 2); + continue; } - - # ~~strikeout~~ (pandoc) - } else if ( match(line, /^~~([[:graph:]]|[[:graph:]]([^~]|~[^~])*[[:graph:]])~~/) ) { - len = RLENGTH; - return "" inline( substr( line, 3, len - 4 ) ) "" inline( substr( line, len + 1 ) ); - - # ^superscript^ (pandoc) - } else if ( match(line, /^\^([^[:space:]^]|\\[ ^])+\^/) ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - - # ~subscript~ (pandoc) - } else if ( match(line, /^~([^[:space:]~]|\\[ ~])+~/) ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - - # ignore embedded underscores (pandoc, php md) - } else if ( match(line, "^[[:alnum:]](__|_)") ) { - return HTML(substr( line, 1, RLENGTH)) inline( substr(line, RLENGTH + 1) ); - - # __strong__$ - } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__$") ) { - len = RLENGTH; - return "" inline( substr( line, 3, len - 4 ) ) "" inline( substr( line, len + 1 ) ); - - # __strong__ - } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__[[:space:][:punct:]]") ) { - len = RLENGTH; - return "" inline( substr( line, 3, len - 5 ) ) "" inline( substr( line, len) ); - - # **strong** - } else if ( match(line, "^\\*\\*(([^\\*[:space:]]|" iea ")|([^\\*[:space:]]|" iea ")(" na "|" iea ")*([^\\*[:space:]]|" iea "))\\*\\*") ) { - len = RLENGTH; - return "" inline( substr( line, 3, len - 4 ) ) "" inline( substr( line, len + 1 ) ); - - # _em_$ - } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_$") ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - - # _em_ - } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu 
"))_[[:space:][:punct:]]") ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 3 ) ) "" inline( substr( line, len ) ); - - # *em* - } else if ( match(line, "^\\*(([^\\*[:space:]]|" isa ")|([^\\*[:space:]]|" isa ")(" na "|" isa ")*([^\\*[:space:]]|" isa "))\\*") ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - - # Literal HTML entities - } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) { - len = RLENGTH; - return substr( line, 1, len ) inline(substr(line, len + 1)); - - # Arrows - } else if ( line ~ /^-->( |$)/) { # ignore multidash-arrow - return "-->" inline( substr(line, 4) ); - } else if ( line ~ /^<-( |$)/) { - return "←" inline( substr(line, 3) ); - } else if ( line ~ /^->( |$)/) { - return "→" inline( substr(line, 3) ); - - # Escape lone HTML character - } else if ( match( line, /^[&<>"']/) ) { - return HTML(substr(line, 1, 1)) inline(substr(line, 2)); - - # continue walk over string - } else { - return substr(line, 1, 1) inline( substr(line, 2) ); } + return ret; } function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) { - match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); + # match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); + match(hstack, /([0-9]+( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+))$/); split( substr(hstack, RSTART), HL); for ( n = hlvl; n <= 6; n++ ) { sec = sec (HL[n]?"":""); } HL[hlvl]++; for ( n = hlvl + 1; n <= 6; n++) { HL[n] = 0;} @@ -336,7 +388,8 @@ function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) { hid = hid HL[1]; for ( n = 2; n <= hlvl; n++) { hid = hid "." HL[n] ; } hid = hid ":" URL(htxt, 1); - sub(/([0-9]+( [0-9]+){5})$/, "", hstack); + # sub(/([0-9]+( [0-9]+){5})$/, "", hstack); + sub(/([0-9]+( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+))$/, "", hstack); hstack = hstack HL[1] " " HL[2] " " HL[3] " " HL[4] " " HL[5] " " HL[6]; return sec "
" \ @@ -354,406 +407,462 @@ function _nblock( block, LOCAL, sec, n ) { for ( n = blvl + 1; n in BL; n++) { delete BL[n]; } block = _block( block ); - match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); + match(hstack, /([0-9]+( [0-9]+)( [0-9]+)?( [0-9]+)?( [0-9]+)?( [0-9]+)?)$/); split( substr(hstack, RSTART), HL); sec = ""; for ( n = 1; n <= 6; n++ ) { sec = sec (HL[n]?"
":""); } - sub("( +[0-9]+){6} *$", "", hstack); blvl--; + sub("( +[0-9]+)( +[0-9]+)?( +[0-9]+)?( +[0-9]+)?( +[0-9]+)?( +[0-9]+)? *$", "", hstack); blvl--; return block sec; } -function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list ) { - gsub( "(^\n+|\n+$)", "", block ); - - if ( block == "" ) { - return ""; - - # HTML #2 #3 #4 $5 - } else if ( AllowHTML && match( block, /(^|\n) ? ? ?(|$)|<\?([^\?]|\?[^>])*(\?>|$)|]*(>|$)|])*(\]\]>|$))/) ) { - len = RLENGTH; st = RSTART; - return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len)); - - # HTML #6 - } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { - len = RLENGTH; st = RSTART; - return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len)); - - # HTML #1 - } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<(script|pre|style)([[:space:]\n>]).*(<\/script>|<\/pre>|<\/style>|$)/) ) { - len = RLENGTH; st = RSTART; - match( tolower(substr(block, st, len)), /(<\/script>|<\/pre>|<\/style>)/); - len = RSTART + RLENGTH; - return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len)); - - # HTML #7 - } else if ( AllowHTML && match( block, /^ ? ? 
?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { - len = RLENGTH; st = RSTART; - return substr(block, st, len) _block(substr(block, st + len)); - - # Metadata (custom, block starting with %something) - # Metadata is ignored but can be interpreted externally - } else if ( match(block, /^%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { - len = RLENGTH; st = RSTART; - return _block( substr( block, len + 1) ); - - # Blockquote (leading >) - } else if ( match( block, /^> /) ) { - match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match(block, /$/); - len = RLENGTH; st = RSTART; - text = substr(block, 1, st - 1); gsub( /(^|\n)> /, "\n", text ); - text = _nblock( text ); gsub( /^\n|\n$/, "", text ) - return "
" text "
\n\n" _block( substr(block, st + len) ); - - # Pipe Tables (pandoc / php md / gfm ) - } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \ - "((\\|)?(:?-+:?[\\|+])+:?-+:?(\\|)?)\n" \ - "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) { - len = RLENGTH; st = RSTART; - #initialize empty arrays - split("", talign); split("", tarray); - cols = 0; cnt=0; ttext = ""; - - # table header and alignment - split( gensub( /(^\||\|$)/, "", "g", \ - gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - )), tarray, /\|/); - block = substr(block, match(block, /(\n|$)/) + 1 ); - cols = split( \ - gensub( /(^\||\|$)/, "", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - ), talign, /[+\|]/); - block = substr(block, match(block, /(\n|$)/) + 1 ); - - for( cnt = 1; cnt < cols; cnt++ ) { - if (match(talign[cnt], /:-+:/)) talign[cnt]="center"; - else if (match(talign[cnt], /-+:/)) talign[cnt]="right"; - else if (match(talign[cnt], /:-+/)) talign[cnt]="left"; - else talign[cnt]=""; - } +function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list, tmp, ret) { + ret = ""; + while ( block != "" ) { + gsub( "(^\n+|\n+$)", "", block ); - ttext = "\n" - for (cnt = 1; cnt < cols; cnt++) - ttext = ttext "" inline(tarray[cnt]) "" - ttext = ttext "\n\n" + # HTML #2 #3 #4 $5 + if ( AllowHTML && match( block, /(^|\n) ? ? ?(|$)|<\?([^\?]|\?[^>])*(\?>|$)|]*(>|$)|])*(\]\]>|$))/) ) { + len = RLENGTH; st = RSTART; + ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len); + continue; - while ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ){ - split( gensub( /(^\||\|$)/, "", "g", \ - gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - )), tarray, /\|/); - block = substr(block, match(block, /(\n|$)/) + 1 ); + # HTML #6 + } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? 
?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { + len = RLENGTH; st = RSTART; + ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len); + continue; - ttext = ttext "" - for (cnt = 1; cnt < cols; cnt++) - ttext = ttext "" inline(tarray[cnt]) "" - ttext = ttext "\n" - } - return "" ttext "
\n" _block(block); - - # Grid Tables (pandoc) - # (with, and without header) - } else if ( match( block, "^\\+(-+\\+)+\n" \ - "(\\|([^\n]+\\|)+\n)+" \ - "(\\+(:?=+:?\\+)+)\n" \ - "((\\|([^\n]+\\|)+\n)+" \ - "\\+(-+\\+)+(\n|$))+" \ - ) || \ - match( block, "^()()()" \ - "(\\+(:?-+:?\\+)+)\n" \ - "((\\|([^\n]+\\|)+\n)+" \ - "\\+(-+\\+)+(\n|$))+" \ - ) ) { - len = RLENGTH; st = RSTART; - #initialize empty arrays - split("", talign); split("", tarray); split("", tread); - cols = 0; cnt=0; ttext = ""; - - # Column Count - cols = split( gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block), tread, /\+/) - 2; - # debug(" Cols: " gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block )); - - # table alignment - split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ ); - # debug("Align: " gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block )); - - for (cnt = 1; cnt <= cols; cnt++) { - if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center"; - else if (match(talign[cnt], /(-+|=+):/)) talign[cnt]="right"; - else if (match(talign[cnt], /:(-+|=+)/ )) talign[cnt]="left"; - else talign[cnt]=""; - } + # HTML #1 + } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<(script|pre|style)([[:space:]\n>]).*(<\/script>|<\/pre>|<\/style>|$)/) ) { + len = RLENGTH; st = RSTART; + match( tolower(substr(block, st, len)), /(<\/script>|<\/pre>|<\/style>)/); + len = RSTART + RLENGTH; + ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len); + continue; - if ( match(block, "^\\+(-+\\+)+\n" \ - "(\\|([^\n]+\\|)+\n)+" \ - "\\+(:?=+:?\\+)+\n" \ - "((\\|([^\n]+\\|)+\n)+" \ - "\\+(-+\\+)+(\n|$))+" \ - ) ) { - # table header + # HTML #7 + } else if ( AllowHTML && match( block, /^ ? ? 
?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { + len = RLENGTH; st = RSTART; + ret = ret substr(block, st, len); block = substr(block, st + len); + continue; + + # Metadata (custom, block starting with %something) + # Metadata is ignored but can be interpreted externally + } else if ( match(block, /^%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { + len = RLENGTH; st = RSTART; + block = substr( block, len + 1); + continue; + + # Blockquote (leading >) + } else if ( match( block, /^> /) ) { + match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match(block, /$/); + len = RLENGTH; st = RSTART; + text = substr(block, 1, st - 1); gsub( /(^|\n)> /, "\n", text ); + text = _nblock( text ); gsub( /^\n|\n$/, "", text ) + ret = ret "
" text "
\n\n"; block = substr(block, st + len); + continue; + + # Pipe Tables (pandoc / php md / gfm ) + } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \ + "((\\|)?(:?-+:?[\\|+])+:?-+:?(\\|)?)\n" \ + "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) { + len = RLENGTH; st = RSTART; + #initialize empty arrays + split("", talign); split("", tarray); + cols = 0; cnt=0; ttext = ""; + + # table header and alignment + tmp = substr(block, 1, match(block, /(\n|$)/)); + gsub( /(^|[^\\])\\\|/, "\\1\\|", tmp ); + gsub( /(^\||\|$)/, "", tmp) + split( tmp, tarray, /\|/); block = substr(block, match(block, /(\n|$)/) + 1 ); - while ( match(block, "^\\|([^\n]+\\|)+\n") ) { - split( gensub( /(^\||\|$)/, "", "g", \ - gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - )), tread, /\|/); - block = substr(block, match(block, /(\n|$)/) + 1 ); - for (cnt = 1; cnt <= cols; cnt++) - tarray[cnt] = tarray[cnt] "\n" tread[cnt]; + tmp = substr(block, 1, match(block, /(\n|$)/)); + gsub( /(^\||\|$)/, "", tmp ); + cols = split( tmp , talign, /[+\|]/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + + for( cnt = 1; cnt < cols; cnt++ ) { + if (match(talign[cnt], /:-+:/)) talign[cnt]="center"; + else if (match(talign[cnt], /-+:/)) talign[cnt]="right"; + else if (match(talign[cnt], /:-+/)) talign[cnt]="left"; + else talign[cnt]=""; } ttext = "\n" - for (cnt = 1; cnt <= cols; cnt++) - ttext = ttext "" _nblock(tarray[cnt]) "" - ttext = ttext "\n" - } + for (cnt = 1; cnt < cols; cnt++) + ttext = ttext "" inline(tarray[cnt]) "" + ttext = ttext "\n\n" + + while ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ){ + tmp = substr(block, 1, match(block, /(\n|$)/)); + gsub( /(^|[^\\])\\\|/, "\\1\\|", tmp ); + gsub( /(^\||\|$)/, "", tmp ); + split( tmp, tarray, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); - # table body - block = substr(block, match(block, /(\n|$)/) + 1 ); - ttext = ttext "\n" - - while ( match(block, 
/^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){ - split("", tarray); - while ( match(block, /^\|([^\n]+\|)+\n/) ) { - split( gensub( /(^\||\|$)/, "", "g", \ - gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - )), tread, /\|/); + ttext = ttext "" + for (cnt = 1; cnt < cols; cnt++) + ttext = ttext "" inline(tarray[cnt]) "" + ttext = ttext "\n" + } + ret = ret "" ttext "
\n"; + continue; + + # Grid Tables (pandoc) + # (with, and without header) + } else if ( match( block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "(\\+(:?=+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) || \ + match( block, "^()()()" \ + "(\\+(:?-+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) ) { + len = RLENGTH; st = RSTART; + #initialize empty arrays + split("", talign); split("", tarray); split("", tread); + cols = 0; cnt=0; ttext = ""; + + # Column Count + tmp = block; sub( "(\n.*)*$", "", tmp); + cols = split( tmp, tread, /\+/) - 2; + # debug(" Cols: " gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block )); + + # table alignment + match(block, "((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)"); + split( substr(block, RSTART, RLENGTH) , talign, /\+/ ); + # split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ ); + # debug("Align: " gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block )); + + for (cnt = 1; cnt <= cols; cnt++) { + if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center"; + else if (match(talign[cnt], /(-+|=+):/)) talign[cnt]="right"; + else if (match(talign[cnt], /:(-+|=+)/ )) talign[cnt]="left"; + else talign[cnt]=""; + } + + if ( match(block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "\\+(:?=+:?\\+)+\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) ) { + # table header block = substr(block, match(block, /(\n|$)/) + 1 ); + while ( match(block, "^\\|([^\n]+\\|)+\n") ) { + tmp = substr(block, 1, match(block, /(\n|$)/)); + gsub( /\\\\/, "\\\", tmp); gsub(/\\\|/, "\\|", tmp); + gsub( /(^\||\|$)/, "", tmp ); + split(tmp, tread, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + for (cnt = 1; cnt <= cols; cnt++) + tarray[cnt] = tarray[cnt] "\n" tread[cnt]; + } + + ttext = "\n" for (cnt = 1; cnt <= cols; cnt++) - tarray[cnt] = tarray[cnt] "\n" tread[cnt]; + ttext = ttext "" _nblock(tarray[cnt]) "" + 
ttext = ttext "\n" } + + # table body block = substr(block, match(block, /(\n|$)/) + 1 ); + ttext = ttext "\n" + + while ( match(block, /^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){ + split("", tarray); + while ( match(block, /^\|([^\n]+\|)+\n/) ) { + tmp = substr(block, 1, match(block, /(\n|$)/)); + gsub( /\\\\/, "\\\", tmp); gsub(/\\\|/, "\\|", tmp); + gsub( /(^\||\|$)/, "", tmp); + split( tmp, tread, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + for (cnt = 1; cnt <= cols; cnt++) + tarray[cnt] = tarray[cnt] "\n" tread[cnt]; + } + block = substr(block, match(block, /(\n|$)/) + 1 ); - ttext = ttext "" - for (cnt = 1; cnt <= cols; cnt++) - ttext = ttext "" _nblock(tarray[cnt]) "" - ttext = ttext "\n" - } - return "" ttext "
\n" _nblock(block); - - # Line Blocks (pandoc) - } else if ( match(block, /^\| [^\n]*(\n|$)(\| [^\n]*(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { - len = RLENGTH; st = RSTART; - - text = substr(block, 1, len); gsub(/\n[[:space:]]+/, " ", text); - gsub(/\n\| /, "\n", text); gsub(/^\| |\n$/, "", text); - text = inline(text); gsub(/\n/, "
\n", text); - - return "
" text "
\n" _block( substr( block, len + 1) ); - - # Indented Code Block - } else if ( match(block, /^( |\t)( *\t*[^ \t\n]+ *\t*)+(\n|$)(( |\t)[^\n]+(\n|$)|[ \t]*(\n|$))*/) ) { - len = RLENGTH; st = RSTART; - code = substr(block, 1, len); - gsub(/(^|\n)( |\t)/, "\n", code); - gsub(/^\n|\n+$/, "", code); - return "
" HTML( code ) "
\n" \ - _block( substr( block, len + 1 ) ); - - # Fenced Divs (pandoc, custom) - } else if ( match( block, /^(:::+)/ ) ) { - guard = substr( block, 1, RLENGTH ); - code = block; sub(/^[^\n]+\n/, "", code); - attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, block); - gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); - gsub(/(^ | $)/, "", attrib); - if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { - len = RLENGTH; st = RSTART; - return "
" _nblock( substr(code, 1, st - 1) ) "
\n" \ - _block( substr( code, st + len ) ); - } else { - match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ ); - len = RLENGTH; st = RSTART; - return "

" inline( substr(block, 1, st - 1) ) "

\n" \ - _block( substr(block, st + len) ); - } + ttext = ttext "" + for (cnt = 1; cnt <= cols; cnt++) + ttext = ttext "" _nblock(tarray[cnt]) "" + ttext = ttext "\n" + } + return ret "" ttext "
\n" _nblock(block); - # Fenced Code Block (pandoc) - } else if ( match( block, /^(~~~+|```+)/ ) ) { - guard = substr( block, 1, RLENGTH ); - code = gensub(/^[^\n]+\n/, "", 1, block); - attrib = gensub(/^(~~~+|```+)[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\2", 1, block); - gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); - gsub(/(^ | $)/, "", attrib); - if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { - len = RLENGTH; st = RSTART; - return "
" HTML( substr(code, 1, st - 1) ) "
\n" \ - _block( substr( code, st + len ) ); - } else { - match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ ); + # Line Blocks (pandoc) + } else if ( match(block, /^\| [^\n]*(\n|$)(\| [^\n]*(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { len = RLENGTH; st = RSTART; - return "

" inline( substr(block, 1, st - 1) ) "

\n" \ - _block( substr(block, st + len) ); - } - # First Order Heading H1 + Attrib - } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n|$)/ ) ) { - len = RLENGTH; text = attrib = block; - sub(/([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "", text); - sub(/\}\n===+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); - gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + text = substr(block, 1, len); gsub(/\n[[:space:]]+/, " ", text); + gsub(/\n\| /, "\n", text); gsub(/^\| |\n$/, "", text); + text = inline(text); gsub(/\n/, "
\n", text); - return headline(1, text, attrib) _block( substr( block, len + 1 ) ); + ret = ret "
" text "
\n"; block = substr( block, len + 1); + continue; - # First Order Heading H1 - } else if ( match( block, /^([^\n]+)\n===+(\n|$)/ ) ) { - len = RLENGTH; text = substr(block, 1, len); - sub(/\n===+(\n.*)?$/, "", text); - - return headline(1, text, 0) _block( substr( block, len + 1 ) ); - - # Second Order Heading H2 + Attrib - } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n|$)/ ) ) { - len = RLENGTH; text = attrib = block; - sub(/([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "", text); - sub(/\}\n---+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); - gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); - - return headline(2, text, attrib) _block( substr( block, len + 1) ); + # Indented Code Block + } else if ( match(block, /^(( |\t)[^\n]*[^\n\t ][^\n]*(\n|$))(( |\t)[^\n]*(\n|$)|[\t ]*(\n|$))*/) ) { + len = RLENGTH; st = RSTART; - # Second Order Heading H2 - } else if ( match( block, /^([^\n]+)\n---+(\n|$)/ ) ) { - len = RLENGTH; text = substr(block, 1, len); - sub(/\n---+(\n.*)?$/, "", text); + code = substr(block, 1, len); + gsub(/(^|\n)( |\t)/, "\n", code); + gsub(/^\n|\n+$/, "", code); + ret = ret "
" HTML( code ) "
\n"; block = substr( block, len + 1 ); + continue; + + # Fenced Divs (pandoc, custom) + } else if ( match( block, /^(:::+)/ ) ) { + guard = substr( block, 1, RLENGTH ); attrib = code = block; + sub(/^[^\n]+\n/, "", code); + sub(/^:::+[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib); + # attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); + gsub(/(^ | $)/, "", attrib); + if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) { + len = RLENGTH; st = RSTART; + ret = ret "
" _nblock( substr(code, 1, st - 1) ) "
\n"; + block = substr( code, st + len ); + continue; + + } else if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { + len = RLENGTH; st = RSTART; + ret = ret "
" _nblock( substr(code, 1, st - 1) ) "
\n"; block = substr( code, st + len ); + continue; + + } else { + match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ ); + len = RLENGTH; st = RSTART; + ret = ret "

" inline( substr(block, 1, st - 1) ) "

\n"; block = substr(block, st + len); + continue; + } - return headline(2, text, 0) _block( substr( block, len + 1) ); + # Fenced Code Block (pandoc) + } else if ( match( block, /^(~~~+|```+)/ ) ) { + guard = substr( block, 1, RLENGTH ); attrib = code = block; + sub(/^[^\n]+\n/, "", code); + sub(/^(~~~+|```+)[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib); + # attrib = gensub(/^(~~~+|```+)[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\2", 1, attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); + gsub(/(^ | $)/, "", attrib); + if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) { + len = RLENGTH; st = RSTART; + ret = ret "
" HTML( substr(code, 1, st - 1) ) "
\n"; + block = substr( code, st + len ); + continue; + + } else if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { + len = RLENGTH; st = RSTART; + ret = ret "
" HTML( substr(code, 1, st - 1) ) "
\n"; + block = substr( code, st + len ); + continue; + + } else { + match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ ); + len = RLENGTH; st = RSTART; + ret = ret "

" inline( substr(block, 1, st - 1) ) "

\n"; block = substr(block, st + len); + continue; + } - # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib - } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) { - len = RLENGTH; text = attrib = substr(block, 1, len); - match(block, /^#{1,6}/); n = RLENGTH; + # First Order Heading H1 + Attrib + } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n|$)/ ) ) { + len = RLENGTH; text = attrib = block; + sub(/([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "", text); + sub(/\}\n===+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + + ret = ret headline(1, text, attrib) ; block = substr( block, len + 1 ); + continue; + + # First Order Heading H1 + } else if ( match( block, /^([^\n]+)\n===+(\n|$)/ ) ) { + len = RLENGTH; text = substr(block, 1, len); + sub(/\n===+(\n.*)?$/, "", text); + + ret = ret headline(1, text, 0) ; block = substr( block, len + 1 ); + continue; + + # Second Order Heading H2 + Attrib + } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n|$)/ ) ) { + len = RLENGTH; text = attrib = block; + sub(/([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "", text); + sub(/\}\n---+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + + ret = ret headline(2, text, attrib) ; block = substr( block, len + 1); + continue; + + # Second Order Heading H2 + } else if ( match( block, /^([^\n]+)\n---+(\n|$)/ ) ) { + len = RLENGTH; text = substr(block, 1, len); + sub(/\n---+(\n.*)?$/, "", text); + + ret = ret headline(2, text, 0) ; block = substr( block, len + 1); + continue; + + # # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib + # } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{[a-zA-Z \t-]*\}(\n|$)/ ) ) { + } else if ( match( block, 
/^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*\{[\ta-zA-Z -]*\}(\n|$)/ ) ) { + len = RLENGTH; text = attrib = substr(block, 1, len); + match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1; + # sub(/^(##?#?#?#?#?)[ \t]*/, "", text); # not working in mawk + text = substr(text, n + 1); sub(/^[ \t]*/, "", text); + sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text); + + sub(/^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*\{/, "", attrib); + sub(/\}(\n.*)?$/, "", attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + + ret = ret headline( n, text, attrib ); block = substr( block, len + 1); + continue; + + # Nth Order Heading H1 H2 H3 H4 H5 H6 + # } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) { + } else if ( match( block, /^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*(\n|$)/ ) ) { + len = RLENGTH; text = substr(block, 1, len); + match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1; + # sub(/^(##?#?#?#?#?)[ \t]+/, "", text); # not working in mawk + text = substr(text, n + 1); sub(/^[ \t]*/, "", text); + sub(/[ \t]*#*(\n.*)?$/, "", text); + + ret = ret headline( n, text, 0 ) ; block = substr( block, len + 1); + continue; + + # block images (wrapped in
) + } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n|$)") ) { + len = RLENGTH; text = href = title = attrib = substr( block, 1, len); + + sub("^!\\[", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", text); + + sub("^!" lix "\\([\n\t ]*", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", href); + + sub("^!" lix "\\([\n\t ]*" lid, "", title); + sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", title); + sub("^[\n\t ]+", "", title); + + sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); + sub("(\n.*)?$", "", attrib); + sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); + + ret = ret "
" \ + "\""" \ + (title?"
" inline(title) "
":"") \ + "
\n\n"; + block = substr( block, len + 1); + continue; + + } else if ( match(block, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) { + len = RLENGTH; text = id = block; + sub(/(\n.*)?$/, "", text); sub( /^!\[/, "", text); sub(/\] ?\[([^\n]*)\]$/, "", text); + sub(/(\n.*)?$/, "", id); sub( /^!\[([^\n]*)\] ?\[/, "", id); sub(/\]$/, "", id); + # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block); + # id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); + if ( ! id ) id = text; + if ( rl_href[id] && rl_title[id] ) { + ret = ret "
" \ + "\""" \ + "
" inline(rl_title[id]) "
" \ + "
\n\n"; + block = substr( block, len + 1); + continue; + + } else if ( rl_href[id] ) { + ret = ret "
" \ + "\""" \ + "
\n\n"; + block = substr( block, len + 1); + continue; + } else { + ret = ret "

" HTML(substr(block, 1, len)) "

\n" ; block = substr(block, len + 1); + continue; + } - sub(/^(#{1,6})[ \t]*/, "", text); sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text); - sub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib); - sub(/\})(\n.*)?$/, "", attrib); - gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + # Macros (standalone <> calls handled as block, so they are not wrapped in paragraph) + } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) { + len = RLENGTH; text = block; + sub(/^<>(\n.*)?$/, "", text); + # text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block); + ret = ret "" HTML(text) "" ; block = substr(block, len + 1); + continue; + + # Definition list + } else if (match( block, "^(([ \t]*\n)*[^:\n \t][^\n]+\n" \ + "([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ + "(([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ + "|[^:\n \t][^\n]+(\n|$)" \ + "|( ? ? ?\t| +)[^\n]+(\n|$)" \ + "|([ \t]*\n)+( ? ? ?\t| +)[^\n]+(\n|$))*)+" \ + )) { + list = substr( block, 1, RLENGTH); block = substr( block, RLENGTH + 1); + ret = ret "
\n" _dlist( list ) "
\n"; + continue; + + # Unordered list types + } else if ( text = _startlist( block, "ul", "-", "([+*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return ret text; + } else if ( text = _startlist( block, "ul", "\\+", "([-*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return ret text; + } else if ( text = _startlist( block, "ul", "\\*", "([-+•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return ret text; + } else if ( text = _startlist( block, "ul", "•", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return ret text; + + # Ordered list types + } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*•]|#\\.|[0-9]+\\)|#\\))") ) { + return ret text; + } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*•]|[0-9]+\\.|#\\.|#\\))") ) { + return ret text; + } else if ( text = _startlist( block, "ol", "#\\.", "([-+*•]|[0-9]+\\.|[0-9]+\\)|#\\))") ) { + return ret text; + } else if ( text = _startlist( block, "ol", "#\\)", "([-+*•]|[0-9]+\\.|#\\.|[0-9]+\\))") ) { + return ret text; + + # Split paragraphs + } else if ( match( block, /(^|\n)[[:space:]]*(\n|$)/) ) { + len = RLENGTH; st = RSTART; + ret = ret _block( substr(block, 1, st - 1) ) "\n"; block = substr(block, st + len); + continue; - return headline( n, text, attrib ) _block( substr( block, len + 1) ); + # Horizontal rule + # } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) { + } else if ( match( block, /(^|\n) ? ? ?((\* *)(\* *)(\* *)(\* *)*|(- *)(- *)(- *)(- *)*|(_ *)(_ *)(_ *)(_ *)*)($|\n)/) ) { + len = RLENGTH; st = RSTART; + ret = ret _block(substr(block, 1, st - 1)) "
\n"; block = substr(block, st + len); + continue; - # Nth Order Heading H1 H2 H3 H4 H5 H6 - } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) { - len = RLENGTH; text = substr(block, 1, len); - match(block, /^#{1,6}/); n = RLENGTH; - sub(/^(#{1,6})[ \t]*/, "", text); sub(/[ \t]*#*(\n.*)?$/, "", text); - - return headline( n, text, 0 ) _block( substr( block, len + 1) ); - - # block images (wrapped in
) - } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n|$)") ) { - len = RLENGTH; text = href = title = attrib = substr( block, 1, len); - - sub("^!\\[", "", text); - sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", text); - - sub("^!" lix "\\([\n\t ]*", "", href); - sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", href); - - sub("^!" lix "\\([\n\t ]*" lid, "", title); - sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", title); - sub("^[\n\t ]+", "", title); - - sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); - sub("(\n.*)?$", "", attrib); - sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); - - if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } - if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } - else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } - else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } - - gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); - - return "
" \ - "\""" \ - (title?"
" inline(title) "
":"") \ - "
\n\n" \ - _block( substr( block, len + 1) ); - - # reference style images (block) - } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) { - len = RLENGTH; - text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block); - id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); - if ( ! id ) id = text; - if ( rl_href[id] && rl_title[id] ) { - return "
" \ - "\""" \ - "
" inline(rl_title[id]) "
" \ - "
\n\n" \ - _block( substr( block, len + 1) ); - } else if ( rl_href[id] ) { - return "
" \ - "\""" \ - "
\n\n" \ - _block( substr( block, len + 1) ); + # Plain paragraph } else { - return "

" HTML(substr(block, 1, len)) "

\n" _block( substr(block, len + 1) ); + return ret "

" inline(block) "

\n"; } - - # Macros (standalone <> calls handled as block, so they are not wrapped in paragraph) - } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) { - len = RLENGTH; - text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block); - return "" HTML(text) "" _block(substr(block, len + 1) ); - - # Definition list - } else if (match( block, "^(([ \t]*\n)*[^:\n \t][^\n]+\n" \ - "([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ - "(([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ - "|[^:\n \t][^\n]+(\n|$)" \ - "|( ? ? ?\t| +)[^\n]+(\n|$)" \ - "|([ \t]*\n)+( ? ? ?\t| +)[^\n]+(\n|$))*)+" \ - )) { - list = substr( block, 1, RLENGTH); block = substr( block, RLENGTH + 1); - return "\n
\n" _dlist( list ) "
\n" _block( block ); - - # Unordered list types - } else if ( text = _startlist( block, "ul", "-", "([+*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { - return text; - } else if ( text = _startlist( block, "ul", "\\+", "([-*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { - return text; - } else if ( text = _startlist( block, "ul", "\\*", "([-+•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { - return text; - } else if ( text = _startlist( block, "ul", "•", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { - return text; - - # Ordered list types - } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*•]|#\\.|[0-9]+\\)|#\\))") ) { - return text; - } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*•]|[0-9]+\\.|#\\.|#\\))") ) { - return text; - } else if ( text = _startlist( block, "ol", "#\\.", "([-+*•]|[0-9]+\\.|[0-9]+\\)|#\\))") ) { - return text; - } else if ( text = _startlist( block, "ol", "#\\)", "([-+*•]|[0-9]+\\.|#\\.|[0-9]+\\))") ) { - return text; - - # Split paragraphs - } else if ( match( block, /(^|\n)[[:space:]]*(\n|$)/) ) { - len = RLENGTH; st = RSTART; - return _block( substr(block, 1, st - 1) ) "\n" \ - _block( substr(block, st + len) ); - - # Horizontal rule - } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) { - len = RLENGTH; st = RSTART; - return _block(substr(block, 1, st - 1)) "
\n" _block(substr(block, st + len)); - - # Plain paragraph - } else { - return "

" inline(block) "

\n"; } + return ret; } -function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, text) { +function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, it, text) { if (match( block, "(^|\n) ? ? ?" mark "[ \t][^\n]+(\n|$)" \ "(([ \t]*\n)* ? ? ?" mark "[ \t][^\n]+(\n|$)" \ "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ "|[^\n \t][^\n]+(\n|$))*" ) ) { st = RSTART; len = RLENGTH; list = substr( block, st, len); - sub("^\n", "", list); match(list, "^ ? ? ?"); indent = RLENGTH; - gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list); + sub("^\n", "", list); match(list, "^( | | |)"); indent = RLENGTH; + # gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list); + # emulate greedy range matcher for mawk + it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } + sub(/\|$/, ")?", it); sub(/^\($/, "", it); + gsub( "(^|\n)" it, "\n", list ); sub("^\n", "", list); text = substr(block, 1, st - 1); block = substr(block, st + len); if (match(text, /\n[[:space:]]*\n/)) return 0; @@ -767,10 +876,11 @@ function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, te } else return 0; } -function _list (block, mark, p, LOCAL, len, st, text, indent, task) { +function _list (block, mark, p, LOCAL, len, st, text, indent, it, task) { if ( match(block, "^([ \t]*\n)*$")) return; match(block, "^" mark "[ \t]"); indent = RLENGTH; + sub("^" mark "[ \t]", "", block); if (match(block, /\n[ \t]*\n/)) p = 1; @@ -779,7 +889,11 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, task) { st = (RLENGTH == -1) ? 
length(block) + 1 : RSTART; text = substr(block, 1, st); block = substr(block, st + 1); - gsub("\n {0," indent "}", "\n", text); + # gsub("\n {0," indent "}", "\n", text); + # emulate greedy range matcher for mawk + it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } + sub(/\|$/, ")?", it); sub(/^\($/, "", it); + gsub("\n" it, "\n", text); task = match( text, /^\[ \]/ ) ? "
  • " : \ match( text, /^\[-\]/ ) ? "
  • " : \ @@ -795,7 +909,7 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, task) { return task text "
  • \n" _list(block, mark, p); } -function _dlist (block, LOCAL, len, st, text, indent, p) { +function _dlist (block, LOCAL, len, st, text, indent, it, p) { if (match( block, "^([ \t]*\n)*[^:\n \t][^\n]+\n" )) { len = RLENGTH; text = substr(block, 1, len); gsub( "(^\n*|\n*$)", "", text ); @@ -809,7 +923,11 @@ function _dlist (block, LOCAL, len, st, text, indent, p) { sub( "^([ \t]*\n)*", "", text); match(text, "^ ? ? ?:(\t| +)"); indent = RLENGTH; sub( "^ ? ? ?:(\t| +)", "", text); - gsub( "(^|\n) {0," indent "}", "\n", text ); + # gsub( "(^|\n) {0," indent "}", "\n", text ); + # emulate greedy range matcher for mawk + it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } + sub(/\|$/, ")?", it); sub(/^\($/, "", it); + gsub( "(^|\n)" it, "\n", text ); text = _nblock(text); if (match( text, "^

    (]|\n$" )) @@ -827,12 +945,12 @@ BEGIN { # hls = "0 0 0 0 0 0"; # Universal Patterns - nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # not underline (except when escaped) - na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # not asterisk (except when escaped) - ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner (underline) - isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__" # inner (underline) - iea = "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*" # inner (asterisk) - isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*" # inner (asterisk) + nu = "([^_\\\\]|\\\\.|_[[:alnum:]])" # not underline (except when escaped, or inside a word) + na = "([^*\\\\]|\\\\.)" # not asterisk (except when escaped) + ieu = "_([^_[:space:]]|[^_[:space:]]" nu "*[^_[:space:]])_" # inner (underline) + isu = "__([^_[:space:]]|[^_[:space:]]" nu "*[^_[:space:]])__" # inner (underline) + iea = "\\*([^*[:space:]]|[^*[:space:]]" na "*[^*[:space:]])\\*" # inner (asterisk) + isa = "\\*\\*([^*[:space:]]|[^*[:space:]]" na "*[^*[:space:]])\\*\\*" # inner (asterisk) lix="\\[(\\\\[^\n]|[^]\n\\\\[])*\\]" # link text lid="(<(\\\\[^\n]|[^\n<>\\\\])*>|(\\\\.|[^()\"'\\\\])+|([^<\n\t ()\\\\]|\\\\[^\n])(\\\\[\n]|[^\n\t \\(\\)\\\\])*)" # link dest @@ -850,10 +968,17 @@ BEGIN { re_reflink = "(^|\n) ? ? ?\\[([^]\n]+)\\]: ([^ \t\n]+)(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))?(\n|$)"; # /(^|\n) ? ? ?\[([^]\n]+)\]: ([^ \t\n]+)(\n?[ \t]+("([^"]+)"|'([^']+)'|\(([^)]+)\)))?(\n|$)/ while ( match(f, re_reflink ) ) { - rl_id = gensub( re_reflink, "\\2", 1, substr(f, RSTART, RLENGTH) ); - rl_href[rl_id] = gensub( re_reflink, "\\3", 1, substr(f, RSTART, RLENGTH) ); - rl_title[rl_id] = gensub( re_reflink, "\\5", 1, substr(f, RSTART, RLENGTH) ); - f = substr(f, RSTART + RLENGTH); + tt = th = ti = substr(f, RSTART, RLENGTH); f = substr(f, RSTART + RLENGTH); + sub("(^|\n) ? ? 
?\\[", "", ti); sub("\\]: ([^ \t\n]+)(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))?(\n.*)?$", "", ti); + sub("(^|\n) ? ? ?\\[([^]\n]+)\\]: ", "", th); sub("(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))?(\n.*)?$", "", th); + if (match(tt, "(^|\n) ? ? ?\\[([^]\n]+)\\]: ([^ \t\n]+)(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))(\n|$)")) { + sub("(^|\n) ? ? ?\\[([^]\n]+)\\]: ([^ \t\n]+)", "", tt); sub("^\n?[ \t]+", "", tt); sub("(\n.*)?$", "", tt); + } else { tt = ""; } + rl_id = ti; rl_href[rl_id] = th; rl_title[rl_id] = tt; + # rl_id = gensub( re_reflink, "\\2", 1, substr(f, RSTART, RLENGTH) ); + # rl_href[rl_id] = gensub( re_reflink, "\\3", 1, substr(f, RSTART, RLENGTH) ); + # rl_title[rl_id] = gensub( re_reflink, "\\5", 1, substr(f, RSTART, RLENGTH) ); + # f = substr(f, RSTART + RLENGTH); rl_title[rl_id] = substr( rl_title[rl_id], 2, length(rl_title[rl_id]) - 2 ); if ( rl_href[rl_id] ~ /<.*>/ ) rl_href[rl_id] = substr( rl_href[rl_id], 2, length(rl_href[rl_id]) - 2 ); } diff --git a/tests-markdown.sh b/tests-markdown.sh new file mode 100755 index 0000000..2bbcf5d --- /dev/null +++ b/tests-markdown.sh @@ -0,0 +1,1222 @@ +#!/bin/sh + +runtimes="gawk bawk mawk goawk" + +BR=' +' +CR="$(printf \r)" +fail() { printf '%s\n' "$@"; exit 1; } + +awk() { /bin/awk "$@"; } +md_gawk() { gawk -f markdown.awk "$@"; } +md_bawk() { busybox awk -f markdown.awk "$@"; } +md_mawk() { mawk -f markdown.awk "$@"; } +md_goawk() { goawk -f markdown.awk "$@"; } + +acnt=1 # assertion count +assert() { + local md comp="$2" msg="$3" ex + printf "%3i: %s ... " $acnt "$msg" + + for proc in $runtimes; do + printf '%s ' $proc + md="$(printf '%s' "$1" |md_"$proc")"; ex=$? + if [ "$ex" != 0 ]; then + printf "Fail!\nExit Code: %i\n" $ex + exit 1 + fi + if [ "$md" != "$comp" ]; then + printf "Fail!\n:\n%s\n:\n%s\n" "$md" "$comp" + exit 1 + fi + done + printf 'OK\n' + acnt=$((acnt + 1)) +} + +# Inline checks +printf '## Testing Inline markup ##\n' + +# strong / em / ... +assert '~~strikeout~~' '

    strikeout

    ' "strikeout" +assert '~~~strikeout~~' '

    ~strikeout

    ' "strikeout" +assert '^super^' '

    super

    ' "superscript" +assert '~sub~' '

    sub

    ' "subscript" + +assert "foo ${BR}bar" "

    foo
    ${BR}bar

    " 'double space line break' +assert '```©```' "

    &copy;

    " "code span escape" + +assert '_emphasized text_' '

    emphasized text

    ' "em" +assert '_emphasized_text_' '

    emphasized_text

    ' "em" +assert 'empha*sized* text_' '

    emphasized text_

    ' "em" +assert '__empha*sized* text__' '

    emphasized text

    ' "strong em" +assert '***strem***' '

    strem

    ' "strong em" +assert '***str**em*' '

    strem

    ' "em strong" +assert '_**strem**_' '

    strem

    ' "em strong" + +assert '*foo**str**bar**str**qua*' '

    foostrbarstrqua

    ' 'em strong asterisk' +assert '**foo*em*bar*em*qua**' '

    fooembaremqua

    ' 'strong em asterisk' + +assert '_foo__str__bar__str__qua_' '

    foo__str__bar__str__qua

    ' 'em embedded underscore' +assert '__foo_em_bar_em_qua__' '

    foo_em_bar_em_qua

    ' 'strong embedded underscore' +assert '_**str**foo**str**_' '

    strfoostr

    ' 'em strong mixed' + +assert '_foo_-> bar' '

    foo→ bar

    ' 'arrow' +assert '`_foo_-> bar`' '

    _foo_-> bar

    ' 'arrow' +assert ' <- comment' '

    <!-- comment --> ← comment

    ' 'arrow' + +# Escaping +assert '©' "

    ©

    " "escape" +assert '\©' "

    &copy;

    " "escape" +assert 'AT&T' "

    AT&T

    " "escape" +assert '`©`' "

    &copy;

    " "code span escape" + +# Wiki Links +assert '[[Link/]]' '

    Link/

    ' "Wiki Link" +assert '[[Link/|Linked Page]]' '

    Linked Page

    ' "Wiki Link" + +# Automatic Links +assert '' "

    https://de.wikipedia.org

    " "automatic link" +assert '' "

    http://de.wikipedia.org

    " "automatic link" +# assert '' "

    http://de.wikipedia.org

    " "automatic link" + +# Inline Links +assert '[Wikipedia](http://de.wikipedia.org)' "

    Wikipedia

    " "inline link" +assert '[Wikipedia](http://de.wikipedia.org "Online Encyclopedia")' "

    Wikipedia

    " "inline link" +assert '[Wikipedia]( "Online Encyclopedia")' "

    Wikipedia

    " "inline link" + +# Inline Images (note leading white space) +assert ' ![Testbild](Test Bild.jpg)' '

    Testbild

    ' "inline image" +assert ' ![Testbild](Test Bild.jpg "German Television *test* image ca. 1994")' '

    Testbild

    ' "inline image" +assert ' ![Testbild *ARD*](Test Bild.jpg){tv ard function-check}' '

    Testbild *ARD*

    ' "inline image" +# assert ' ![Testbild *ARD*](Test Bild.jpg){#tv .ard .function-check}' '

    Testbild *ARD*

    ' "inline image id/classes" + +assert '[![Wikipedia](wikilogo.png)]()'\ + '

    Wikipedia

    '\ + "Image Link" + +assert ' <" _foo_>>' '

    macro /test -- "* weird <args>" _foo_

    ' "Macros" + +# Block checks +printf '\n## Testing Block markup ##\n' + +assert \ +'foo + +bar' \ +'

    foo

    + +

    bar

    ' \ +'paragraphs' + +assert '%meta *data block* + ignored `no` __formatting__ +regular *data*' \ +'

    regular data

    ' \ +"meta data block" + +assert '> text in a block +> quote can be *emphasized* +and quotes continued + +until they end' \ +'

    text in a block +quote can be emphasized +and quotes continued

    + +

    until they end

    ' \ +'block quote' + +assert '| text in a line +| block can be *emphasized* +but not continued + +until they end' \ +'
    text in a line
    +block can be emphasized
    +

    but not continued

    + +

    until they end

    ' \ +'pandoc line block' + +assert ' indented code will + not be + *formatted* + but ­ ' \ +'
    indented code will
    +not be
    +*formatted*
    +but &shy; <escaped>
    ' \ +"indented code block" + +assert ' indented code will + not be + + *formatted* + but ­ ' \ +'
    indented code will
    +not be
    +
    +*formatted*
    +but &shy; <escaped>
    ' \ +"indented code block" + +assert ':::: tag +fenced _divs_ are regular text + +::: +and can contain another div +::: +::::' \ +'

    fenced divs are regular text

    + +

    and can contain another div

    +
    +
    ' \ +"pandoc fenced divs" + +assert '``` tag,code +fenced code will +not be +*formatted* +but ­ +```' \ +'
    fenced code will
    +not be
    +*formatted*
    +but &shy; <escaped>
    ' \ +"fenced code block" + +# Block Images +assert '![Testbild](Test Bild.jpg)' \ +'
    Testbild
    ' \ +"block image" + +assert '![Testbild](Test Bild.jpg "German Television *test* image ca. 1994")' \ +'
    Testbild
    German Television test image ca. 1994
    ' \ +"block image" + +assert '![Testbild *ARD*](Test Bild.jpg){tv ard function-check}' \ +'
    Testbild *ARD*
    ' \ +"block image tagged" + +# assert '![Testbild *ARD*](Test Bild.jpg){#tv .ard .function-check}' \ +# '
    Testbild *ARD*
    ' \ +# "block image tagged" + +# Headings +assert 'Heading first Order +============' \ +'

    Heading first Order

    +
    ' \ +'Heading h1' + +assert 'Heading first Order {.foo #bar} +============' \ +'

    Heading first Order

    +
    ' \ +'Heading h1 + attributes' + +assert 'Heading second Order +------------' \ +'

    Heading second Order

    +
    ' \ +'Heading h2' + +assert 'Heading second Order {.foo #bar} +------------' \ +'

    Heading second Order

    +
    ' \ +'Heading h2 + attributes' + +assert '#### Heading four' \ +'

    Heading four

    +
    ' \ +'Heading arbitrary' + +assert '###Heading three ######' \ +'

    Heading three

    +
    ' \ +'Heading arbitrary' + +assert '### Heading three ## {foo bar}' \ +'

    Heading three

    +
    ' \ +'Heading arbitrary + attributes' + +assert '# Heading \# # {foo bar}' \ +'

    Heading #

    +
    ' \ +'Heading arbitrary + attributes' + +assert 'Definition +: term +with line continuation + +: second term + +foo +: bar' \ +'
    +
    Definition
    +
    term +with line continuation
    +
    second term
    +
    foo
    +
    bar
    +
    ' \ +'Definition List' + +assert ' * list +* item + 1. sub list +* three +- new list' \ +'
      +
    • list
    • +
    • item

      +
        +
      1. sub list
      2. +
      +
    • +
    • three
    • +
    +
      +
    • new list
    • +
    ' \ +'Lists' + +assert '::: outer div +Nesting paragraph + +------- + +> ``` +> quoted code +> ``` +> +> > quoted quote +::: +' \ +'

    Nesting paragraph

    + +
    + +
    quoted code
    +

    quoted quote

    +
    + +
    ' \ +"Nesting" + +assert ' +| Col 1 | Col 2| Col 3 | +|-------|-------|------:| +| foo | *bar* | `qua` | +| 23 | 47 | 11 | +' \ +' + + + + +
    Col 1 Col 2 Col 3
    foo bar qua
    23 47 11
    ' \ +'Pipe Tables' + +# assert ' +# Col 1 | Col 2| Col 3 +# :-----:|-------|------: +# foo | *bar* | `qua` +# 23 | 47 | 11 | +# ' \ +# ' +# +# +# +# +#
    Col 1 Col 2 Col 3
    foo bar qua
    23 47 11
    ' \ +# 'Pipe Tables' + +assert '+---+---+---+ +|Col 1\\| Col\|2 | Col 3| ++===+:==:+===+ +| * foo1 | *bar* |```| +| * foo2 | **qua** |code | +| - foo3 | `quux` |```| ++-------+-----+----+ +| 23 | 47 | 11 | ++-------+-----+----+ +' \ +' + + + + +

    Col 1\

    +

    Col|2

    +

    Col 3

    +
      +
    • foo1
    • +
    • foo2
    • +
    +
      +
    • foo3
    • +
    +

    bar + qua + quux

    +
    code 
    +

    23

    +

    47

    +

    11

    +
    ' \ +'Grid Tables' + +assert '## foo + +# bar + +sub bar +------- + +### sub sub sub ### + +##sub2 bar {x} +' \ +'

    foo

    +

    bar

    +

    sub bar

    +

    sub sub sub

    +

    sub2 bar

    +
    ' \ +'Headline Nesting' + +# Reference syntax checks +printf '\n## Testing reference syntax ##\n' + +assert 'Foo bar [Link] [1] for show + +The same in [en][] + +[en]: +[1]: http://de.wikipedia.org "Online Encyclopedia"' \ +'

    Foo bar Link for show

    + +

    The same in en

    ' \ +"Reference Links" + +assert 'Foo bar [Link] [1] for show + +[en]: +[1]: http://de.wikipedia.org + "Online Encyclopedia"' \ +'

    Foo bar Link for show

    ' \ +"Reference Links" + +assert 'Foo bar ![Image] [1] for show + +The same as ![PNG][] + +[PNG]: +[1]: http://de.wikipedia.org/logo.jpg "Online Encyclopedia"' \ +'

    Foo bar Image for show

    + +

    The same as PNG

    ' \ +"Reference images" + +assert '![Image] [1] + +[PNG]: +[1]: http://de.wikipedia.org/logo.jpg "Online Encyclopedia"' \ +'
    Image
    Online Encyclopedia
    ' \ +"Reference images (block)" + +assert '<" _foo_>>' 'macro /test -- "* weird <args>" _foo_' "Macros/Block" + + +printf '\n## Testing example pages ##\n' + +assert 'Markdown.awk +============ + +Supported Features / TODO: +-------------------------- +- [x] done +- [ ] todo +- [-] not planned +- ? unsure (whether to implement) +- [/] partial + +### Basic Markdown - Block elements: ### +- [x] Paragraphs + - [x] Double space line breaks +- [x] Proper block element nesting +- [x] Headings +- [x] ATX-Style Headings +- [x] Blockquotes +- [x] Lists (ordered, unordered) +- [x] Code blocks (using indention) +- [x] Horizontal rules +- [x] Verbatim HTML block (disabled by default) + +### Basic Markdown - Inline elements: ### +- [x] Links +- [x] Reference style links +- [x] Emphasis *em*/**strong** (*Asterisk*, _Underscore_) +- [x] `code`, also ``code containing `backticks` `` +- [x] Images / reference style images +- [x] +- [x] backslash escapes +- [x] Verbatim HTML inline (disabled by default) +- [x] HTML escaping + +NOTE: Set the environment variable `MD_HTML=true` to enable verbatim HTML + +### Extensions - Block elements: ### +- [x] Automatic
    -wrapping (custom) +- ? Heading identifiers (php md, pandoc) + - [x] Heading attributes (custom) +- [x] Automatic heading identifiers (custom) +- [x] Fenced code blocks (php md, pandoc) + - [x] Fenced code attributes +- [x] Images (as block elements,
    -wrapped) (custom) + - [x] reference style block images +- [/] Tables + - ? Simple table (pandoc) + - ? Multiline table (pandoc) + - [x] Grid table (pandoc) + - [x] Headerless + - [x] Pipe table (php md, pandoc) +- [x] Line blocks (pandoc) +- [x] Task lists (pandoc, custom) +- [x] Definition lists (php md, pandoc) +- [-] Numbered example lists (pandoc) +- [-] Metadata blocks (pandoc) +- [x] Metadata blocks (custom) +- [x] Fenced Divs (pandoc) + +### Extensions - Inline elements: ### +- [x] Ignore embedded_underscores (php md, pandoc) +- [x] ~~strikeout~~ (pandoc) +- [x] ^Superscript^ ~Subscript~ (pandoc) +- [-] Bracketed spans (pandoc) + - [-] Inline attributes (pandoc) +- [x] Image attributes (custom, pandoc inspired, not for reference style) +- [x] Wiki style links [[PageName]] / [[PageName|Link Text]] +- [-] TEX-Math (pandoc) +- ? Footnotes (php md) +- ? Abbreviations (php md) +- ? "Curly quotes" (smartypants) +- [ ] em-dashes (--) (smartypants old) +- ? ... three-dot ellipsis (smartypants) +- [-] en-dash (smartypants) +- [ ] Automatic em-dash / en-dash +- [x] Automatic -> Arrows <- (custom) + +Compatibility +------------- +Markdown.awk can run in GNU awk (`gawk`) and in Busybox awk. It is _not_ fully POSIX compliant and does not run in `mawk` or `nawk`. In particular it makes heavy use of the `gensub()` function and its ability to use paranthesized subexpressions in the replacement text. This feature is not available in the POSIX specified `sub()` and `gsub()` functions. Hence it cannot be replaced without effort. + +Tests +----- +[Link with Title](https://en.wikipedia.org/wiki/Markdown "Markdown in Wikipedia"), *emphasis*, **strong**, **strong containing *emphasis***, `inline code`, `` code with `backticks` ``. See more tests [here](./tests/).' \ +'

    Markdown.awk

    +

    Supported Features / TODO:

    +
      +
    • done
    • +
    • todo
    • +
    • not planned
    • +
    • ? unsure (whether to implement)
    • +
    • partial
    • +
    +

    Basic Markdown - Block elements:

    +
      +
    • Paragraphs

      +
        +
      • Double space line breaks
      • +
      +
    • +
    • Proper block element nesting
    • +
    • Headings
    • +
    • ATX-Style Headings
    • +
    • Blockquotes
    • +
    • Lists (ordered, unordered)
    • +
    • Code blocks (using indention)
    • +
    • Horizontal rules
    • +
    • Verbatim HTML block (disabled by default)
    • +
    +

    Basic Markdown - Inline elements:

    +
      +
    • Links
    • +
    • Reference style links
    • +
    • Emphasis em/strong (Asterisk, Underscore)
    • +
    • code, also code containing `backticks`
    • +
    • Images / reference style images
    • +
    • <automatic links>
    • +
    • backslash escapes
    • +
    • Verbatim HTML inline (disabled by default)
    • +
    • HTML escaping
    • +
    +

    NOTE: Set the environment variable MD_HTML=true to enable verbatim HTML

    + +

    Extensions - Block elements:

    +
      +
    • Automatic <section>-wrapping (custom)
    • +
    • ? Heading identifiers (php md, pandoc)

      +
        +
      • Heading attributes (custom)
      • +
      +
    • +
    • Automatic heading identifiers (custom)
    • +
    • Fenced code blocks (php md, pandoc)

      +
        +
      • Fenced code attributes
      • +
      +
    • +
    • Images (as block elements, <figure>-wrapped) (custom)

      +
        +
      • reference style block images
      • +
      +
    • +
    • Tables

      +
        +
      • ? Simple table (pandoc)
      • +
      • ? Multiline table (pandoc)
      • +
      • Grid table (pandoc)

        +
          +
        • Headerless
        • +
        +
      • +
      • Pipe table (php md, pandoc)
      • +
      +
    • +
    • Line blocks (pandoc)
    • +
    • Task lists (pandoc, custom)
    • +
    • Definition lists (php md, pandoc)
    • +
    • Numbered example lists (pandoc)
    • +
    • Metadata blocks (pandoc)
    • +
    • Metadata blocks (custom)
    • +
    • Fenced Divs (pandoc)
    • +
    +

    Extensions - Inline elements:

    +
      +
    • Ignore embedded_underscores (php md, pandoc)
    • +
    • strikeout (pandoc)
    • +
    • Superscript Subscript (pandoc)
    • +
    • Bracketed spans (pandoc)

      +
        +
      • Inline attributes (pandoc)
      • +
      +
    • +
    • Image attributes (custom, pandoc inspired, not for reference style)
    • +
    • Wiki style links PageName / Link Text
    • +
    • TEX-Math (pandoc)
    • +
    • ? Footnotes (php md)
    • +
    • ? Abbreviations (php md)
    • +
    • ? "Curly quotes" (smartypants)
    • +
    • em-dashes (--) (smartypants old)
    • +
    • ? ... three-dot ellipsis (smartypants)
    • +
    • en-dash (smartypants)
    • +
    • Automatic em-dash / en-dash
    • +
    • Automatic → Arrows ← (custom)
    • +
    +

    Compatibility

    +

    Markdown.awk can run in GNU awk (gawk) and in Busybox awk. It is not fully POSIX compliant and does not run in mawk or nawk. In particular it makes heavy use of the gensub() function and its ability to use paranthesized subexpressions in the replacement text. This feature is not available in the POSIX specified sub() and gsub() functions. Hence it cannot be replaced without effort.

    + +

    Tests

    +

    Link with Title, emphasis, strong, strong containing emphasis, inline code, code with `backticks`. See more tests here.

    +
    ' \ +'Full Page (cgilite markdown)' + +assert 'Headline First Order +==================== + +Headline Second Order +--------------------- + + Code Block + with indentation + +> Blockquote +> ---------- +> like in an email + +### Headline 3rd order + +- unordered List +1. with sub points + + sometimes longer ones + +2. which are ordered +3. [ ] and have a Todo item +- more list points + - and a sublist +- [x] some of which ae done + +---------- ++ A lazy, lazy, list +item. + ++ Another one; this looks +bad but is legal. + + Second paragraph of second +list item. + +--------- + +~~~ {.blue} +Fenced Code Block +# with verbatim Text +`and an attribute` +~~~ + +| The limerick packs laughs anatomical +| In space that is quite economical. +| But the *good* ones I'\''ve seen +| So seldom are *clean* +| And the clean ones so seldom are comical + +| The Right Honorable Most Venerable and Righteous Samuel L. + Constable, Jr. +| 200 Main St. +| Berkeley, CA 94718 + +Term 1 + +: This is a definition with two paragraphs. Lorem ipsum + dolor sit amet, consectetuer adipiscing elit. Aliquam + hendrerit mi posuere lectus. + + Vestibulum enim wisi, viverra nec, fringilla in, laoreet + vitae, risus. + +: Second definition for term 1, also wrapped in a paragraph + because of the blank line preceding it. + +Term 2 + +: This definition has a code block, a blockquote and a list. + + code block. + + > block quote + > on two lines. + + 1. first list item + 2. second list item' \ +'

    Headline First Order

    +

    Headline Second Order

    +
    Code Block
    +with indentation
    +

    Blockquote

    +

    like in an email

    +
    + +

    Headline 3rd order

    +
      +
    • unordered List
    • +
    +
      +
    1. with sub points

      + +

      sometimes longer ones

      +
    2. +
    3. which are ordered

      +
    4. +
    5. and have a Todo item

      +
    6. +
    +
      +
    • more list points

      +
        +
      • and a sublist
      • +
      +
    • +
    • some of which ae done
    • +
    +
    +
      +
    • A lazy, lazy, list +item.

      +
    • +
    • Another one; this looks +bad but is legal.

      + +

      Second paragraph of second +list item.

      +
    • +
    +
    + +
    Fenced Code Block
    +# with verbatim Text
    +`and an attribute`
    +
    The limerick packs laughs anatomical
    +In space that is quite economical.
    + But the good ones I've seen
    + So seldom are clean
    +And the clean ones so seldom are comical
    +
    The Right Honorable Most Venerable and Righteous Samuel L. Constable, Jr.
    +200 Main St.
    +Berkeley, CA 94718
    +
    +
    Term 1
    +

    This is a definition with two paragraphs. Lorem ipsum +dolor sit amet, consectetuer adipiscing elit. Aliquam +hendrerit mi posuere lectus.

    + +

    Vestibulum enim wisi, viverra nec, fringilla in, laoreet +vitae, risus.

    +
    +
    Second definition for term 1, also wrapped in a paragraph +because of the blank line preceding it.
    +
    Term 2
    +

    This definition has a code block, a blockquote and a list.

    + +
    code block.
    +

    block quote +on two lines.

    + +
      +
    1. first list item
    2. +
    3. second list item
    4. +
    +
    +
    +
    ' \ +'Full Page (MD Tests)' + +assert '%css shellwiki.css + +Shellwiki +========= +Shellwiki is a Wiki and Content Management System with minimal dependencies. It can run on embedded devices, as well as full size web servers. Its goals are: + + - **easy deployment** + + *ShellWiki* can run on any Unix-Like web server. It requires no + scripting languages beyound the regular (Bourne style) Unix + shell, `awk`, and `sed`, all of which can be providede by + `busybox`. It can be launched via `netcat`, `inetd`, `systemd`, + or any cgi capable webserver like `apache` or `lighttpd`. + *ShellWiki* can run easily on embedded systems like OpenWRT or + RaspberryPi, and just as easily on internet web servers + providing multisite setups. + + - **accessibility** + + *ShellWiki* requires no browserside scripting. It aims to be rendered + in all web browsers including `w3m` and `links` besides graphical + browsers like `chromium` or `firefox`. It is as accessible on mobile + screens as on desktop computers. + *ShellWiki* uses the well known `markdown` syntax for formatting and + aims to provide consistent UI controls for various use cases. + + - **adaptability** + + *ShellWiki* is extensible through plugins and provides theming and + styling capabilities that make it suitable not only as a wiki, but + also as a CMS, including access scopes for different authors and + stylisticly distinct subpages. + + - **simplicity** + + *ShellWiki* avoids complexity in both software design and user + interface. It aims to be secure and predictable. Extensions can + be written and modified by system administrators. 
+ +<> + +Features +-------- + - **Markdown Wiki Syntax** + + The wiki syntax is based on [John Grubers Markdown](https://daringfireball.net/projects/markdown/) + with extensions inspired by [Pandoc](https://pandoc.org/MANUAL.html#pandocs-markdown), + [PHP Markdown Extra](https://michelf.ca/projects/php-markdown/extra/), and + [Github Flavored Markdown](https://github.github.com/gfm/). + Additional Macros are provided to enable functions like an automatic table of contents, listing of + sub pages, etc. + + See [Markdown](/software/cgilite/markdown/) + and [Macros](macros/) + + - **Plain file Storage** + + Pages and attachments are stored as plain files on disk. There is no need for a separate database + system. + + - **Git revisioning** + + If `git` is available on the web server, pages can be revisioned so that past versions can be + revisited. Optionally attachments can be revisioned too. Server administrators may use the git + archives to synchronise sites across servers by adding their own mechanics. + + - **Multisite Installation** + + Code and data directories are stricly separate on the server. Directory pathes are obtained from + environment variables, so that multiple sites can be served from the same installation directory. + + See also: [Installation](installation/) + + - **Semantic HTML5** + + for accessible rendering of pages + + - **Descriptive Page Names** + + URLs of pages can be freely provided by the user. User access can be constrained to specific sub + pages. Within their access permissions users can move and rename pages as they like. + + - **File Upload / Attachment** + + While pages are merely text documents themselves, users can upload additional attachments and + link to them in a page. Images and media files can be embedded directly into a page. + + - **Image scaling** + + If `ImageMagick` is available on the web server, huge attachment images are automatically compressed + and scaled to HD resolution when referred to in a page. 
+ Of course the original version can still be linked to. + + - **Permissions via ACL** + + Grant read/write access for pages and sub-pages + + - **User provided CSS** + + Aside from full theming in the installation directory, pages can be styled using CSS files + uploaded as attachments. + + - **No reliance on Javascript** + + Authors and visitors can use the site without being forced to run untrusted code. + The main theme still provides collapsible menus and a responsive layout. + + - **Complete GDPR Compliance** without consent walls + + Because shellwiki does not track page visitors and does not + serve cookies to visitors by default it does not need to coerce + visitors into handling GDPR "consent" forms. + + (Login for authors still requires a session cookie) + + - **True multilanguage capability** + + - Pages can be translated + - Switching language does not require a cookie + - Fallback language for missing translations + - Users stay on a translated version, even if single page translations are missing + + - **Full text indexing and search** + + Shellwiki contains its own basic text indexer without external dependencies. + + - **Extensibility** through + + - [Themes](themes/) + - [Macros](macros/) + - [URL Handlers](handlers/) + - [Custom Syntax parsers](parsers/) + +Dependencies +------------ +Shellwiki is based on [cgilite](/software/cgilite/), which is included in the installation. It is written in posix compliant shell script, and the markdown renderer is written in ~~posix compliant~~ AWK. The entire wiki system can run with nothing more than a busybox. In fact it can be served from the rescue shell in a Debian initrd, or from an OpenWRT router. 
+ +**Its precise requirements are:** + + - A Posix Shell (as provided by busybox, but bash is OK) + - An AWK interpreter (as provided by busybox, but GNU AWK is OK) + - `mawk` and `nawk` will currently not work + - inetd (as provided by busybox) + + **or** any CGI-Capable web server + + - _Optional:_ GIT for revisioning + - _Optional:_ ImageMagick for image compression + - _Optional:_ Sendmail for sending password reminders, etc. + +Installation +------------ +Also see -> [[installation/]] + +You can try out shellwiki right now using busybox: + + ~$ git clone https://git.plutz.net/git/shellwiki ~/shellwiki + ~$ _DATA=~/wikidata busybox nc -llp 1080 -e ~/shellwiki/index.cgi + +For additional examples, regarding permanent installation and configuration in webservers see [[installation/]]. + +Syntax +------ +The wiki syntax is based on John Grubers [Markdown](https://daringfireball.net/projects/markdown/) with extensions borrowed from [Pandoc](https://pandoc.org/MANUAL%202.html#pandocs-markdown) and [PHP Markdown Extra](https://michelf.ca/projects/php-markdown/extra/). The Markdown parser is provided by [Cgilite](/software/cgilite/) and its full documentation can be looked at [here](/software/cgilite/markdown/). + +<> + +Macros +------ +Also see -> [[macros/]] + +In addition to the Markdown syntax, wiki pages can include Macros, which perform additional functions on a page, like generating an image gallery, including parts of other pages, etc. Macros make Shellwiki truly dynamic and flexible. + +For example you can include a table of content for the current page by including the line + + <> + +in your page. Macros can receive additional parameters, which modify their behaviour. + +Macros are the most easy to write type of extension. See [Macros](macros/) for a full list of available macros. + +Themes +------ +Also see -> [[theming/]] + +While Shellwiki supports plugins for [theming](dev-theming/), it'\''s apearance can mostly be configured by the user. 
Pages can be configured to use custom CSS files. In addition page headers and footers are themselves wiki pages which can be modified to add menus, custom logos, links, etc. The same goes for error pages. + +For an example, see the [technical pages](/[wiki]/) for this wiki. + +Multiple Languages +------------------ +To enable a multilingual setup you must set a default language in your configuration environment: + +``` +export LANGUAGE_DEFAULT=en +``` + +Once this is the case, pagenames starting with a colon (`:`) will be considered translated versions of their parent pages. I.e. the pages `/`, `/:de`, and `/:fr` will serve as the default, german, and french home page respectively. + +The names of the languages can be arbitrary, but I recommend using [ISO-639](https://en.wikipedia.org/wiki/ISO_639-1) codes, because the code is used in the `lang=""` attribute of the pages top level html element. You can however make up non-standardised or fantastic language names as well. + +Links on each page will automatically be suffixed with the same language tag, so a visitor keeps browsing the same language without needing a cookie. Attachments should only be uploaded to the default language page, and attachment links in the translated pages will correctly point to the main page attachments. You can create a language menu on the header page, simply by linking to `./:en`, `./:es` , `./:fr`, etc. + +Header, footer, and error pages will be included from their respective language version, as will all macro includes, etc. Should a page not exist in a given language, the default page will be displayed instead. However, included elements will still be taken from the respective language version, possibly mixing languages between the selected user language and the default. + +### Constraints of the current implementation + - There can be only one default language, with no priority of different fallback languages + - Page URLs can currently not be translated. 
Doing so would require a model for manually assigning translated page names and would not be trivial to use. + +Developer Documentation +----------------------- +How to write: + + - [Themes](dev-theming/) + - [Macros](dev-macros/) + - [Handlers](dev-handlers/) + - [Parsers](dev-parsers/)' \ +'

    Shellwiki

    +

    Shellwiki is a Wiki and Content Management System with minimal dependencies. It can run on embedded devices, as well as full size web servers. Its goals are:

    +
      +
    • easy deployment

      + +

      ShellWiki can run on any Unix-Like web server. It requires no +scripting languages beyound the regular (Bourne style) Unix +shell, awk, and sed, all of which can be providede by +busybox. It can be launched via netcat, inetd, systemd, +or any cgi capable webserver like apache or lighttpd.
      +ShellWiki can run easily on embedded systems like OpenWRT or +RaspberryPi, and just as easily on internet web servers +providing multisite setups.

      +
    • +
    • accessibility

      + +

      ShellWiki requires no browserside scripting. It aims to be rendered +in all web browsers including w3m and links besides graphical +browsers like chromium or firefox. It is as accessible on mobile +screens as on desktop computers.
      +ShellWiki uses the well known markdown syntax for formatting and +aims to provide consistent UI controls for various use cases.

      +
    • +
    • adaptability

      + +

      ShellWiki is extensible through plugins and provides theming and +styling capabilities that make it suitable not only as a wiki, but +also as a CMS, including access scopes for different authors and +stylisticly distinct subpages.

      +
    • +
    • simplicity

      + +

      ShellWiki avoids complexity in both software design and user +interface. It aims to be secure and predictable. Extensions can +be written and modified by system administrators.

      +
    • +
    +toc 2 2

    Features

    +
      +
    • Markdown Wiki Syntax

      + +

      The wiki syntax is based on John Grubers Markdown +with extensions inspired by Pandoc, +PHP Markdown Extra, and +Github Flavored Markdown. +Additional Macros are provided to enable functions like an automatic table of contents, listing of +sub pages, etc.

      + +

      See Markdown
      +and Macros

      +
    • +
    • Plain file Storage

      + +

      Pages and attachments are stored as plain files on disk. There is no need for a separate database +system.

      +
    • +
    • Git revisioning

      + +

      If git is available on the web server, pages can be revisioned so that past versions can be +revisited. Optionally attachments can be revisioned too. Server administrators may use the git +archives to synchronise sites across servers by adding their own mechanics.

      +
    • +
    • Multisite Installation

      + +

      Code and data directories are stricly separate on the server. Directory pathes are obtained from +environment variables, so that multiple sites can be served from the same installation directory.

      + +

      See also: Installation

      +
    • +
    • Semantic HTML5

      + +

      for accessible rendering of pages

      +
    • +
    • Descriptive Page Names

      + +

      URLs of pages can be freely provided by the user. User access can be constrained to specific sub +pages. Within their access permissions users can move and rename pages as they like.

      +
    • +
    • File Upload / Attachment

      + +

      While pages are merely text documents themselves, users can upload additional attachments and +link to them in a page. Images and media files can be embedded directly into a page.

      +
    • +
    • Image scaling

      + +

      If ImageMagick is available on the web server, huge attachment images are automatically compressed +and scaled to HD resolution when referred to in a page. +Of course the original version can still be linked to.

      +
    • +
    • Permissions via ACL

      + +

      Grant read/write access for pages and sub-pages

      +
    • +
    • User provided CSS

      + +

      Aside from full theming in the installation directory, pages can be styled using CSS files +uploaded as attachments.

      +
    • +
    • No reliance on Javascript

      + +

      Authors and visitors can use the site without being forced to run untrusted code. +The main theme still provides collapsible menus and a responsive layout.

      +
    • +
    • Complete GDPR Compliance without consent walls

      + +

      Because shellwiki does not track page visitors and does not +serve cookies to visitors by default it does not need to coerce +visitors into handling GDPR "consent" forms.

      + +

      (Login for authors still requires a session cookie)

      +
    • +
    • True multilanguage capability

      +
        +
      • Pages can be translated
      • +
      • Switching language does not require a cookie
      • +
      • Fallback language for missing translations
      • +
      • Users stay on a translated version, even if single page translations are missing
      • +
      +
    • +
    • Full text indexing and search

      + +

      Shellwiki contains its own basic text indexer without external dependencies.

      +
    • +
    • Extensibility through

      + +
    • +
    +

    Dependencies

    +

    Shellwiki is based on cgilite, which is included in the installation. It is written in posix compliant shell script, and the markdown renderer is written in posix compliant AWK. The entire wiki system can run with nothing more than a busybox. In fact it can be served from the rescue shell in a Debian initrd, or from an OpenWRT router.

    + +

    Its precise requirements are:

    +
      +
    • A Posix Shell (as provided by busybox, but bash is OK)

      +
    • +
    • An AWK interpreter (as provided by busybox, but GNU AWK is OK)

      +
        +
      • mawk and nawk will currently not work
      • +
      +
    • +
    • inetd (as provided by busybox)

      + +

      or any CGI-Capable web server

      +
    • +
    • Optional: GIT for revisioning

      +
    • +
    • Optional: ImageMagick for image compression

      +
    • +
    • Optional: Sendmail for sending password reminders, etc.

      +
    • +
    +

    Installation

    +

    Also see → installation/

    + +

    You can try out shellwiki right now using busybox:

    + +
    ~$ git clone https://git.plutz.net/git/shellwiki ~/shellwiki
    +~$ _DATA=~/wikidata busybox nc -llp 1080 -e ~/shellwiki/index.cgi
    +

    For additional examples, regarding permanent installation and configuration in webservers see installation/.

    + +

    Syntax

    +

    The wiki syntax is based on John Grubers Markdown with extensions borrowed from Pandoc and PHP Markdown Extra. The Markdown parser is provided by Cgilite and its full documentation can be looked at here.

    + +include --nolink /[wiki]/editorhelp/

    Macros

    +

    Also see → macros/

    + +

    In addition to the Markdown syntax, wiki pages can include Macros, which perform additional functions on a page, like generating an image gallery, including parts of other pages, etc. Macros make Shellwiki truly dynamic and flexible.

    + +

    For example you can include a table of content for the current page by including the line

    + +
    <<toc>>
    +

    in your page. Macros can receive additional parameters, which modify their behaviour.

    + +

    Macros are the easiest type of extension to write. See Macros for a full list of available macros.

    + +

    Themes

    +

    Also see → theming/

    + +

    While Shellwiki supports plugins for theming, its appearance can mostly be configured by the user. Pages can be configured to use custom CSS files. In addition, page headers and footers are themselves wiki pages, which can be modified to add menus, custom logos, links, etc. The same goes for error pages.

    + +

    For an example, see the technical pages for this wiki.

    + +

    Multiple Languages

    +

    To enable a multilingual setup you must set a default language in your configuration environment:

    + +
    export LANGUAGE_DEFAULT=en
    +

    Once this is the case, page names starting with a colon (:) will be considered translated versions of their parent pages. I.e. the pages /, /:de, and /:fr will serve as the default, German, and French home page respectively.

    + +

    The names of the languages can be arbitrary, but I recommend using ISO-639 codes, because the code is used in the lang="" attribute of the page's top-level html element. You can however make up non-standardised or fantastic language names as well.

    + +

    Links on each page will automatically be suffixed with the same language tag, so a visitor keeps browsing the same language without needing a cookie. Attachments should only be uploaded to the default language page, and attachment links in the translated pages will correctly point to the main page attachments. You can create a language menu on the header page, simply by linking to ./:en, ./:es, ./:fr, etc.

    + +

    Header, footer, and error pages will be included from their respective language version, as will all macro includes, etc. Should a page not exist in a given language, the default page will be displayed instead. However, included elements will still be taken from the respective language version, possibly mixing languages between the selected user language and the default.

    + +

    Constraints of the current implementation

    +
      +
    • There can be only one default language, with no priority of different fallback languages
    • +
    • Page URLs can currently not be translated. Doing so would require a model for manually assigning translated page names and would not be trivial to use.
    • +
    +

    Developer Documentation

    +

    How to write:

    + +
    ' \ +'Full Page (ShellWiki)' + +printf '\nAll tests passed!\n'