From c46fa28d9b531c7fe1ffa5d1bbce39a4b71f6597 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Paul=20H=C3=A4nsch?= Date: Fri, 30 Aug 2024 14:38:22 +0200 Subject: [PATCH] avoid some recursion for better compatibility and performance --- markdown.awk | 458 ++++++++++++++++++++++++++++----------------------- 1 file changed, 250 insertions(+), 208 deletions(-) diff --git a/markdown.awk b/markdown.awk index 26fc11b..d1c8b5b 100755 --- a/markdown.awk +++ b/markdown.awk @@ -121,217 +121,259 @@ function URL ( text, sharp ) { return text; } -function inline( line, LOCAL, len, text, code, href, guard ) { - if ( line ~ /^$/ ) { # Recursion End - return ""; - - # omit processing of escaped characters - } else if ( line ~ /^\\./) { - return HTML(substr(line, 2, 1)) inline( substr(line, 3) ); - - # hard brakes - } else if ( match(line, /^ \n/) ) { - return "
\n" inline( substr(line, RLENGTH + 1) ); - - # ``code spans`` - } else if ( match( line, /^`+/) ) { - len = RLENGTH - guard = substr( line, 1, len ) - if ( match(line, guard ".*" guard) ) { - code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1) - len = 2 * length(guard) + length(code) - # strip single surrounding white spaces - gsub( /^ | $/, "", code) - # escape HTML within code span - gsub( /&/, "\\&", code ); gsub( //, "\\>", code ); - return "" code "" inline( substr( line, len + 1 ) ) - } - - # Macros - } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) { - len = RLENGTH; - return "" HTML( substr( line, 3, len - 4 ) ) "" inline(substr(line, len + 1)); - - # Wiki style links - } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) { - len = RLENGTH; href = text = substr(line, 1, len); - sub(/^\[\[/, "", href); sub(/(\|([^]]+))?\]\].*$/, "", href); - sub(/^\[\[([^]|]+)/, "", text); sub(/\]\].*$/, "", text); sub(/^\|/, "", text); - # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", href ); - # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", text ); - if ( ! 
text ) text = href; - return "" HTML(text) "" inline( substr( line, len + 1) ); - - # quick links ("automatic links" in md doc) - } else if ( match( line, /^<[a-zA-Z]+:\/\/([-\.[:alnum:]]+)(:[0-9]*)?(\/[^>]*)?>/ ) ) { - len = RLENGTH; - href = HTML( substr( line, 2, len - 2) ); - return "" href "" inline( substr( line, len + 1) ); - - # quick link email - } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) { - len = RLENGTH; - href = HTML( substr( line, 2, len - 2) ); - return "" href "" inline( substr( line, len + 1) ); - - # Verbatim inline HTML - } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { - len = RLENGTH; - return substr( line, 1, len) inline(substr(line, len + 1)); - - # inline links - } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) { - len = RLENGTH; - text = href = title = substr( line, 1, len); - sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text); - sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href); - sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title); - - if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } - if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } - else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } - else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } - - gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); - - return "" \ - inline( text ) "" inline( substr( line, len + 1) ); - - # reference style links - } else if ( match(line, 
/^\[([^]]+)\] ?\[([^]]*)\]/ ) ) { - len = RLENGTH; text = id = substr(line, 1, len); - sub(/\n.*$/, "", text); sub(/^\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text); - sub(/\n.*$/, "", id); sub(/^\[([^]]+)\] ?\[/, "", id); sub(/\].*$/, "", id); - # text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, text ); - # id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, id ); - if ( ! id ) id = text; - if ( rl_href[id] && rl_title[id] ) { - return "" inline(text) "" inline( substr( line, len + 1) ); - } else if ( rl_href[id] ) { - return "" inline(text) "" inline( substr( line, len + 1) ); - } else { - return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); - } - - # inline images - } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) { - len = RLENGTH; text = href = title = attrib = substr( line, 1, len); - - sub("^!\\[", "", text); - sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text); - - sub("^!" lix "\\([\n\t ]*", "", href); - sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href); - - sub("^!" lix "\\([\n\t ]*" lid, "", title); - sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title); - sub("^[\n\t ]+", "", title); +function inline( line, LOCAL, len, text, code, href, guard, ret ) { + ret = ""; + while (line !~ /^$/) { + # omit processing of escaped characters + if ( line ~ /^\\./) { + ret = ret HTML(substr(line, 2, 1)); line = substr(line, 3); + continue; + + # hard brakes + } else if ( match(line, /^ \n/) ) { + ret = ret "
\n"; line = substr(line, RLENGTH + 1); + continue; + + # ``code spans`` + } else if ( match( line, /^`+/) ) { + len = RLENGTH + guard = substr( line, 1, len ) + if ( match(line, guard ".*" guard) ) { + code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1) + len = 2 * length(guard) + length(code) + # strip single surrounding white spaces + gsub( /^ | $/, "", code) + # escape HTML within code span + gsub( /&/, "\\&", code ); gsub( //, "\\>", code ); + ret = ret "" code ""; line = substr( line, len + 1 ); + continue; + } - sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); - sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + # Macros + } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) { + len = RLENGTH; + ret = ret "" HTML( substr( line, 3, len - 4 ) ) ""; line = substr(line, len + 1); + continue; + + # Wiki style links + } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) { + len = RLENGTH; href = text = substr(line, 1, len); + sub(/^\[\[/, "", href); sub(/(\|([^]]+))?\]\].*$/, "", href); + sub(/^\[\[([^]|]+)/, "", text); sub(/\]\].*$/, "", text); sub(/^\|/, "", text); + # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", href ); + # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", text ); + if ( ! 
text ) text = href; + ret = ret "" HTML(text) ""; line = substr( line, len + 1); + continue; + + # quick links ("automatic links" in md doc) + } else if ( match( line, /^<[a-zA-Z]+:\/\/([-\.[:alnum:]]+)(:[0-9]*)?(\/[^>]*)?>/ ) ) { + len = RLENGTH; + href = HTML( substr( line, 2, len - 2) ); + ret = ret "" href ""; line = substr( line, len + 1); + continue; + + # quick link email + # } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) { + } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-
zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9])?)*>/ ) ) { + len = RLENGTH; + href = HTML( substr( line, 2, len - 2) ); + ret = ret "" href ""; line = substr( line, len + 1); + continue; + + # Verbatim inline HTML + } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { + len = RLENGTH; + ret = ret substr( line, 1, len); line =substr(line, len + 1); + continue; + + # inline links + } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) { + len = RLENGTH; + text = href = title = substr( line, 1, len); + sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text); + sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href); + sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + ret = ret "" \ + inline( text ) ""; + line = substr( line, len + 1); + continue; + + # reference style links + } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) { + len = RLENGTH; text = id = substr(line, 1, len); + sub(/\n.*$/, "", text); sub(/^\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text); + sub(/\n.*$/, "", id); sub(/^\[([^]]+)\] ?\[/, "", 
id); sub(/\].*$/, "", id); + # text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, text ); + # id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, id ); + if ( ! id ) id = text; + + if ( rl_href[id] && rl_title[id] ) { + ret = ret "" inline(text) ""; + line = substr( line, len + 1); + continue; + + } else if ( rl_href[id] ) { + ret = ret "" inline(text) ""; line = substr( line, len + 1); + continue; + + } else { + ret = ret "" HTML(substr(line, 1, len)); line = substr(line, len + 1); + continue; + } - if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } - if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } - else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } - else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + # inline images + } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) { + len = RLENGTH; text = href = title = attrib = substr( line, 1, len); + + sub("^!\\[", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text); + + sub("^!" lix "\\([\n\t ]*", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href); + + sub("^!" lix "\\([\n\t ]*" lid, "", title); + sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title); + sub("^[\n\t ]+", "", title); + + sub("^!" 
lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); + sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); + gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + ret = ret "\"""; + line = substr( line, len + 1); + continue; + + # reference style images + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) { + len = RLENGTH; text = id = substr(line, 1, len); + sub(/\n.*$/, "", text); sub(/^!\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text); + sub(/\n.*$/, "", id); sub(/^!\[([^]]+)\] ?\[/, "", id); sub(/\].*$/, "", id); + # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + # id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); + if ( ! 
id ) id = text; + if ( rl_href[id] && rl_title[id] ) { + ret = ret "\"""; + line = substr( line, len + 1); + continue; + + } else if ( rl_href[id] ) { + ret = ret "\"""; + line = substr( line, len + 1); + continue; + + } else { + ret = ret "" HTML(substr(line, 1, len)); line = substr(line, len + 1); + continue; + } - gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); - gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); - - return "\""" inline( substr( line, len + 1) ); - - # reference style images - } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) { - len = RLENGTH; text = id = substr(line, 1, len); - sub(/\n.*$/, "", text); sub(/^!\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text); - sub(/\n.*$/, "", id); sub(/^!\[([^]]+)\] ?\[/, "", id); sub(/\].*$/, "", id); - # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); - # id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); - if ( ! id ) id = text; - if ( rl_href[id] && rl_title[id] ) { - return "\""" \ - inline( substr( line, len + 1) ); - } else if ( rl_href[id] ) { - return "\""" \ - inline( substr( line, len + 1) ); + # ~~strikeout~~ (pandoc) + } else if ( match(line, /^~~([[:graph:]]|[[:graph:]]([^~]|~[^~])*[[:graph:]])~~/) ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 3, len - 4 ) ) ""; line = substr( line, len + 1 ); + continue; + + # ^superscript^ (pandoc) + } else if ( match(line, /^\^([^[:space:]^]|\\[ ^])+\^/) ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 2 ) ) ""; line = substr( line, len + 1 ); + continue; + + # ~subscript~ (pandoc) + } else if ( match(line, /^~([^[:space:]~]|\\[ ~])+~/) ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 2 ) ) ""; line = substr( line, len + 1 ); + continue; + + # ignore embedded underscores (pandoc, php md) + } else if ( match(line, "^[[:alnum:]](__|_)") ) { + ret = ret HTML(substr( line, 1, RLENGTH)); line = substr(line, RLENGTH + 1); + continue; + + # 
__strong__$ + } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__$") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 3, len - 4 ) ) ""; line = substr( line, len + 1 ); + continue; + + # __strong__ + } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__[[:space:][:punct:]]") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 3, len - 5 ) ) ""; line = substr( line, len); + continue; + + # **strong** + } else if ( match(line, "^\\*\\*(([^\\*[:space:]]|" iea ")|([^\\*[:space:]]|" iea ")(" na "|" iea ")*([^\\*[:space:]]|" iea "))\\*\\*") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 3, len - 4 ) ) ""; line = substr( line, len + 1 ); + continue; + + # _em_$ + } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_$") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 2 ) ) ""; line = substr( line, len + 1 ); + continue; + + # _em_ + } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_[[:space:][:punct:]]") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 3 ) ) ""; line = substr( line, len ); + continue; + + # *em* + } else if ( match(line, "^\\*(([^\\*[:space:]]|" isa ")|([^\\*[:space:]]|" isa ")(" na "|" isa ")*([^\\*[:space:]]|" isa "))\\*") ) { + len = RLENGTH; + ret = ret "" inline( substr( line, 2, len - 2 ) ) ""; line = substr( line, len + 1 ); + continue; + + # Literal HTML entities + # } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) { + # mawk does not support repitition ranges + } else if ( match( line, 
/^&([a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?|#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) { + len = RLENGTH; + ret = ret substr( line, 1, len ); line = substr(line, len + 1); + continue; + + # Arrows + } else if ( line ~ /^-->( |$)/) { # ignore multidash-arrow + ret = ret "-->"; line = substr(line, 4); + continue; + } else if ( line ~ /^<-( |$)/) { + ret = ret "←"; line = substr(line, 3); + continue; + } else if ( line ~ /^->( |$)/) { + ret = ret "→"; line = substr(line, 3); + continue; + + # Escape lone HTML character + } else if ( match( line, /^[&<>"']/) ) { + ret = ret HTML(substr(line, 1, 1)); line = substr(line, 2); + continue; + + # continue walk over string } else { - return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); + ret = ret substr(line, 1, 1); line = substr(line, 2); + continue; } - - # ~~strikeout~~ (pandoc) - } else if ( match(line, /^~~([[:graph:]]|[[:graph:]]([^~]|~[^~])*[[:graph:]])~~/) ) { - len = RLENGTH; - return "" inline( substr( line, 3, len - 4 ) ) "" inline( substr( line, len + 1 ) ); - - # ^superscript^ (pandoc) - } else if ( match(line, /^\^([^[:space:]^]|\\[ ^])+\^/) ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - - # ~subscript~ (pandoc) - } else if ( match(line, /^~([^[:space:]~]|\\[ ~])+~/) ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - - # ignore embedded underscores (pandoc, php md) - } else if ( match(line, "^[[:alnum:]](__|_)") ) { - return HTML(substr( line, 1, RLENGTH)) inline( substr(line, RLENGTH + 1) ); - - # __strong__$ - } else if ( match(line, 
"^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__$") ) { - len = RLENGTH; - return "" inline( substr( line, 3, len - 4 ) ) "" inline( substr( line, len + 1 ) ); - - # __strong__ - } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__[[:space:][:punct:]]") ) { - len = RLENGTH; - return "" inline( substr( line, 3, len - 5 ) ) "" inline( substr( line, len) ); - - # **strong** - } else if ( match(line, "^\\*\\*(([^\\*[:space:]]|" iea ")|([^\\*[:space:]]|" iea ")(" na "|" iea ")*([^\\*[:space:]]|" iea "))\\*\\*") ) { - len = RLENGTH; - return "" inline( substr( line, 3, len - 4 ) ) "" inline( substr( line, len + 1 ) ); - - # _em_$ - } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_$") ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - - # _em_ - } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_[[:space:][:punct:]]") ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 3 ) ) "" inline( substr( line, len ) ); - - # *em* - } else if ( match(line, "^\\*(([^\\*[:space:]]|" isa ")|([^\\*[:space:]]|" isa ")(" na "|" isa ")*([^\\*[:space:]]|" isa "))\\*") ) { - len = RLENGTH; - return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - - # Literal HTML entities - # } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) { - # mawk does not support repitition ranges - } else if ( match( line, 
/^&([a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?|#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) { - len = RLENGTH; - return substr( line, 1, len ) inline(substr(line, len + 1)); - - # Arrows - } else if ( line ~ /^-->( |$)/) { # ignore multidash-arrow - return "-->" inline( substr(line, 4) ); - } else if ( line ~ /^<-( |$)/) { - return "←" inline( substr(line, 3) ); - } else if ( line ~ /^->( |$)/) { - return "→" inline( substr(line, 3) ); - - # Escape lone HTML character - } else if ( match( line, /^[&<>"']/) ) { - return HTML(substr(line, 1, 1)) inline(substr(line, 2)); - - # continue walk over string - } else { - return substr(line, 1, 1) inline( substr(line, 2) ); } + return ret; } function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) { @@ -364,14 +406,14 @@ function _nblock( block, LOCAL, sec, n ) { for ( n = blvl + 1; n in BL; n++) { delete BL[n]; } block = _block( block ); - match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); + match(hstack, /([0-9]+( [0-9]+)( [0-9]+)?( [0-9]+)?( [0-9]+)?( [0-9]+)?)$/); split( substr(hstack, RSTART), HL); sec = ""; for ( n = 1; n <= 6; n++ ) { sec = sec (HL[n]?"":""); } - sub("( +[0-9]+){6} *$", "", hstack); blvl--; + sub("( +[0-9]+)( +[0-9]+)?( +[0-9]+)?( +[0-9]+)?( +[0-9]+)?( +[0-9]+)? *$", "", hstack); blvl--; return block sec; } -function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list ) { +function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list, tmp ) { gsub( "(^\n+|\n+$)", "", block ); if ( block == "" ) { -- 2.39.2