X-Git-Url: https://git.plutz.net/?p=cgilite;a=blobdiff_plain;f=markdown.awk;h=361e6005c3fc42fa5e1f81859d69ec59d0c0a138;hp=3b5a9bb063ce8e18c7a7bc41abfa8569f08bad15;hb=HEAD;hpb=4ee910d547ff383c57c513436a6ed2576f66bf0d diff --git a/markdown.awk b/markdown.awk index 3b5a9bb..34879d2 100755 --- a/markdown.awk +++ b/markdown.awk @@ -122,13 +122,6 @@ function URL ( text, sharp ) { } function inline( line, LOCAL, len, text, code, href, guard ) { - nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # not underline (except when escaped) - na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # not asterisk (except when escaped) - ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner (underline) - isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__" # inner (underline) - iea = "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*" # inner (asterisk) - isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*" # inner (asterisk) - if ( line ~ /^$/ ) { # Recursion End return ""; @@ -154,41 +147,53 @@ function inline( line, LOCAL, len, text, code, href, guard ) { return "" code "" inline( substr( line, len + 1 ) ) } + # Macros + } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) { + len = RLENGTH; + return "" HTML( substr( line, 3, len - 4 ) ) "" inline(substr(line, len + 1)); + # Wiki style links } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) { len = RLENGTH; href = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", 1, substr(line, 1, len) ); text = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", 1, substr(line, 1, len) ); if ( ! text ) text = href; - return "" HTML(text) "" inline( substr( line, len + 1) ); + return "" HTML(text) "" inline( substr( line, len + 1) ); # quick links ("automatic links" in md doc) } else if ( match( line, /^<[a-zA-Z]+:\/\/([-\.[:alnum:]]+)(:[0-9]*)?(\/[^>]*)?>/ ) ) { len = RLENGTH; - href = URL( substr( line, 2, len - 2) ); + href = HTML( substr( line, 2, len - 2) ); return "" href "" inline( substr( line, len + 1) ); # quick link email } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) { len = RLENGTH; - href = URL( substr( line, 2, len - 2) ); + href = HTML( substr( line, 2, len - 2) ); return "" href "" inline( substr( line, len + 1) ); + # Verbatim inline HTML + } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { + len = RLENGTH; + return substr( line, 1, len) inline(substr(line, len + 1)); + # inline links - # ,_______________________Image____________________________, - } else if ( match(line, /^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) { + } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) { len = RLENGTH; - text = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ - "\\1", 1, substr(line, 1, len) ); - href = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ - "\\4", 1, substr(line, 1, len) ); - title = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ - "\\6", 1, substr(line, 1, len) ); - if ( title ) { - return "" inline( text ) "" inline( substr( line, len + 1) ); - } else { - return "" inline( text ) "" inline( substr( line, len + 1) ); - } + text = href = title = substr( line, 1, len); + sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text); + sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href); + sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + return "" \ + inline( text ) "" inline( substr( line, len + 1) ); # reference style links } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) { @@ -197,33 +202,41 @@ function inline( line, LOCAL, len, text, code, href, guard ) { id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { - return "" inline(text) "" inline( substr( line, len + 1) ); + return "" inline(text) "" inline( substr( line, len + 1) ); } else if ( rl_href[id] ) { - return "" inline(text) "" inline( substr( line, len + 1) ); + return "" inline(text) "" inline( substr( line, len + 1) ); } else { return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); } # inline images - } else if ( match(line, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) { - len = RLENGTH; - text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) ); - href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) ); - title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) ); - attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) ); - if ( title && attrib ) { - return "\""" \ - inline( substr( line, len + 1) ); - } else if ( title ) { - return "\""" \ - inline( substr( line, len + 1) ); - } else if ( attrib ) { - return "\""" \ - inline( substr( line, len + 1) ); - } else { - return "\""" \ - inline( substr( line, len + 1) ); - } + } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) { + len = RLENGTH; text = href = title = attrib = substr( line, 1, len); + + sub("^!\\[", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text); + + sub("^!" lix "\\([\n\t ]*", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href); + + sub("^!" lix "\\([\n\t ]*" lid, "", title); + sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title); + sub("^[\n\t ]+", "", title); + + sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); + sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); + gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + return "\""" inline( substr( line, len + 1) ); # reference style images } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) { @@ -232,10 +245,10 @@ function inline( line, LOCAL, len, text, code, href, guard ) { id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { - return "\""" \ + return "\""" \ inline( substr( line, len + 1) ); } else if ( rl_href[id] ) { - return "\""" \ + return "\""" \ inline( substr( line, len + 1) ); } else { return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); @@ -290,16 +303,6 @@ function inline( line, LOCAL, len, text, code, href, guard ) { len = RLENGTH; return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - # Macros - } else if ( AllowMacros && match( line, /^<<([^>]|>[^>])+>>/) ) { - len = RLENGTH; - return macro( substr( line, 3, len - 4 ) ) inline(substr(line, len + 1)); - - # Verbatim inline HTML - } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { - len = RLENGTH; - return substr( line, 1, len) inline(substr(line, len + 1)); - # Literal HTML entities } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) { len = RLENGTH; @@ -388,7 +391,7 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, # Metadata (custom, block starting with %something) # Metadata is ignored but can be interpreted externally - } else if ( match(block, /^%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { + } else if ( match(block, /^%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { len = RLENGTH; st = RSTART; return _block( substr( block, len + 1) ); @@ -402,7 +405,7 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, # Pipe Tables (pandoc / php md / gfm ) } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \ - "((\\|)?:?(-+:?[\\|+])+:?-+:?(\\|)?)\n" \ + "((\\|)?(:?-+:?[\\|+])+:?-+:?(\\|)?)\n" \ "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) { len = RLENGTH; st = RSTART; #initialize empty arrays @@ -636,35 +639,36 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, return headline( n, text, 0 ) _block( substr( block, len + 1) ); # block images (wrapped in
) - } else if ( match(block, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n|$)/) ) { - len = RLENGTH; - text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\1", "g", block); - href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\2", "g", block); - title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\4", "g", block); - attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\6", "g", block); - if ( title && attrib ) { - return "
" \ - "\""" \ - "
" inline(title) "
" \ - "
\n\n" \ - _block( substr( block, len + 1) ); - } else if ( title ) { - return "
" \ - "\""" \ - "
" inline(title) "
" \ - "
\n\n" \ - _block( substr( block, len + 1) ); - } else if ( attrib ) { - return "
" \ - "\""" \ - "
\n\n" \ - _block( substr( block, len + 1) ); - } else { - return "
" \ - "\""" \ - "
\n\n" \ - _block( substr( block, len + 1) ); - } + } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n|$)") ) { + len = RLENGTH; text = href = title = attrib = substr( block, 1, len); + + sub("^!\\[", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", text); + + sub("^!" lix "\\([\n\t ]*", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", href); + + sub("^!" lix "\\([\n\t ]*" lid, "", title); + sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", title); + sub("^[\n\t ]+", "", title); + + sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); + sub("(\n.*)?$", "", attrib); + sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); + + return "
" \ + "\""" \ + (title?"
" inline(title) "
":"") \ + "
\n\n" \ + _block( substr( block, len + 1) ); # reference style images (block) } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) { @@ -673,14 +677,14 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { - return "
" \ - "\""" \ + return "
" \ + "\""" \ "
" inline(rl_title[id]) "
" \ "
\n\n" \ _block( substr( block, len + 1) ); } else if ( rl_href[id] ) { - return "
" \ - "\""" \ + return "
" \ + "\""" \ "
\n\n" \ _block( substr( block, len + 1) ); } else { @@ -688,10 +692,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, } # Macros (standalone <> calls handled as block, so they are not wrapped in paragraph) - } else if ( AllowMacros && match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/) ) { + } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) { len = RLENGTH; text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block); - return macro(text) _block(substr(block, len + 1) ); + return "" HTML(text) "" _block(substr(block, len + 1) ); # Definition list } else if (match( block, "^(([ \t]*\n)*[^:\n \t][^\n]+\n" \ @@ -705,21 +709,23 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, return "\n
\n" _dlist( list ) "
\n" _block( block ); # Unordered list types - } else if ( text = _startlist( block, "ul", "-", "([+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + } else if ( text = _startlist( block, "ul", "-", "([+*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "\\+", "([-*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { return text; - } else if ( text = _startlist( block, "ul", "\\+", "([-*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + } else if ( text = _startlist( block, "ul", "\\*", "([-+•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { return text; - } else if ( text = _startlist( block, "ul", "\\*", "([-+]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + } else if ( text = _startlist( block, "ul", "•", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { return text; # Ordered list types - } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*]|#\\.|[0-9]+\\)|#\\))") ) { + } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*•]|#\\.|[0-9]+\\)|#\\))") ) { return text; - } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*]|[0-9]+\\.|#\\.|#\\))") ) { + } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*•]|[0-9]+\\.|#\\.|#\\))") ) { return text; - } else if ( text = _startlist( block, "ol", "#\\.", "([-+*]|[0-9]+\\.|[0-9]+\\)|#\\))") ) { + } else if ( text = _startlist( block, "ol", "#\\.", "([-+*•]|[0-9]+\\.|[0-9]+\\)|#\\))") ) { return text; - } else if ( text = _startlist( block, "ol", "#\\)", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\))") ) { + } else if ( text = _startlist( block, "ol", "#\\)", "([-+*•]|[0-9]+\\.|#\\.|[0-9]+\\))") ) { return text; # Split paragraphs @@ -820,6 +826,20 @@ BEGIN { HL[1] = 0; HL[2] = 0; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0; # hls = "0 0 0 0 0 0"; + # Universal Patterns + nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # not underline (except when escaped) + na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # not asterisk (except when escaped) + ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner (underline) + isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__" # inner (underline) + iea = "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*" # inner (asterisk) + isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*" # inner (asterisk) + + lix="\\[(\\\\[^\n]|[^]\n\\\\[])*\\]" # link text + lid="(<(\\\\[^\n]|[^\n<>\\\\])*>|(\\\\.|[^()\"'\\\\])+|([^<\n\t ()\\\\]|\\\\[^\n])(\\\\[\n]|[^\n\t \\(\\)\\\\])*)" # link dest + lit="(\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'|\\((\\\\.|[^\\(\\)\\\\])*\\))" # link text + # link text with image def + lii="\\[(\\\\[^\n]|[^]\n\\\\[])*(!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\))?(\\\\[^\n]|[^]\n\\\\[])*\\]" + # Buffering of full file ist necessary, e.g. to find reference links while (getline) { file = file $0 "\n"; } # Clean up MS-DOS line breaks