X-Git-Url: https://git.plutz.net/?p=cgilite;a=blobdiff_plain;f=markdown.awk;h=e63541bebfa78ffc70053ca23a1d94ff6b586c66;hp=65e0aefe2bd3ed4da3f4d3a543cc12511c31b4f9;hb=76805496f22af93204e2c6225f2e4f32902bcf2b;hpb=882f37daf3eac1ee8589eb623ec3bc4cfc3c7e9f diff --git a/markdown.awk b/markdown.awk index 65e0aef..e63541b 100755 --- a/markdown.awk +++ b/markdown.awk @@ -4,10 +4,6 @@ # EXPERIMENTAL Markdown processor with minimal dependencies. # Meant to support all features of John Grubers basic Markdown # + a number of common extensions, mostly inspired by Pandoc Markdown -# -# ToDo: -# - HTML processing / escaping (according to environment flag) -# - em-dashes and arrows # Supported Features / TODO: # ========================== @@ -45,7 +41,7 @@ # - ? Heading identifiers (php md, pandoc) # - [x] Automatic heading identifiers (custom) # - [x] Fenced code blocks (php md, pandoc) -# - [-] Fenced code attributes +# - [x] Fenced code attributes # - [ ] Tables # - ? Simple table (pandoc) # - ? Multiline table (pandoc) @@ -56,7 +52,8 @@ # - [ ] Definition lists (php md, pandoc) # - [-] Numbered example lists (pandoc) # - [-] Metadata blocks (pandoc) -# - [-] Fenced Divs (pandoc) +# - [x] Metadata blocks (custom) +# - [x] Fenced Divs (pandoc) # # Extensions - Inline elements: # ---------------------------- @@ -126,12 +123,18 @@ function inline( line, LOCAL, len, code, href, guard ) { href = HTML( substr( line, 2, len - 2) ); return "" href "" inline( substr( line, len + 1) ); + # quick link email + } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) { + len = RLENGTH; + href = HTML( substr( line, 2, len - 2) ); + return "" href "" inline( substr( line, len + 1) ); + # inline links - } else if ( match(line, /^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/) ) { + } else if ( match(line, /^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) { len = RLENGTH; - text = gensub(/^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\1", "g", line); - href = gensub(/^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\2", "g", line); - title = gensub(/^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\4", "g", line); + text = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\1", 1, substr(line, 1, len) ); + href = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\2", 1, substr(line, 1, len) ); + title = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\4", 1, substr(line, 1, len) ); if ( title ) { return "" inline( text ) "" inline( substr( line, len + 1) ); } else { @@ -141,23 +144,23 @@ function inline( line, LOCAL, len, code, href, guard ) { # reference style links } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) { len = RLENGTH; - text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, line); - id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, line); + text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { - return "" inline(text) "" inline( substr( line, len + 1) ); + return "" inline(text) "" inline( substr( line, len + 1) ); } else if ( rl_href[id] ) { - return "" inline(text) "" inline( substr( line, len + 1) ); + return "" inline(text) "" inline( substr( line, len + 1) ); } else { - return "" substr(line, 1, len) inline( substr(line, len + 1) ); + return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); } # inline images } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/) ) { len = RLENGTH; - text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\1", "g", line); - href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\2", "g", line); - title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\4", "g", line); + text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\1", "g", substr(line, 1, len) ); + href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\2", "g", substr(line, 1, len) ); + title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\4", "g", substr(line, 1, len) ); if ( title ) { return "\""" inline( substr( line, len + 1) ); } else { @@ -167,15 +170,15 @@ function inline( line, LOCAL, len, code, href, guard ) { # reference style images } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\]/ ) ) { len = RLENGTH; - text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, line); - id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, line); + text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { - return "\""" inline( substr( line, len + 1) ); + return "\""" inline( substr( line, len + 1) ); } else if ( rl_href[id] ) { - return "\""" inline( substr( line, len + 1) ); + return "\""" inline( substr( line, len + 1) ); } else { - return "" substr(line, 1, len) inline( substr(line, len + 1) ); + return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); } # ~~strikeout~~ (pandoc) @@ -195,7 +198,7 @@ function inline( line, LOCAL, len, code, href, guard ) { # ignore embedded underscores (pandoc, php md) } else if ( match(line, "^[[:alnum:]](__|_)") ) { - return substr( line, 1, RLENGTH) inline( substr(line, RLENGTH + 1) ); + return HTML(substr( line, 1, RLENGTH)) inline( substr(line, RLENGTH + 1) ); # __strong__$ } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__$") ) { @@ -252,7 +255,7 @@ function inline( line, LOCAL, len, code, href, guard ) { } } -function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) { +function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib ) { gsub( /^\n+|\n+$/, "", block ); if ( block == "" ) { @@ -279,6 +282,12 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) { } else if ( AllowHTML && match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { len = RLENGTH; st = RSTART; return substr(block, st, len) _block(substr(block, st + len)); + + # Metadata (custom, block starting with %something) + # Metadata is ignored but can be interpreted externally + } else if ( match(block, /^%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { + len = RLENGTH; st = RSTART; + return _block( substr( block, len + 1) ); # Blockquote (leading >) } else if ( match( block, /^> /) ) { @@ -306,13 +315,34 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) { return "
" HTML( code ) "
\n" \ _block( substr( block, len + 1 ) ); + # Fenced Divs (pandoc, custom) + } else if ( match( block, /^(:::+)/ ) ) { + guard = substr( block, 1, RLENGTH ); + code = gensub(/^[^\n]+\n/, "", 1, block); + attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, block); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); + gsub(/(^ | $)/, "", attrib); + if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { + len = RLENGTH; st = RSTART; + return "
" _block( substr(code, 1, st - 1) ) "
\n" \ + _block( substr( code, st + len ) ); + } else { + match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ ); + len = RLENGTH; st = RSTART; + return "

" inline( substr(block, 1, st - 1) ) "

\n" \ + _block( substr(block, st + len) ); + } + # Fenced Code Block (pandoc) } else if ( match( block, /^(~~~+|```+)/ ) ) { guard = substr( block, 1, RLENGTH ); code = gensub(/^[^\n]+\n/, "", 1, block); + attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, block); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); + gsub(/(^ | $)/, "", attrib); if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { len = RLENGTH; st = RSTART; - return "
" HTML( substr(code, 1, st - 1) ) "
\n" \ + return "
" HTML( substr(code, 1, st - 1) ) "
\n" \ _block( substr( code, st + len ) ); } else { match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ ); @@ -369,6 +399,12 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) { return "" inline( htxt ) "\n\n" \ _block( substr( block, len + 1) ); + # Split paragraphs + } else if ( match( block, /(^|\n)[[:space:]]*(\n|$)/) ) { + len = RLENGTH; st = RSTART; + return _block( substr(block, 1, st - 1) ) "\n" \ + _block( substr(block, st + len) ); + # Horizontal rule } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) { len = RLENGTH; st = RSTART; @@ -376,10 +412,7 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) { # Plain paragraph } else { - match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ ); - len = RLENGTH; st = RSTART; - return "

" inline( substr(block, 1, st - 1) ) "

\n" \ - _block( substr(block, st + len) ); + return "

" inline(block) "

\n"; } }