X-Git-Url: https://git.plutz.net/?p=cgilite;a=blobdiff_plain;f=markdown.awk;h=6271996440dab3b37132f8eccdecbf6094a4db99;hp=d28c7cfc760a2ed6c9fde6e040a2bf981c008b38;hb=9bb22561275a9ce0d5403a12fe7e8aeeeb2792e9;hpb=b2b268b458208ba7746052e05f1f1f5ced081023 diff --git a/markdown.awk b/markdown.awk index d28c7cf..6271996 100755 --- a/markdown.awk +++ b/markdown.awk @@ -4,10 +4,6 @@ # EXPERIMENTAL Markdown processor with minimal dependencies. # Meant to support all features of John Grubers basic Markdown # + a number of common extensions, mostly inspired by Pandoc Markdown -# -# ToDo: -# - HTML processing / escaping (according to environment flag) -# - em-dashes and arrows # Supported Features / TODO: # ========================== @@ -50,12 +46,13 @@ # - ? Simple table (pandoc) # - ? Multiline table (pandoc) # - ? Grid table (pandoc) -# - ? Pipe table (php md pandoc) +# - [x] Pipe table (php md pandoc) # - [x] Line blocks (pandoc) -# - [x] Task lists (pandoc) +# - [x] Task lists (pandoc, custom) # - [ ] Definition lists (php md, pandoc) # - [-] Numbered example lists (pandoc) # - [-] Metadata blocks (pandoc) +# - [x] Metadata blocks (custom) # - [x] Fenced Divs (pandoc) # # Extensions - Inline elements: @@ -126,12 +123,18 @@ function inline( line, LOCAL, len, code, href, guard ) { href = HTML( substr( line, 2, len - 2) ); return "" href "" inline( substr( line, len + 1) ); + # quick link email + } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) { + len = RLENGTH; + href = HTML( substr( line, 2, len - 2) ); + return "" href "" inline( substr( line, len + 1) ); + # inline links - } else if ( match(line, /^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/) ) { + } else if ( match(line, /^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) { len = RLENGTH; - text = gensub(/^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\1", "g", line); - href = gensub(/^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\2", "g", line); - title = gensub(/^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\4", "g", line); + text = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\1", 1, substr(line, 1, len) ); + href = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\2", 1, substr(line, 1, len) ); + title = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\4", 1, substr(line, 1, len) ); if ( title ) { return "" inline( text ) "" inline( substr( line, len + 1) ); } else { @@ -141,8 +144,8 @@ function inline( line, LOCAL, len, code, href, guard ) { # reference style links } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) { len = RLENGTH; - text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, line); - id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, line); + text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { return "" inline(text) "" inline( substr( line, len + 1) ); @@ -155,9 +158,9 @@ function inline( line, LOCAL, len, code, href, guard ) { # inline images } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/) ) { len = RLENGTH; - text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\1", "g", line); - href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\2", "g", line); - title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\4", "g", line); + text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\1", "g", substr(line, 1, len) ); + href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\2", "g", substr(line, 1, len) ); + title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\4", "g", substr(line, 1, len) ); if ( title ) { return "\""" inline( substr( line, len + 1) ); } else { @@ -167,8 +170,8 @@ function inline( line, LOCAL, len, code, href, guard ) { # reference style images } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\]/ ) ) { len = RLENGTH; - text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, line); - id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, line); + text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { return "\""" inline( substr( line, len + 1) ); @@ -279,6 +282,12 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib } else if ( AllowHTML && match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { len = RLENGTH; st = RSTART; return substr(block, st, len) _block(substr(block, st + len)); + + # Metadata (custom, block starting with %something) + # Metadata is ignored but can be interpreted externally + } else if ( match(block, /^%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { + len = RLENGTH; st = RSTART; + return _block( substr( block, len + 1) ); # Blockquote (leading >) } else if ( match( block, /^> /) ) { @@ -287,6 +296,53 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib return "
\n" _block( gensub( /(^|\n)> /, "\n", "g", substr(block, 1, st - 1) ) ) "
\n\n" \ _block( substr(block, st + len) ); + # Pipe Tables (pandoc / php md / gfm ) + } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \ + "((\\|)?:?(-+:?[\\|+])+:?-+:?(\\|)?)\n" \ + "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) { + len = RLENGTH; st = RSTART; + #initialize empty arrays + split("", talign); split("", tarray); + cols = 0; ttext = ""; cnt=0; + + # table header and alignment + split( gensub( /(^\||\|$)/, "", "g", \ + gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ + substr(block, 1, match(block, /(\n|$)/)) \ + )), tarray, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + cols = split( \ + gensub( /(^\||\|$)/, "", "g", \ + substr(block, 1, match(block, /(\n|$)/)) \ + ), talign, /[+\|]/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + + for( cnt = 1; cnt < cols; cnt++ ) { + if (match(talign[cnt], /:-+:/)) talign[cnt]="center"; + else if (match(talign[cnt], /-+:/)) talign[cnt]="right"; + else if (match(talign[cnt], /:-+/)) talign[cnt]="left"; + else talign[cnt]=""; + } + + ttext = "\n" + for (cnt = 1; cnt < cols; cnt++) + ttext = ttext "" inline(tarray[cnt]) "" + ttext = ttext "\n\n" + + while ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ){ + split( gensub( /(^\||\|$)/, "", "g", \ + gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ + substr(block, 1, match(block, /(\n|$)/)) \ + )), tarray, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + + ttext = ttext "" + for (cnt = 1; cnt < cols; cnt++) + ttext = ttext "" inline(tarray[cnt]) "" + ttext = ttext "\n" + } + return "" ttext "
\n" _block( substr(block, len + 1) ); + # Line Blocks (pandoc) } else if ( match(block, /^\| [^\n]*(\n|$)(\| [^\n]*(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { len = RLENGTH; st = RSTART; @@ -328,7 +384,7 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib } else if ( match( block, /^(~~~+|```+)/ ) ) { guard = substr( block, 1, RLENGTH ); code = gensub(/^[^\n]+\n/, "", 1, block); - attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, block); + attrib = gensub(/^(~~~+|```+)[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\2", 1, block); gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { @@ -431,12 +487,28 @@ function _list( block, last, LOCAL, p) { } sub( /\n$/, "", p ); - # Task List (pandoc) - if ( p ~ /^\[ \].*/ ) { p = "" substr(p, 4); } - else if ( p ~ /^\[[xX]\].*/ ) { p = "" substr(p, 4); } - else if ( p ~ /^

\[ \].*/ ) { p = "

" substr(p, 7); } - else if ( p ~ /^

\[[xX]\].*/ ) { p = "

" substr(p, 7); } - return "

  • " p "
  • \n" _list( block, last ); + # Task List (pandoc, custom) + if ( p ~ /^\[ \].*/ ) { return "
  • " \ + substr(p, 4) "
  • \n" _list( block, last ); + } else if ( p ~ /^\[-\].*/ ) { return "
  • " \ + substr(p, 4) "
  • \n" _list( block, last ); + } else if ( p ~ /^\[\?\].*/ ) { return "
  • " \ + substr(p, 4) "
  • \n" _list( block, last ); + } else if ( p ~ /^\[\/\].*/ ) { return "
  • " \ + substr(p, 4) "
  • \n" _list( block, last ); + } else if ( p ~ /^\[[xX]\].*/ ) { return "
  • " \ + substr(p, 4) "
  • \n" _list( block, last ); + } else if ( p ~ /^

    \[ \].*/ ) { return "

  • " \ + substr(p, 7) "

  • \n" _list( block, last ); + } else if ( p ~ /^

    \[-\].*/ ) { return "

  • " \ + substr(p, 7) "

  • \n" _list( block, last ); + } else if ( p ~ /^

    \[\?\].*/ ) { return "

  • " \ + substr(p, 7) "

  • \n" _list( block, last ); + } else if ( p ~ /^

    \[\/\].*/ ) { return "

  • " \ + substr(p, 7) "

  • \n" _list( block, last ); + } else if ( p ~ /^

    \[[xX]\].*/ ) { return "

  • " \ + substr(p, 7) "

  • \n" _list( block, last ); + } else { return "
  • " p "
  • \n" _list( block, last ); } } BEGIN {