X-Git-Url: http://git.plutz.net/?a=blobdiff_plain;f=markdown.awk;h=100242cb1ebb9b0e2a706a1025dbc552c51c190b;hb=fa6be3f1cfe2df6e0fc89f26dc1100f53ce2c061;hp=fa60543b49d43ed013325bf939e2d93ed22e7a5b;hpb=697a1bb3a7823c889abe31df5168a57dd11f37d4;p=cgilite diff --git a/markdown.awk b/markdown.awk index fa60543..100242c 100755 --- a/markdown.awk +++ b/markdown.awk @@ -48,7 +48,8 @@ # - ? Simple table (pandoc) # - ? Multiline table (pandoc) # - [x] Grid table (pandoc) -# - [x] Pipe table (php md pandoc) +# - [x] Headerless +# - [x] Pipe table (php md, pandoc) # - [x] Line blocks (pandoc) # - [x] Task lists (pandoc, custom) # - [ ] Definition lists (php md, pandoc) @@ -64,7 +65,7 @@ # - [x] ^Superscript^ ~Subscript~ (pandoc) # - [-] Bracketed spans (pandoc) # - [-] Inline attributes (pandoc) -# - [x] Image attributes (custom, pandoc inspired, inline only) +# - [x] Image attributes (custom, pandoc inspired, not for reference style) # - [x] Wiki style links [[PageName]] / [[PageName|Link Text]] # - [-] TEX-Math (pandoc) # - ? Footnotes (php md) @@ -74,7 +75,7 @@ # - ? ... three-dot ellipsis (smartypants) # - [-] en-dash (smartypants) # - [ ] Automatic em-dash / en-dash -# - [ ] Automatic -> Arrows <- +# - [x] Automatic -> Arrows <- (custom) function debug(text) { printf "\n---\n%s\n---\n", text > "/dev/stderr"; } @@ -113,7 +114,7 @@ function inline( line, LOCAL, len, code, href, guard ) { if ( line ~ /^$/ ) { # Recursion End return ""; - # omit processing of escaped characters + # omit processing of escaped characters } else if ( line ~ /^\\[]\\`\*_\{\}\(\)#\+-\.![]/) { return substr(line, 2, 1) inline( substr(line, 3) ); @@ -157,13 +158,13 @@ function inline( line, LOCAL, len, code, href, guard ) { # inline links # ,_______________________Image____________________________, - } else if ( match(line, /^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) { + } else if ( match(line, /^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) { len = RLENGTH; - text = gensub(/^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ + text = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ "\\1", 1, substr(line, 1, len) ); - href = gensub(/^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ + href = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ "\\4", 1, substr(line, 1, len) ); - title = gensub(/^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ + title = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ "\\6", 1, substr(line, 1, len) ); if ( title ) { return "" inline( text ) "" inline( substr( line, len + 1) ); @@ -186,12 +187,12 @@ function inline( line, LOCAL, len, code, href, guard ) { } # inline images - } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) { + } else if ( match(line, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) { len = RLENGTH; - text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) ); - href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) ); - title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) ); - attrib = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) ); + text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) ); + href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) ); + title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) ); + attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) ); if ( title && attrib ) { return "\""" \ inline( substr( line, len + 1) ); @@ -207,10 +208,10 @@ function inline( line, LOCAL, len, code, href, guard ) { } # reference style images - } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\]/ ) ) { + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) { len = RLENGTH; - text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); - id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); + text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { return "\""" \ @@ -286,6 +287,14 @@ function inline( line, LOCAL, len, code, href, guard ) { len = RLENGTH; return substr( line, 1, len ) inline(substr(line, len + 1)); + # Arrows + } else if ( line ~ /^-->( |$)/) { # ignore multidash-arrow + return "-->" inline( substr(line, 4) ); + } else if ( line ~ /^<-( |$)/) { + return "←" inline( substr(line, 3) ); + } else if ( line ~ /^->( |$)/) { + return "→" inline( substr(line, 3) ); + # Escape lone HTML character } else if ( match( line, /^[&<>"']/) ) { return HTML(substr(line, 1, 1)) inline(substr(line, 2)); @@ -385,46 +394,62 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib return "" ttext "
\n" _block(block); # Grid Tables (pandoc) - } else if ( match(block, "^\\+(-+\\+)+\n" \ - "(\\|([^\n]+\\|)+\n)+" \ - "\\+(:?=+:?\\+)+\n" \ - "((\\|([^\n]+\\|)+\n)+" \ - "\\+(-+\\+)+(\n|$))+" \ - ) ) { + # (with, and without header) + } else if ( match( block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "(\\+(:?=+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+", \ + tread \ + ) || \ + match( block, "^()()()" \ + "(\\+(:?-+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+", \ + tread \ + ) ) { len = RLENGTH; st = RSTART; #initialize empty arrays - split("", talign); split("", tarray); split("", tread); + split("", talign); split("", tarray); # split("", tread); cols = 0; cnt=0; ttext = ""; - # table header and alignment - block = substr(block, match(block, /(\n|$)/) + 1 ); - while ( match(block, "^\\|([^\n]+\\|)+\n") ) { - cols = split( gensub( /(^\||\|$)/, "", "g", \ - gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - )), tread, /\|/); - block = substr(block, match(block, /(\n|$)/) + 1 ); - for (cnt = 1; cnt < cols; cnt++) - tarray[cnt] = tarray[cnt] "\n" tread[cnt]; + # table alignment + cols = split( gensub( /(^\+|\+$)/, "", "g", tread[4] ), talign, /\+/ ); + + for (cnt = 1; cnt <= cols; cnt++) { + if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center"; + else if (match(talign[cnt], /(-+|=+):/)) talign[cnt]="right"; + else if (match(talign[cnt], /:(-+|=+)/ )) talign[cnt]="left"; + else talign[cnt]=""; } - cols = split( \ - gensub( /(^\+|\+$)/, "", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - ), talign, /\+/); - block = substr(block, match(block, /(\n|$)/) + 1 ); + if ( match(block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "\\+(:?=+:?\\+)+\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) ) { + # table header + block = substr(block, match(block, /(\n|$)/) + 1 ); + while ( match(block, "^\\|([^\n]+\\|)+\n") ) { + cols = split( gensub( /(^\||\|$)/, "", "g", \ + gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ + substr(block, 1, match(block, /(\n|$)/)) \ + )), tread, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + for (cnt = 1; cnt <= cols; cnt++) + tarray[cnt] = tarray[cnt] "\n" tread[cnt]; + } - for (cnt = 1; cnt < cols; cnt++) { - if (match(talign[cnt], /:=+:/)) talign[cnt]="center"; - else if (match(talign[cnt], /=+:/)) talign[cnt]="right"; - else if (match(talign[cnt], /:=+/ )) talign[cnt]="left"; - else talign[cnt]=""; + ttext = "\n" + for (cnt = 1; cnt <= cols; cnt++) + ttext = ttext "" _block(tarray[cnt]) "" + ttext = ttext "\n" } - ttext = "\n" - for (cnt = 1; cnt < cols; cnt++) - ttext = ttext "" _block(tarray[cnt]) "" - ttext = ttext "\n\n" + # table body + block = substr(block, match(block, /(\n|$)/) + 1 ); + ttext = ttext "\n" while ( match(block, /^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){ split("", tarray); @@ -434,13 +459,13 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib substr(block, 1, match(block, /(\n|$)/)) \ )), tread, /\|/); block = substr(block, match(block, /(\n|$)/) + 1 ); - for (cnt = 1; cnt < cols; cnt++) + for (cnt = 1; cnt <= cols; cnt++) tarray[cnt] = tarray[cnt] "\n" tread[cnt]; } block = substr(block, match(block, /(\n|$)/) + 1 ); ttext = ttext "" - for (cnt = 1; cnt < cols; cnt++) + for (cnt = 1; cnt <= cols; cnt++) ttext = ttext "" _block(tarray[cnt]) "" ttext = ttext "\n" } @@ -557,12 +582,12 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib _block( substr( block, len + 1) ); # block images (wrapped in
) - } else if ( match(block, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n|$)/) ) { + } else if ( match(block, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n|$)/) ) { len = RLENGTH; - text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\1", "g", block); - href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\2", "g", block); - title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\4", "g", block); - attrib = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\6", "g", block); + text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\1", "g", block); + href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\2", "g", block); + title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\4", "g", block); + attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\6", "g", block); if ( title && attrib ) { return "
" \ "\""" \ @@ -588,10 +613,10 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib } # reference style images (block) - } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\](\n|$)/ ) ) { + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) { len = RLENGTH; - text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block); - id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); + text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block); + id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { return "
" \ @@ -608,6 +633,12 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib return "

" HTML(substr(block, 1, len)) "

\n" _block( substr(block, len + 1) ); } + # Macros (standalone <> calls handled as block, so they are not wrapped in paragraph) + } else if ( AllowMacros && match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/) ) { + len = RLENGTH; + text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block); + return macro(text) _block(substr(block, len + 1)); + # Split paragraphs } else if ( match( block, /(^|\n)[[:space:]]*(\n|$)/) ) { len = RLENGTH; st = RSTART;