X-Git-Url: http://git.plutz.net/?a=blobdiff_plain;f=markdown.awk;h=3fa248d25f2877fdc0e6f7eab9a5642635407d75;hb=8ce6dce8725f84096aebcf1a4063eaeee754b92a;hp=b5072666de89f34c4b425211b70dd7471b0d9f21;hpb=01dadd76ff332d89503364a794423564f01805ea;p=cgilite diff --git a/markdown.awk b/markdown.awk index b507266..3fa248d 100755 --- a/markdown.awk +++ b/markdown.awk @@ -48,7 +48,8 @@ # - ? Simple table (pandoc) # - ? Multiline table (pandoc) # - [x] Grid table (pandoc) -# - [x] Pipe table (php md pandoc) +# - [x] Headerless +# - [x] Pipe table (php md, pandoc) # - [x] Line blocks (pandoc) # - [x] Task lists (pandoc, custom) # - [ ] Definition lists (php md, pandoc) @@ -64,7 +65,7 @@ # - [x] ^Superscript^ ~Subscript~ (pandoc) # - [-] Bracketed spans (pandoc) # - [-] Inline attributes (pandoc) -# - [x] Image attributes (custom, pandoc inspired, inline only) +# - [x] Image attributes (custom, pandoc inspired, not for reference style) # - [x] Wiki style links [[PageName]] / [[PageName|Link Text]] # - [-] TEX-Math (pandoc) # - ? Footnotes (php md) @@ -74,7 +75,7 @@ # - ? ... three-dot ellipsis (smartypants) # - [-] en-dash (smartypants) # - [ ] Automatic em-dash / en-dash -# - [ ] Automatic -> Arrows <- +# - [x] Automatic -> Arrows <- (custom) function debug(text) { printf "\n---\n%s\n---\n", text > "/dev/stderr"; } @@ -92,6 +93,7 @@ function URL ( text ) { gsub( /&/, "%26", text ); gsub( /"/, "%22", text ); gsub( /'/, "%27", text ); + gsub( /`/, "%60", text ); gsub( /\?/, "%3F", text ); gsub( /#/, "%23", text ); gsub( /\[/, "%5B", text ); @@ -113,9 +115,9 @@ function inline( line, LOCAL, len, code, href, guard ) { if ( line ~ /^$/ ) { # Recursion End return ""; - # omit processing of escaped characters - } else if ( line ~ /^\\[]\\`\*_\{\}\(\)#\+-\.![]/) { - return substr(line, 2, 1) inline( substr(line, 3) ); + # omit processing of escaped characters + } else if ( line ~ /^\\./) { + return HTML(substr(line, 2, 1)) inline( substr(line, 3) ); # hard brakes } else if ( match(line, /^ \n/) ) { @@ -129,17 +131,17 @@ function inline( line, LOCAL, len, code, href, guard ) { code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1) len = 2 * length(guard) + length(code) # strip single surrounding white spaces - code = gensub( / (.*) /, "\\1", "1" , code) + code = gensub( /^ | $/, "", "g" , code) # escape HTML within code span gsub( /&/, "\\&", code ); gsub( //, "\\>", code ); return "" code "" inline( substr( line, len + 1 ) ) } # Wiki style links - } else if ( match( line, /^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/) ) { + } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) { len = RLENGTH; - href = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\1", 1, substr(line, 1, len) ); - text = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\3", 1, substr(line, 1, len) ); + href = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", 1, substr(line, 1, len) ); + text = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", 1, substr(line, 1, len) ); if ( ! text ) text = href; return "" HTML(text) "" inline( substr( line, len + 1) ); @@ -157,13 +159,13 @@ function inline( line, LOCAL, len, code, href, guard ) { # inline links # ,_______________________Image____________________________, - } else if ( match(line, /^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) { + } else if ( match(line, /^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) { len = RLENGTH; - text = gensub(/^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ + text = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ "\\1", 1, substr(line, 1, len) ); - href = gensub(/^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ + href = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ "\\4", 1, substr(line, 1, len) ); - title = gensub(/^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ + title = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \ "\\6", 1, substr(line, 1, len) ); if ( title ) { return "" inline( text ) "" inline( substr( line, len + 1) ); @@ -186,12 +188,12 @@ function inline( line, LOCAL, len, code, href, guard ) { } # inline images - } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) { + } else if ( match(line, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) { len = RLENGTH; - text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) ); - href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) ); - title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) ); - attrib = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) ); + text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) ); + href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) ); + title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) ); + attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) ); if ( title && attrib ) { return "\""" \ inline( substr( line, len + 1) ); @@ -207,10 +209,10 @@ function inline( line, LOCAL, len, code, href, guard ) { } # reference style images - } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\]/ ) ) { + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) { len = RLENGTH; - text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); - id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); + text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { return "\""" \ @@ -286,6 +288,14 @@ function inline( line, LOCAL, len, code, href, guard ) { len = RLENGTH; return substr( line, 1, len ) inline(substr(line, len + 1)); + # Arrows + } else if ( line ~ /^-->( |$)/) { # ignore multidash-arrow + return "-->" inline( substr(line, 4) ); + } else if ( line ~ /^<-( |$)/) { + return "←" inline( substr(line, 3) ); + } else if ( line ~ /^->( |$)/) { + return "→" inline( substr(line, 3) ); + # Escape lone HTML character } else if ( match( line, /^[&<>"']/) ) { return HTML(substr(line, 1, 1)) inline(substr(line, 2)); @@ -385,46 +395,65 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib return "" ttext "
\n" _block(block); # Grid Tables (pandoc) - } else if ( match(block, "^\\+(-+\\+)+\n" \ - "(\\|([^\n]+\\|)+\n)+" \ - "\\+(:?=+:?\\+)+\n" \ - "((\\|([^\n]+\\|)+\n)+" \ - "\\+(-+\\+)+(\n|$))+" \ - ) ) { + # (with, and without header) + } else if ( match( block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "(\\+(:?=+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) || \ + match( block, "^()()()" \ + "(\\+(:?-+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) ) { len = RLENGTH; st = RSTART; #initialize empty arrays split("", talign); split("", tarray); split("", tread); cols = 0; cnt=0; ttext = ""; - # table header and alignment - block = substr(block, match(block, /(\n|$)/) + 1 ); - while ( match(block, "^\\|([^\n]+\\|)+\n") ) { - cols = split( gensub( /(^\||\|$)/, "", "g", \ - gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - )), tread, /\|/); - block = substr(block, match(block, /(\n|$)/) + 1 ); - for (cnt = 1; cnt < cols; cnt++) - tarray[cnt] = tarray[cnt] "\n" tread[cnt]; - } + # Column Count + cols = split( gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block), tread, /\+/) - 2; + # debug(" Cols: " gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block )); - cols = split( \ - gensub( /(^\+|\+$)/, "", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - ), talign, /\+/); - block = substr(block, match(block, /(\n|$)/) + 1 ); + # table alignment + split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ ); + # debug("Align: " gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block )); - for (cnt = 1; cnt < cols; cnt++) { - if (match(talign[cnt], /:=+:/)) talign[cnt]="center"; - else if (match(talign[cnt], /=+:/)) talign[cnt]="right"; - else if (match(talign[cnt], /:=+/ )) talign[cnt]="left"; + for (cnt = 1; cnt <= cols; cnt++) { + if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center"; + else if (match(talign[cnt], /(-+|=+):/)) talign[cnt]="right"; + else if (match(talign[cnt], /:(-+|=+)/ )) talign[cnt]="left"; else talign[cnt]=""; } - ttext = "\n" - for (cnt = 1; cnt < cols; cnt++) - ttext = ttext "" _block(tarray[cnt]) "" - ttext = ttext "\n\n" + if ( match(block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "\\+(:?=+:?\\+)+\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) ) { + # table header + block = substr(block, match(block, /(\n|$)/) + 1 ); + while ( match(block, "^\\|([^\n]+\\|)+\n") ) { + split( gensub( /(^\||\|$)/, "", "g", \ + gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ + substr(block, 1, match(block, /(\n|$)/)) \ + )), tread, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + for (cnt = 1; cnt <= cols; cnt++) + tarray[cnt] = tarray[cnt] "\n" tread[cnt]; + } + + ttext = "\n" + for (cnt = 1; cnt <= cols; cnt++) + ttext = ttext "" _block(tarray[cnt]) "" + ttext = ttext "\n" + } + + # table body + block = substr(block, match(block, /(\n|$)/) + 1 ); + ttext = ttext "\n" while ( match(block, /^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){ split("", tarray); @@ -434,13 +463,13 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib substr(block, 1, match(block, /(\n|$)/)) \ )), tread, /\|/); block = substr(block, match(block, /(\n|$)/) + 1 ); - for (cnt = 1; cnt < cols; cnt++) + for (cnt = 1; cnt <= cols; cnt++) tarray[cnt] = tarray[cnt] "\n" tread[cnt]; } block = substr(block, match(block, /(\n|$)/) + 1 ); ttext = ttext "" - for (cnt = 1; cnt < cols; cnt++) + for (cnt = 1; cnt <= cols; cnt++) ttext = ttext "" _block(tarray[cnt]) "" ttext = ttext "\n" } @@ -557,12 +586,12 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib _block( substr( block, len + 1) ); # block images (wrapped in
) - } else if ( match(block, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n|$)/) ) { + } else if ( match(block, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n|$)/) ) { len = RLENGTH; - text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\1", "g", block); - href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\2", "g", block); - title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\4", "g", block); - attrib = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\6", "g", block); + text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\1", "g", block); + href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\2", "g", block); + title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\4", "g", block); + attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\6", "g", block); if ( title && attrib ) { return "
" \ "\""" \ @@ -588,10 +617,10 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib } # reference style images (block) - } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\](\n|$)/ ) ) { + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) { len = RLENGTH; - text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block); - id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); + text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block); + id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { return "
" \