# - ? Simple table (pandoc)
# - ? Multiline table (pandoc)
# - [x] Grid table (pandoc)
-# - [x] Pipe table (php md pandoc)
+# - [x] Headerless
+# - [x] Pipe table (php md, pandoc)
# - [x] Line blocks (pandoc)
# - [x] Task lists (pandoc, custom)
# - [ ] Definition lists (php md, pandoc)
# - [x] ^Superscript^ ~Subscript~ (pandoc)
# - [-] Bracketed spans (pandoc)
# - [-] Inline attributes (pandoc)
-# - [x] Image attributes (custom, pandoc inspired, inline only)
+# - [x] Image attributes (custom, pandoc inspired, not for reference style)
# - [x] Wiki style links [[PageName]] / [[PageName|Link Text]]
# - [-] TEX-Math (pandoc)
# - ? Footnotes (php md)
# - ? ... three-dot ellipsis (smartypants)
# - [-] en-dash (smartypants)
# - [ ] Automatic em-dash / en-dash
-# - [ ] Automatic -> Arrows <-
+# - [x] Automatic -> Arrows <- (custom)
# debug(text): write `text` to standard error, preceded by a blank line and
# framed above and below by a "---" separator line.
function debug(text) {
    printf("\n---\n%s\n---\n", text) > "/dev/stderr"
}
if ( line ~ /^$/ ) { # Recursion End
return "";
- # omit processing of escaped characters
+ # omit processing of escaped characters
} else if ( line ~ /^\\[]\\`\*_\{\}\(\)#\+-\.![]/) {
return substr(line, 2, 1) inline( substr(line, 3) );
}
# Wiki style links
- } else if ( match( line, /^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/) ) {
+ } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) {
len = RLENGTH;
- href = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\1", 1, substr(line, 1, len) );
- text = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\3", 1, substr(line, 1, len) );
+ href = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", 1, substr(line, 1, len) );
+ text = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", 1, substr(line, 1, len) );
if ( ! text ) text = href;
return "<a href=\"" URL(href) "\">" HTML(text) "</a>" inline( substr( line, len + 1) );
# inline links
# ,_______________________Image____________________________,
- } else if ( match(line, /^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) {
+ } else if ( match(line, /^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) {
len = RLENGTH;
- text = gensub(/^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \
+ text = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \
"\\1", 1, substr(line, 1, len) );
- href = gensub(/^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \
+ href = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \
"\\4", 1, substr(line, 1, len) );
- title = gensub(/^\[([^]]+|!\[[^]]+\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \
+ title = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \
"\\6", 1, substr(line, 1, len) );
if ( title ) {
return "<a href=\"" URL(href) "\" title=\"" HTML(title) "\">" inline( text ) "</a>" inline( substr( line, len + 1) );
}
# inline images
- } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) {
+ } else if ( match(line, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) {
len = RLENGTH;
- text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) );
- href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) );
- title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) );
- attrib = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) );
+ text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) );
+ href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) );
+ title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) );
+ attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) );
if ( title && attrib ) {
return "<img src=\"" URL(href) "\" alt=\"" HTML(text) "\" title=\"" HTML(title) "\" class=\"" HTML(attrib) "\">" \
inline( substr( line, len + 1) );
}
# reference style images
- } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\]/ ) ) {
+ } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) {
len = RLENGTH;
- text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) );
- id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) );
+ text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) );
+ id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) );
if ( ! id ) id = text;
if ( rl_href[id] && rl_title[id] ) {
return "<img src=\"" URL(rl_href[id]) "\" alt=\"" HTML(text) "\" title=\"" HTML(rl_title[id]) "\">" \
len = RLENGTH;
return substr( line, 1, len ) inline(substr(line, len + 1));
+ # Arrows
+ } else if ( line ~ /^-->( |$)/) { # ignore multidash-arrow
+ return "-->" inline( substr(line, 4) );
+ } else if ( line ~ /^<-( |$)/) {
+ return "←" inline( substr(line, 3) );
+ } else if ( line ~ /^->( |$)/) {
+ return "→" inline( substr(line, 3) );
+
# Escape lone HTML character
} else if ( match( line, /^[&<>"']/) ) {
return HTML(substr(line, 1, 1)) inline(substr(line, 2));
return "<table>" ttext "</tbody></table>\n" _block(block);
# Grid Tables (pandoc)
- } else if ( match(block, "^\\+(-+\\+)+\n" \
- "(\\|([^\n]+\\|)+\n)+" \
- "\\+(:?=+:?\\+)+\n" \
- "((\\|([^\n]+\\|)+\n)+" \
- "\\+(-+\\+)+(\n|$))+" \
- ) ) {
+ # (with and without header)
+ } else if ( match( block, "^\\+(-+\\+)+\n" \
+ "(\\|([^\n]+\\|)+\n)+" \
+ "(\\+(:?=+:?\\+)+)\n" \
+ "((\\|([^\n]+\\|)+\n)+" \
+ "\\+(-+\\+)+(\n|$))+", \
+ tread \
+ ) || \
+ match( block, "^()()()" \
+ "(\\+(:?-+:?\\+)+)\n" \
+ "((\\|([^\n]+\\|)+\n)+" \
+ "\\+(-+\\+)+(\n|$))+", \
+ tread \
+ ) ) {
len = RLENGTH; st = RSTART;
#initialize empty arrays
- split("", talign); split("", tarray); split("", tread);
+ split("", talign); split("", tarray); # split("", tread);
cols = 0; cnt=0; ttext = "";
- # table header and alignment
- block = substr(block, match(block, /(\n|$)/) + 1 );
- while ( match(block, "^\\|([^\n]+\\|)+\n") ) {
- cols = split( gensub( /(^\||\|$)/, "", "g", \
- gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \
- substr(block, 1, match(block, /(\n|$)/)) \
- )), tread, /\|/);
- block = substr(block, match(block, /(\n|$)/) + 1 );
- for (cnt = 1; cnt < cols; cnt++)
- tarray[cnt] = tarray[cnt] "\n" tread[cnt];
+ # table alignment
+ cols = split( gensub( /(^\+|\+$)/, "", "g", tread[4] ), talign, /\+/ );
+
+ for (cnt = 1; cnt <= cols; cnt++) {
+ if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center";
+ else if (match(talign[cnt], /(-+|=+):/)) talign[cnt]="right";
+ else if (match(talign[cnt], /:(-+|=+)/ )) talign[cnt]="left";
+ else talign[cnt]="";
}
- cols = split( \
- gensub( /(^\+|\+$)/, "", "g", \
- substr(block, 1, match(block, /(\n|$)/)) \
- ), talign, /\+/);
- block = substr(block, match(block, /(\n|$)/) + 1 );
+ if ( match(block, "^\\+(-+\\+)+\n" \
+ "(\\|([^\n]+\\|)+\n)+" \
+ "\\+(:?=+:?\\+)+\n" \
+ "((\\|([^\n]+\\|)+\n)+" \
+ "\\+(-+\\+)+(\n|$))+" \
+ ) ) {
+ # table header
+ block = substr(block, match(block, /(\n|$)/) + 1 );
+ while ( match(block, "^\\|([^\n]+\\|)+\n") ) {
+ cols = split( gensub( /(^\||\|$)/, "", "g", \
+ gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \
+ substr(block, 1, match(block, /(\n|$)/)) \
+ )), tread, /\|/);
+ block = substr(block, match(block, /(\n|$)/) + 1 );
+ for (cnt = 1; cnt <= cols; cnt++)
+ tarray[cnt] = tarray[cnt] "\n" tread[cnt];
+ }
- for (cnt = 1; cnt < cols; cnt++) {
- if (match(talign[cnt], /:=+:/)) talign[cnt]="center";
- else if (match(talign[cnt], /=+:/)) talign[cnt]="right";
- else if (match(talign[cnt], /:=+/ )) talign[cnt]="left";
- else talign[cnt]="";
+ ttext = "<thead>\n<tr>"
+ for (cnt = 1; cnt <= cols; cnt++)
+ ttext = ttext "<th align=\"" talign[cnt] "\">" _block(tarray[cnt]) "</th>"
+ ttext = ttext "</tr>\n</thead>"
}
- ttext = "<thead>\n<tr>"
- for (cnt = 1; cnt < cols; cnt++)
- ttext = ttext "<th align=\"" talign[cnt] "\">" _block(tarray[cnt]) "</th>"
- ttext = ttext "</tr>\n</thead><tbody>\n"
+ # table body
+ block = substr(block, match(block, /(\n|$)/) + 1 );
+ ttext = ttext "<tbody>\n"
while ( match(block, /^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){
split("", tarray);
substr(block, 1, match(block, /(\n|$)/)) \
)), tread, /\|/);
block = substr(block, match(block, /(\n|$)/) + 1 );
- for (cnt = 1; cnt < cols; cnt++)
+ for (cnt = 1; cnt <= cols; cnt++)
tarray[cnt] = tarray[cnt] "\n" tread[cnt];
}
block = substr(block, match(block, /(\n|$)/) + 1 );
ttext = ttext "<tr>"
- for (cnt = 1; cnt < cols; cnt++)
+ for (cnt = 1; cnt <= cols; cnt++)
ttext = ttext "<td align=\"" talign[cnt] "\">" _block(tarray[cnt]) "</td>"
ttext = ttext "</tr>\n"
}
_block( substr( block, len + 1) );
# block images (wrapped in <figure>)
- } else if ( match(block, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n|$)/) ) {
+ } else if ( match(block, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n|$)/) ) {
len = RLENGTH;
- text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\1", "g", block);
- href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\2", "g", block);
- title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\4", "g", block);
- attrib = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\6", "g", block);
+ text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\1", "g", block);
+ href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\2", "g", block);
+ title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\4", "g", block);
+ attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\6", "g", block);
if ( title && attrib ) {
return "<figure data-src=\"" URL(href) "\" class=\"" HTML(attrib) "\">" \
"<img src=\"" URL(href) "\" alt=\"" HTML(text) "\" class=\"" HTML(attrib) "\">" \
}
# reference style images (block)
- } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\](\n|$)/ ) ) {
+ } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) {
len = RLENGTH;
- text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block);
- id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block);
+ text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block);
+ id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block);
if ( ! id ) id = text;
if ( rl_href[id] && rl_title[id] ) {
return "<figure data-src=\"" URL(rl_href[id]) "\">" \