X-Git-Url: https://git.plutz.net/?a=blobdiff_plain;ds=inline;f=cgilite%2Fmarkdown.awk;h=44d4e0d7d16aa8aaab7638da20debc775f9770c3;hb=7defc340d721f26e33c0fa1df0e233c69b53c54a;hp=27d40156a414ed45f6053a758b1a9fa2f6cc254c;hpb=5a921f6c967b7971bbd3b3e135bcec89150bf75a;p=shellwiki
diff --git a/cgilite/markdown.awk b/cgilite/markdown.awk
index 27d4015..44d4e0d 100755
--- a/cgilite/markdown.awk
+++ b/cgilite/markdown.awk
@@ -4,10 +4,6 @@
# EXPERIMENTAL Markdown processor with minimal dependencies.
# Meant to support all features of John Grubers basic Markdown
# + a number of common extensions, mostly inspired by Pandoc Markdown
-#
-# ToDo:
-# - HTML processing / escaping (according to environment flag)
-# - em-dashes and arrows
# Supported Features / TODO:
# ==========================
@@ -45,18 +41,19 @@
# - ? Heading identifiers (php md, pandoc)
# - [x] Automatic heading identifiers (custom)
# - [x] Fenced code blocks (php md, pandoc)
-# - [-] Fenced code attributes
-# - [ ] Tables
+# - [x] Fenced code attributes
+# - [/] Tables
# - ? Simple table (pandoc)
# - ? Multiline table (pandoc)
-# - ? Grid table (pandoc)
-# - ? Pipe table (php md pandoc)
+# - [x] Grid table (pandoc)
+# - [x] Pipe table (php md pandoc)
# - [x] Line blocks (pandoc)
-# - [x] Task lists (pandoc)
+# - [x] Task lists (pandoc, custom)
# - [ ] Definition lists (php md, pandoc)
# - [-] Numbered example lists (pandoc)
# - [-] Metadata blocks (pandoc)
-# - [-] Fenced Divs (pandoc)
+# - [x] Metadata blocks (custom)
+# - [x] Fenced Divs (pandoc)
#
# Extensions - Inline elements:
# ----------------------------
@@ -65,6 +62,8 @@
# - [x] ^Superscript^ ~Subscript~ (pandoc)
# - [-] Bracketed spans (pandoc)
# - [-] Inline attributes (pandoc)
+# - [x] Image attributes (custom, pandoc inspired, inline only)
+# - [x] Wiki style links [[PageName]] / [[PageName|Link Text]]
# - [-] TEX-Math (pandoc)
# - ? Footnotes (php md)
# - ? Abbreviations (php md)
@@ -120,18 +119,32 @@ function inline( line, LOCAL, len, code, href, guard ) {
return "" code "
" inline( substr( line, len + 1 ) )
}
+ # Wiki style links
+ } else if ( match( line, /^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/) ) {
+ len = RLENGTH;
+ href = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\1", 1, substr(line, 1, len) );
+ text = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\3", 1, substr(line, 1, len) );
+ if ( ! text ) text = href;
+ return "" HTML(text) "" inline( substr( line, len + 1) );
+
# quick links ("automatic links" in md doc)
} else if ( match( line, /^<[a-zA-Z]+:\/\/([-\.[:alnum:]]+)(:[0-9]*)?(\/[^>]*)?>/ ) ) {
len = RLENGTH;
href = HTML( substr( line, 2, len - 2) );
return "" href "" inline( substr( line, len + 1) );
+ # quick link email
+ } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) {
+ len = RLENGTH;
+ href = HTML( substr( line, 2, len - 2) );
+ return "" href "" inline( substr( line, len + 1) );
+
# inline links
- } else if ( match(line, /^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/) ) {
+ } else if ( match(line, /^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) {
len = RLENGTH;
- text = gensub(/^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\1", "g", line);
- href = gensub(/^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\2", "g", line);
- title = gensub(/^\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\4", "g", line);
+ text = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\1", 1, substr(line, 1, len) );
+ href = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\2", 1, substr(line, 1, len) );
+ title = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\4", 1, substr(line, 1, len) );
if ( title ) {
return "" inline( text ) "" inline( substr( line, len + 1) );
} else {
@@ -141,8 +154,8 @@ function inline( line, LOCAL, len, code, href, guard ) {
# reference style links
} else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) {
len = RLENGTH;
- text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, line);
- id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, line);
+ text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) );
+ id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) );
if ( ! id ) id = text;
if ( rl_href[id] && rl_title[id] ) {
return "" inline(text) "" inline( substr( line, len + 1) );
@@ -153,13 +166,18 @@ function inline( line, LOCAL, len, code, href, guard ) {
}
# inline images
- } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/) ) {
+ } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) {
len = RLENGTH;
- text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\1", "g", line);
- href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\2", "g", line);
- title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\4", "g", line);
- if ( title ) {
+ text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) );
+ href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) );
+ title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) );
+ attrib = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) );
+ if ( title && attrib ) {
+ return "" inline( substr( line, len + 1) );
+ } else if ( title ) {
return "" inline( substr( line, len + 1) );
+ } else if ( attrib ) {
+ return "" inline( substr( line, len + 1) );
} else {
return "" inline( substr( line, len + 1) );
}
@@ -167,8 +185,8 @@ function inline( line, LOCAL, len, code, href, guard ) {
# reference style images
} else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\]/ ) ) {
len = RLENGTH;
- text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, line);
- id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, line);
+ text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) );
+ id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) );
if ( ! id ) id = text;
if ( rl_href[id] && rl_title[id] ) {
return "" inline( substr( line, len + 1) );
@@ -252,7 +270,7 @@ function inline( line, LOCAL, len, code, href, guard ) {
}
}
-function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) {
+function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib ) {
gsub( /^\n+|\n+$/, "", block );
if ( block == "" ) {
@@ -279,6 +297,12 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) {
} else if ( AllowHTML && match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
len = RLENGTH; st = RSTART;
return substr(block, st, len) _block(substr(block, st + len));
+
+ # Metadata (custom, block starting with %something)
+ # Metadata is ignored but can be interpreted externally
+ } else if ( match(block, /^%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) {
+ len = RLENGTH; st = RSTART;
+ return _block( substr( block, len + 1) );
# Blockquote (leading >)
} else if ( match( block, /^> /) ) {
@@ -287,6 +311,115 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) {
return "
\n" _block( gensub( /(^|\n)> /, "\n", "g", substr(block, 1, st - 1) ) ) "\n\n" \ _block( substr(block, st + len) ); + # Pipe Tables (pandoc / php md / gfm ) + } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \ + "((\\|)?:?(-+:?[\\|+])+:?-+:?(\\|)?)\n" \ + "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) { + len = RLENGTH; st = RSTART; + #initialize empty arrays + split("", talign); split("", tarray); + cols = 0; cnt=0; ttext = ""; + + # table header and alignment + split( gensub( /(^\||\|$)/, "", "g", \ + gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ + substr(block, 1, match(block, /(\n|$)/)) \ + )), tarray, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + cols = split( \ + gensub( /(^\||\|$)/, "", "g", \ + substr(block, 1, match(block, /(\n|$)/)) \ + ), talign, /[+\|]/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + + for( cnt = 1; cnt < cols; cnt++ ) { + if (match(talign[cnt], /:-+:/)) talign[cnt]="center"; + else if (match(talign[cnt], /-+:/)) talign[cnt]="right"; + else if (match(talign[cnt], /:-+/)) talign[cnt]="left"; + else talign[cnt]=""; + } + + ttext = "\n
" HTML( code ) "
\n" \
_block( substr( block, len + 1 ) );
+ # Fenced Divs (pandoc, custom)
+ } else if ( match( block, /^(:::+)/ ) ) {
+ guard = substr( block, 1, RLENGTH );
+ code = gensub(/^[^\n]+\n/, "", 1, block);
+ attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, block);
+ gsub(/[^a-zA-Z0-9_-]+/, " ", attrib);
+ gsub(/(^ | $)/, "", attrib);
+ if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) {
+ len = RLENGTH; st = RSTART;
+ return "" inline( substr(block, 1, st - 1) ) "
\n" \ + _block( substr(block, st + len) ); + } + # Fenced Code Block (pandoc) } else if ( match( block, /^(~~~+|```+)/ ) ) { guard = substr( block, 1, RLENGTH ); code = gensub(/^[^\n]+\n/, "", 1, block); + attrib = gensub(/^(~~~+|```+)[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\2", 1, block); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); + gsub(/(^ | $)/, "", attrib); if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { len = RLENGTH; st = RSTART; - return "" HTML( substr(code, 1, st - 1) ) "
\n" \
+ return "" HTML( substr(code, 1, st - 1) ) "
\n" \
_block( substr( code, st + len ) );
} else {
match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ );
@@ -349,14 +503,20 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) {
} else if ( match( block, /^[^\n]+\n===+(\n|$)/ ) ) {
len = RLENGTH;
HL[1]++; HL[2] = 0; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0;
- return "" inline( substr(block, 1, st - 1) ) "
\n" \ - _block( substr(block, st + len) ); + return "" inline(block) "
\n"; } } @@ -407,12 +571,28 @@ function _list( block, last, LOCAL, p) { } sub( /\n$/, "", p ); - # Task List (pandoc) - if ( p ~ /^\[ \].*/ ) { p = "" substr(p, 4); } - else if ( p ~ /^\[[xX]\].*/ ) { p = "" substr(p, 4); } - else if ( p ~ /^\[ \].*/ ) { p = "
" substr(p, 7); } - else if ( p ~ /^
\[[xX]\].*/ ) { p = "
" substr(p, 7); } - return "
\[ \].*/ ) { return "
" \ + substr(p, 7) "
\[-\].*/ ) { return "
" \ + substr(p, 7) "
\[\?\].*/ ) { return "
" \ + substr(p, 7) "
\[\/\].*/ ) { return "
" \ + substr(p, 7) "
\[[xX]\].*/ ) { return "
" \ + substr(p, 7) "