From 61d13e28d97b124239b530b1e336395d90953793 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Thu, 29 Aug 2024 21:14:40 +0200
Subject: [PATCH 01/16] tests for references and macros

---
 tests-markdown.sh | 54 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 5 deletions(-)
diff --git a/tests-markdown.sh b/tests-markdown.sh
index 01fbda1..0ffc3d3 100755
--- a/tests-markdown.sh
+++ b/tests-markdown.sh
@@ -77,6 +77,8 @@ assert '[![Wikipedia](wikilogo.png)](<http://de.wikipedia.org>)'\
        '<p><a href="http://de.wikipedia.org"><img src="wikilogo.png" alt="Wikipedia"></a></p>'\
        "Image Link"
 
+assert ' <<macro /test -- "* weird <args>" _foo_>>' '<p> <code class="macro">macro /test -- &quot;* weird &lt;args&gt;&quot; _foo_</code></p>' "Macros"
+
 # Block checks
 printf '\n## Testing Block markup ##\n'
 
@@ -162,7 +164,7 @@ assert '![Testbild](Test Bild.jpg)' \
 
 assert '![Testbild](Test Bild.jpg "German Television *test* image ca. 1994")' \
 '<figure data-src="Test Bild.jpg"><img src="Test Bild.jpg" alt="Testbild"><figcaption>German Television <em>test</em> image ca. 1994</figcaption></figure>' \
-"inline image"
+"block image"
 
 assert '![Testbild *ARD*](Test Bild.jpg){tv ard function-check}' \
 '<figure data-src="Test Bild.jpg" class="tv ard function-check"><img src="Test Bild.jpg" alt="Testbild *ARD*" class="tv ard function-check"></figure>' \
@@ -306,7 +308,7 @@ assert '
 # 'Pipe Tables'
 
 assert '+---+---+---+
-|Col 1| Col 2 |  Col 3|
+|Col 1\\| Col\|2 |  Col 3|
 +===+:==:+===+
 | * foo1   | *bar* |```|
 | * foo2   | **qua** |code |
@@ -316,8 +318,8 @@ assert '+---+---+---+
 +-------+-----+----+
 ' \
 '<table><thead>
-<tr><th align=""><p>Col 1</p>
-</th><th align="center"><p> Col 2 </p>
+<tr><th align=""><p>Col 1&#x5C;</p>
+</th><th align="center"><p> Col&#x7C;2 </p>
 </th><th align=""><p>  Col 3</p>
 </th></tr>
 </thead><tbody>
@@ -359,4 +361,46 @@ sub bar
 </section></section>' \
 'Headline Nesting'
 
-printf '\nAll test passed!\n'
+# Reference syntax checks
+printf '\n## Testing reference syntax ##\n'
+
+assert 'Foo bar [Link] [1] for show
+
+The same in [en][]
+
+[en]: <http://en.wikipedia.org>
+[1]: http://de.wikipedia.org "Online Encyclopedia"' \
+'<p>Foo bar <a href="http://de.wikipedia.org" title="Online Encyclopedia">Link</a> for show</p>
+
+<p>The same in <a href="http://en.wikipedia.org">en</a></p>' \
+"Reference Links"
+
+assert 'Foo bar [Link] [1] for show
+
+[en]: <http://en.wikipedia.org>
+[1]: http://de.wikipedia.org
+ "Online Encyclopedia"' \
+'<p>Foo bar <a href="http://de.wikipedia.org" title="Online Encyclopedia">Link</a> for show</p>' \
+"Reference Links"
+
+assert 'Foo bar ![Image] [1] for show
+
+The same as ![PNG][]
+
+[PNG]: <mage/path/i.png>
+[1]: http://de.wikipedia.org/logo.jpg "Online Encyclopedia"' \
+'<p>Foo bar <img src="http://de.wikipedia.org/logo.jpg" alt="Image" title="Online Encyclopedia"> for show</p>
+
+<p>The same as <img src="mage/path/i.png" alt="PNG"></p>' \
+"Reference images"
+
+assert '![Image] [1]
+
+[PNG]: <mage/path/i.png>
+[1]: http://de.wikipedia.org/logo.jpg "Online Encyclopedia"' \
+'<figure data-src="http://de.wikipedia.org/logo.jpg"><img src="http://de.wikipedia.org/logo.jpg" alt="Image"><figcaption>Online Encyclopedia</figcaption></figure>' \
+"Reference images (block)"
+
+assert '<<macro /test -- "* weird <args>" _foo_>>' '<code class="macro">macro /test -- &quot;* weird &lt;args&gt;&quot; _foo_</code>' "Macros/Block"
+
+printf '\nAll tests passed!\n'
-- 
2.39.5


From 7ba97e6646261c0a15e2f46093141c3c7a164775 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Fri, 30 Aug 2024 00:52:53 +0200
Subject: [PATCH 02/16] include mawk tests

---
 tests-markdown.sh | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/tests-markdown.sh b/tests-markdown.sh
index 0ffc3d3..f67cfbf 100755
--- a/tests-markdown.sh
+++ b/tests-markdown.sh
@@ -15,7 +15,7 @@ assert() {
   local md comp="$2" msg="$3"
   printf "%3i: %s ... " $acnt "$msg"
 
-  for proc in gawk bawk; do
+  for proc in gawk bawk mawk; do
     printf '%s ' $proc
     md="$(printf '%s' "$1" |md_"$proc")"
     if [ "$md" != "$comp" ]; then
@@ -52,9 +52,9 @@ assert '`_foo_-> bar`' '<p><code>_foo_-&gt; bar</code></p>' 'arrow'
 assert '<!-- comment --> <- comment' '<p>&lt;!-- comment --&gt; &larr; comment</p>' 'arrow'
 
 # Escaping
-assert "&copy;" "<p>&copy;</p>" "escape"
-assert "\&copy;" "<p>&amp;copy;</p>" "escape"
-assert "AT&T" "<p>AT&amp;T</p>" "escape"
+assert '&copy;' "<p>&copy;</p>" "escape"
+assert '\&copy;' "<p>&amp;copy;</p>" "escape"
+assert 'AT&T' "<p>AT&amp;T</p>" "escape"
 assert '`&copy;`' "<p><code>&amp;copy;</code></p>" "code span escape"
 
 # Automatic Links
@@ -131,6 +131,18 @@ not be
 but &amp;shy; &lt;escaped&gt;</code></pre>' \
 "indented code block"
 
+assert '    indented code will
+	not be
+
+	*formatted*
+    but &shy; <escaped>' \
+'<pre><code>indented code will
+not be
+
+*formatted*
+but &amp;shy; &lt;escaped&gt;</code></pre>' \
+"indented code block"
+
 assert ':::: tag
 fenced _divs_ are regular text
 
@@ -204,7 +216,7 @@ assert '#### Heading four' \
 </section>' \
 'Heading arbitrary'
 
-assert '### Heading three ######' \
+assert '###Heading three ######' \
 '<section class="h3" id="0.0.1:Heading%20three"><h3>Heading three<a class="anchor" href="#0.0.1:Heading%20three"></a></h3>
 </section>' \
 'Heading arbitrary'
@@ -351,7 +363,7 @@ sub bar
 
 ### sub sub sub ###
 
-## sub2 bar {x}
+##sub2 bar {x}
 ' \
 '<section class="h2" id="0.1:foo"><h2>foo<a class="anchor" href="#0.1:foo"></a></h2>
 </section><section class="h1" id="1:bar"><h1>bar<a class="anchor" href="#1:bar"></a></h1>
-- 
2.39.5


From 1653669a6c2a769cc541893182436fa3a603e9c4 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Fri, 30 Aug 2024 00:54:16 +0200
Subject: [PATCH 03/16] compatibility changes: no gensub function

---
 markdown.awk | 166 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 100 insertions(+), 66 deletions(-)

diff --git a/markdown.awk b/markdown.awk
index 7e20ebb..fcd42cf 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -154,9 +154,9 @@ function inline( line, LOCAL, len, text, code, href, guard ) {
 
   # Wiki style links
   } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) {
-    len = RLENGTH;
-    href = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", 1, substr(line, 1, len) );
-    text = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", 1, substr(line, 1, len) );
+    len = RLENGTH; href = text = substr(line, 1, len);
+    sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", href );
+    sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", text );
     if ( ! text ) text = href;
     return "<a href=\"" HTML(href) "\">" HTML(text) "</a>" inline( substr( line, len + 1) );
 
@@ -197,9 +197,11 @@ function inline( line, LOCAL, len, text, code, href, guard ) {
 
   # reference style links
   } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) {
-    len = RLENGTH;
-    text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) );
-      id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) );
+    len = RLENGTH; text = id = substr(line, 1, len);
+    sub(/\n.*$/, "", text); sub(/^\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text);
+    sub(/\n.*$/, "",   id); sub(/^\[([^]]+)\] ?\[/, "",   id); sub(/\].*$/, "",   id);
+    # text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, text );
+    # id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1,   id );
     if ( ! id ) id = text;
     if ( rl_href[id] && rl_title[id] ) {
       return "<a href=\"" HTML(rl_href[id]) "\" title=\"" HTML(rl_title[id]) "\">" inline(text) "</a>" inline( substr( line, len + 1) );
@@ -240,9 +242,11 @@ function inline( line, LOCAL, len, text, code, href, guard ) {
 
   # reference style images
   } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) {
-    len = RLENGTH;
-    text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) );
-      id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) );
+    len = RLENGTH; text = id = substr(line, 1, len);
+    sub(/\n.*$/, "", text); sub(/^!\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text);
+    sub(/\n.*$/, "",   id); sub(/^!\[([^]]+)\] ?\[/, "",   id); sub(/\].*$/, "",   id);
+    # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) );
+    #   id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) );
     if ( ! id ) id = text;
     if ( rl_href[id] && rl_title[id] ) {
       return "<img src=\"" HTML(rl_href[id]) "\" alt=\"" HTML(text) "\" title=\"" HTML(rl_title[id]) "\">" \
@@ -304,7 +308,9 @@ function inline( line, LOCAL, len, text, code, href, guard ) {
     return "<em>" inline( substr( line, 2, len - 2 ) ) "</em>" inline( substr( line, len + 1 ) );
 
   # Literal HTML entities
-  } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) {
+  # } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) {
+  # mawk does not support repetition ranges
+  } else if ( match( line, /^&([a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?|#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) {
     len = RLENGTH;
     return substr( line, 1, len ) inline(substr(line, len + 1));
 
@@ -327,7 +333,8 @@ function inline( line, LOCAL, len, text, code, href, guard ) {
 }
 
 function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) {
-  match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART),  HL);
+  # match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART),  HL);
+  match(hstack, /([0-9]+( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+))$/); split( substr(hstack, RSTART),  HL);
 
   for ( n = hlvl; n <= 6; n++ ) { sec = sec (HL[n]?"</section>":""); }
   HL[hlvl]++; for ( n = hlvl + 1; n <= 6; n++) { HL[n] = 0;}
@@ -336,7 +343,8 @@ function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) {
   hid = hid HL[1]; for ( n = 2; n <= hlvl; n++) { hid = hid "." HL[n] ; }
   hid = hid ":" URL(htxt, 1);
 
-  sub(/([0-9]+( [0-9]+){5})$/, "", hstack);
+  # sub(/([0-9]+( [0-9]+){5})$/, "", hstack);
+  sub(/([0-9]+( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+))$/, "", hstack);
   hstack = hstack HL[1] " " HL[2] " " HL[3] " " HL[4] " " HL[5] " " HL[6];
 
   return sec "<section class=\"" (attrib ? "h" hlvl " " attrib : "h" hlvl)  "\" id=\"" hid "\">" \
@@ -413,15 +421,14 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
     cols = 0; cnt=0; ttext = "";
 
     # table header and alignment
-    split( gensub( /(^\||\|$)/, "", "g", \
-           gensub( /(^|[^\\])\\\|/, "\\1\\&#x7C;", "g", \
-           substr(block, 1, match(block, /(\n|$)/)) \
-    )), tarray, /\|/);
+    tmp = substr(block, 1, match(block, /(\n|$)/));
+    gsub( /(^|[^\\])\\\|/, "\\1\\&#x7C;", tmp );
+    gsub( /(^\||\|$)/, "", tmp)
+    split( tmp, tarray, /\|/);
     block = substr(block, match(block, /(\n|$)/) + 1 );
-    cols = split( \
-           gensub( /(^\||\|$)/, "", "g", \
-           substr(block, 1, match(block, /(\n|$)/)) \
-    ), talign, /[+\|]/);
+    tmp = substr(block, 1, match(block, /(\n|$)/));
+    gsub( /(^\||\|$)/, "", tmp );
+    cols = split( tmp , talign, /[+\|]/);
     block = substr(block, match(block, /(\n|$)/) + 1 );
 
     for( cnt = 1; cnt < cols; cnt++ ) {
@@ -437,10 +444,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
     ttext = ttext "</tr>\n</thead><tbody>\n"
 
     while ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ){
-      split( gensub( /(^\||\|$)/, "", "g", \
-             gensub( /(^|[^\\])\\\|/, "\\1\\&#x7C;", "g", \
-             substr(block, 1, match(block, /(\n|$)/)) \
-      )), tarray, /\|/);
+      tmp = substr(block, 1, match(block, /(\n|$)/));
+      gsub( /(^|[^\\])\\\|/, "\\1\\&#x7C;", tmp );
+      gsub( /(^\||\|$)/, "", tmp );
+      split( tmp, tarray, /\|/);
       block = substr(block, match(block, /(\n|$)/) + 1 );
 
       ttext = ttext "<tr>"
@@ -469,11 +476,14 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
     cols = 0; cnt=0; ttext = "";
 
     # Column Count
-    cols = split(   gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block), tread, /\+/) - 2;
+    tmp = block; sub( "(\n.*)*$", "", tmp);
+    cols = split( tmp, tread, /\+/) - 2;
     # debug(" Cols: " gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block ));
 
     # table alignment
-    split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ );
+    match(block, "((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)");
+    split( substr(block, RSTART, RLENGTH) , talign, /\+/ );
+    # split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ );
     # debug("Align: " gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ));
 
     for (cnt = 1; cnt <= cols; cnt++) {
@@ -492,10 +502,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
       # table header
       block = substr(block, match(block, /(\n|$)/) + 1 );
       while ( match(block, "^\\|([^\n]+\\|)+\n") ) {
-        split( gensub( /(^\||\|$)/, "", "g", \
-                 gensub( /(^|[^\\])\\\|/, "\\1\\&#x7C;", "g", \
-                   substr(block, 1, match(block, /(\n|$)/)) \
-        )), tread, /\|/);
+        tmp = substr(block, 1, match(block, /(\n|$)/));
+        gsub( /\\\\/, "\\&#x5C;", tmp); gsub(/\\\|/, "\\&#x7C;", tmp);
+        gsub( /(^\||\|$)/, "", tmp );
+        split(tmp, tread, /\|/);
         block = substr(block, match(block, /(\n|$)/) + 1 );
         for (cnt = 1; cnt <= cols; cnt++)
           tarray[cnt] = tarray[cnt] "\n" tread[cnt];
@@ -514,10 +524,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
     while ( match(block, /^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){
       split("", tarray);
       while ( match(block, /^\|([^\n]+\|)+\n/) ) {
-        split( gensub( /(^\||\|$)/, "", "g", \
-               gensub( /(^|[^\\])\\\|/, "\\1\\&#x7C;", "g", \
-               substr(block, 1, match(block, /(\n|$)/)) \
-        )), tread, /\|/);
+        tmp = substr(block, 1, match(block, /(\n|$)/));
+        gsub( /\\\\/, "\\&#x5C;", tmp); gsub(/\\\|/, "\\&#x7C;", tmp);
+        gsub( /(^\||\|$)/, "", tmp);
+        split( tmp, tread, /\|/);
         block = substr(block, match(block, /(\n|$)/) + 1 );
         for (cnt = 1; cnt <= cols; cnt++)
           tarray[cnt] = tarray[cnt] "\n" tread[cnt];
@@ -542,8 +552,9 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
     return "<div class=\"line-block\">" text "</div>\n" _block( substr( block, len + 1) );
 
   # Indented Code Block
-  } else if ( match(block, /^(    |\t)( *\t*[^ \t\n]+ *\t*)+(\n|$)((    |\t)[^\n]+(\n|$)|[ \t]*(\n|$))*/) ) {
+  } else if ( match(block, /^((    |\t)[^\n]*[^\n\t ][^\n]*(\n|$))((    |\t)[^\n]*(\n|$)|[\t ]*(\n|$))*/) ) {
     len = RLENGTH; st = RSTART;
+
     code = substr(block, 1, len);
     gsub(/(^|\n)(    |\t)/, "\n", code);
     gsub(/^\n|\n+$/, "", code);
@@ -552,9 +563,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
 
   # Fenced Divs (pandoc, custom)
   } else if ( match( block, /^(:::+)/ ) ) {
-    guard = substr( block, 1, RLENGTH );
-    code = block; sub(/^[^\n]+\n/, "", code);
-    attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, block);
+    guard = substr( block, 1, RLENGTH ); attrib = code = block;
+    sub(/^[^\n]+\n/, "", code);
+    sub(/^:::+[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib);
+    # attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, attrib);
     gsub(/[^a-zA-Z0-9_-]+/, " ", attrib);
     gsub(/(^ | $)/, "", attrib);
     if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) {
@@ -574,9 +586,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
 
   # Fenced Code Block (pandoc)
   } else if ( match( block, /^(~~~+|```+)/ ) ) {
-    guard = substr( block, 1, RLENGTH );
-    code = gensub(/^[^\n]+\n/, "", 1, block);
-    attrib = gensub(/^(~~~+|```+)[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\2", 1, block);
+    guard = substr( block, 1, RLENGTH ); attrib = code = block;
+    sub(/^[^\n]+\n/, "", code);
+    sub(/^(~~~+|```+)[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib);
+    # attrib = gensub(/^(~~~+|```+)[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\2", 1, attrib);
     gsub(/[^a-zA-Z0-9_-]+/, " ", attrib);
     gsub(/(^ | $)/, "", attrib);
     if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) {
@@ -627,22 +640,26 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
     return headline(2, text, 0) _block( substr( block, len + 1) );
 
   # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib
-  } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) {
+  } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) {
     len = RLENGTH; text = attrib = substr(block, 1, len);
-    match(block, /^#{1,6}/); n = RLENGTH;
+    match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1;
 
-    sub(/^(#{1,6})[ \t]*/, "", text);   sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text);
-    sub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib);
+    # sub(/^(##?#?#?#?#?)[ \t]*/, "", text);  # not working in mawk
+    text = substr(text, n + 1); sub(/^[ \t]*/, "", text);
+    sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text);
+    sub(/^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib);
     sub(/\})(\n.*)?$/, "", attrib);
     gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib);
 
     return headline( n, text, attrib ) _block( substr( block, len + 1) );
 
   # Nth Order Heading H1 H2 H3 H4 H5 H6
-  } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) {
+  } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) {
     len = RLENGTH; text = substr(block, 1, len);
-    match(block, /^#{1,6}/); n = RLENGTH;
-    sub(/^(#{1,6})[ \t]*/, "", text); sub(/[ \t]*#*(\n.*)?$/, "", text);
+    match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1;
+    # sub(/^(##?#?#?#?#?)[ \t]+/, "", text);  # not working in mawk
+    text = substr(text, n + 1); sub(/^[ \t]*/, "", text);
+    sub(/[ \t]*#*(\n.*)?$/, "", text);
 
     return headline( n, text, 0 ) _block( substr( block, len + 1) );
 
@@ -678,11 +695,12 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
            "</figure>\n\n" \
            _block( substr( block, len + 1) );
 
-  # reference style images (block)
-  } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) {
-    len = RLENGTH;
-    text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block);
-      id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block);
+  } else if ( match(block, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) {
+    len = RLENGTH; text = id = block;
+    sub(/(\n.*)?$/, "", text); sub( /^!\[/, "", text); sub(/\] ?\[([^\n]*)\]$/, "", text);
+    sub(/(\n.*)?$/, "",   id); sub( /^!\[([^\n]*)\] ?\[/, "",   id); sub(/\]$/, "",   id);
+    # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block);
+    #   id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block);
     if ( ! id ) id = text;
     if ( rl_href[id] && rl_title[id] ) {
       return "<figure data-src=\"" HTML(rl_href[id]) "\">" \
@@ -701,8 +719,9 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
 
   # Macros (standalone <<macro>> calls handled as block, so they are not wrapped in paragraph)
   } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) {
-    len = RLENGTH;
-    text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block);
+    len = RLENGTH; text = block;
+    sub(/^<</, "", text); sub(/>>(\n.*)?$/, "", text);
+    # text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block);
     return "<code class=\"macro\">" HTML(text) "</code>" _block(substr(block, len + 1) );
 
   # Definition list
@@ -743,7 +762,8 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
            _block( substr(block, st + len) );
 
   # Horizontal rule
-  } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) {
+  # } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) {
+  } else if ( match( block, /(^|\n) ? ? ?((\* *)(\* *)(\* *)(\* *)*|(- *)(- *)(- *)(- *)*|(_ *)(_ *)(_ *)(_ *)*)($|\n)/) ) {
     len = RLENGTH; st = RSTART;
     return _block(substr(block, 1, st - 1)) "<hr>\n" _block(substr(block, st + len));
 
@@ -753,7 +773,7 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
   }
 }
 
-function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, text) {
+function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, it, text) {
   if (match( block, "(^|\n) ? ? ?" mark "[ \t][^\n]+(\n|$)" \
                                    "(([ \t]*\n)* ? ? ?" mark "[ \t][^\n]+(\n|$)" \
                                    "|([ \t]*\n)*( ? ? ?\t|  +)[^\n]+(\n|$)" \
@@ -761,7 +781,9 @@ function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, te
     st = RSTART; len = RLENGTH; list = substr( block, st, len);
 
     sub("^\n", "", list); match(list, "^ ? ? ?"); indent = RLENGTH;
-    gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list);
+    it = ""; while ( indent > 0 ) { it = it " ?"; indent--; }
+    # gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list);
+    gsub( "(^|\n)" it, "\n", list); sub("^\n", "", list);
 
     text = substr(block, 1, st - 1); block = substr(block, st + len);
     if (match(text, /\n[[:space:]]*\n/)) return 0;
@@ -775,10 +797,12 @@ function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, te
   } else return 0;
 }
 
-function _list (block, mark, p, LOCAL, len, st, text, indent, task) {
+function _list (block, mark, p, LOCAL, len, st, text, indent, it, task) {
   if ( match(block, "^([ \t]*\n)*$")) return;
 
   match(block, "^" mark "[ \t]"); indent = RLENGTH;
+  it = ""; while ( indent > 0 ) { it = it " ?"; indent--; }
+
   sub("^" mark "[ \t]", "", block);
 
   if (match(block, /\n[ \t]*\n/)) p = 1;
@@ -787,7 +811,8 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, task) {
   st = (RLENGTH == -1) ? length(block) + 1 : RSTART;
   text = substr(block, 1, st); block = substr(block, st + 1);
 
-  gsub("\n {0," indent "}", "\n", text);
+  # gsub("\n {0," indent "}", "\n", text);
+  gsub("\n" it, "\n", text);
 
   task = match( text, /^\[ \]/   ) ? "<li class=\"task pending\"><input type=checkbox disabled>"      : \
          match( text, /^\[-\]/   ) ? "<li class=\"task negative\"><input type=checkbox disabled>"     : \
@@ -803,7 +828,7 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, task) {
   return task text "</li>\n" _list(block, mark, p);
 }
 
-function _dlist (block, LOCAL, len, st, text, indent, p) {
+function _dlist (block, LOCAL, len, st, text, indent, it, p) {
   if (match( block, "^([ \t]*\n)*[^:\n \t][^\n]+\n" )) {
     len = RLENGTH; text = substr(block, 1, len);
     gsub( "(^\n*|\n*$)", "", text );
@@ -816,8 +841,10 @@ function _dlist (block, LOCAL, len, st, text, indent, p) {
     len = RLENGTH; text = substr(block, 1, len);
     sub( "^([ \t]*\n)*", "", text);
     match(text, "^ ? ? ?:(\t| +)"); indent = RLENGTH;
+    it = ""; while ( indent > 0 ) { it = it " ?"; indent--; }
     sub( "^ ? ? ?:(\t| +)", "", text);
-    gsub( "(^|\n) {0," indent "}", "\n", text );
+    # gsub( "(^|\n) {0," indent "}", "\n", text );
+    gsub( "(^|\n)" it, "\n", text );
 
     text = _nblock(text);
     if (match( text, "^<p>(</p[^>]|</[^p]|<[^/]|[^<])*</p>\n$" ))
@@ -858,10 +885,17 @@ BEGIN {
   re_reflink = "(^|\n) ? ? ?\\[([^]\n]+)\\]: ([^ \t\n]+)(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))?(\n|$)";
   # /(^|\n) ? ? ?\[([^]\n]+)\]: ([^ \t\n]+)(\n?[ \t]+("([^"]+)"|'([^']+)'|\(([^)]+)\)))?(\n|$)/
   while ( match(f, re_reflink ) ) {
-    rl_id           = gensub( re_reflink, "\\2", 1, substr(f, RSTART, RLENGTH) );
-    rl_href[rl_id]  = gensub( re_reflink, "\\3", 1, substr(f, RSTART, RLENGTH) );
-    rl_title[rl_id] = gensub( re_reflink, "\\5", 1, substr(f, RSTART, RLENGTH) );
-    f = substr(f, RSTART + RLENGTH);
+    tt = th = ti = substr(f, RSTART, RLENGTH); f = substr(f, RSTART + RLENGTH);
+    sub("(^|\n) ? ? ?\\[", "", ti); sub("\\]: ([^ \t\n]+)(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))?(\n.*)?$", "", ti);
+    sub("(^|\n) ? ? ?\\[([^]\n]+)\\]: ", "", th); sub("(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))?(\n.*)?$", "", th);
+    if (match(tt, "(^|\n) ? ? ?\\[([^]\n]+)\\]: ([^ \t\n]+)(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))(\n|$)")) {
+      sub("(^|\n) ? ? ?\\[([^]\n]+)\\]: ([^ \t\n]+)", "", tt); sub("^\n?[ \t]+", "", tt); sub("(\n.*)?$", "", tt);
+    } else { tt = ""; }
+    rl_id = ti; rl_href[rl_id] = th; rl_title[rl_id] = tt;
+    # rl_id           = gensub( re_reflink, "\\2", 1, substr(f, RSTART, RLENGTH) );
+    # rl_href[rl_id]  = gensub( re_reflink, "\\3", 1, substr(f, RSTART, RLENGTH) );
+    # rl_title[rl_id] = gensub( re_reflink, "\\5", 1, substr(f, RSTART, RLENGTH) );
+    # f = substr(f, RSTART + RLENGTH);
     rl_title[rl_id] = substr( rl_title[rl_id], 2, length(rl_title[rl_id]) - 2 );
     if ( rl_href[rl_id] ~ /<.*>/ ) rl_href[rl_id] = substr( rl_href[rl_id], 2, length(rl_href[rl_id]) - 2 );
   }
-- 
2.39.5


From b329161df6fbf03345ccadce649151496693e18a Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Fri, 30 Aug 2024 13:09:11 +0200
Subject: [PATCH 04/16] bugfix Wiki Links, bugfix regex syntax

---
 markdown.awk | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/markdown.awk b/markdown.awk
index fcd42cf..26fc11b 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -155,8 +155,10 @@ function inline( line, LOCAL, len, text, code, href, guard ) {
   # Wiki style links
   } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) {
     len = RLENGTH; href = text = substr(line, 1, len);
-    sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", href );
-    sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", text );
+    sub(/^\[\[/, "", href); sub(/(\|([^]]+))?\]\].*$/, "", href);
+    sub(/^\[\[([^]|]+)/, "", text); sub(/\]\].*$/, "", text); sub(/^\|/, "", text);
+    # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", href );
+    # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", text );
     if ( ! text ) text = href;
     return "<a href=\"" HTML(href) "\">" HTML(text) "</a>" inline( substr( line, len + 1) );
 
@@ -648,7 +650,7 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
     text = substr(text, n + 1); sub(/^[ \t]*/, "", text);
     sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text);
     sub(/^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib);
-    sub(/\})(\n.*)?$/, "", attrib);
+    sub(/\}(\n.*)?$/, "", attrib);
     gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib);
 
     return headline( n, text, attrib ) _block( substr( block, len + 1) );
-- 
2.39.5


From 1744198c8b528ceb4f9d66ad076347d5a00858fd Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Fri, 30 Aug 2024 13:15:47 +0200
Subject: [PATCH 05/16] test wiki links and some full pages

---
 tests-markdown.sh | 803 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 800 insertions(+), 3 deletions(-)

diff --git a/tests-markdown.sh b/tests-markdown.sh
index f67cfbf..ff56c02 100755
--- a/tests-markdown.sh
+++ b/tests-markdown.sh
@@ -1,5 +1,7 @@
 #!/bin/sh
 
+runtimes="gawk bawk goawk"
+
 BR='
 '
 CR="$(printf \r)"
@@ -9,15 +11,20 @@ awk() { /bin/awk "$@"; }
 md_gawk() { gawk -f markdown.awk "$@"; }
 md_bawk() { busybox awk -f markdown.awk "$@"; }
 md_mawk() { mawk -f markdown.awk "$@"; }
+md_goawk() { goawk -f markdown.awk "$@"; }
 
 acnt=1  # assertion count
 assert() {
-  local md comp="$2" msg="$3"
+  local md comp="$2" msg="$3" ex
   printf "%3i: %s ... " $acnt "$msg"
 
-  for proc in gawk bawk mawk; do
+  for proc in $runtimes; do
     printf '%s ' $proc
-    md="$(printf '%s' "$1" |md_"$proc")"
+    md="$(printf '%s' "$1" |md_"$proc")"; ex=$?
+    if [ "$ex" != 0 ]; then
+      printf "Fail!\nExit Code: %i\n" $ex
+      exit 1
+    fi
     if [ "$md" != "$comp" ]; then
       printf "Fail!\n:\n%s\n:\n%s\n" "$md" "$comp"
       exit 1
@@ -57,6 +64,10 @@ assert '\&copy;' "<p>&amp;copy;</p>" "escape"
 assert 'AT&T' "<p>AT&amp;T</p>" "escape"
 assert '`&copy;`' "<p><code>&amp;copy;</code></p>" "code span escape"
 
+# Wiki Links
+assert '[[Link/]]' '<p><a href="Link/">Link/</a></p>' "Wiki Link"
+assert '[[Link/|Linked Page]]' '<p><a href="Link/">Linked Page</a></p>' "Wiki Link"
+
 # Automatic Links
 assert '<https://de.wikipedia.org>' "<p><a href=\"https://de.wikipedia.org\">https://de.wikipedia.org</a></p>" "automatic link"
 assert '<http://de.wikipedia.org>' "<p><a href=\"http://de.wikipedia.org\">http://de.wikipedia.org</a></p>" "automatic link"
@@ -415,4 +426,790 @@ assert '![Image] [1]
 
 assert '<<macro /test -- "* weird <args>" _foo_>>' '<code class="macro">macro /test -- &quot;* weird &lt;args&gt;&quot; _foo_</code>' "Macros/Block"
 
+
+printf '\n## Testing example pages ##\n'
+
+assert 'Markdown.awk
+============
+
+Supported Features / TODO:
+--------------------------
+- [x] done
+- [ ] todo
+- [-] not planned
+- ?  unsure (whether to implement)
+- [/] partial
+
+### Basic Markdown - Block elements: ###
+- [x] Paragraphs
+  - [x] Double space line breaks
+- [x] Proper block element nesting
+- [x] Headings
+- [x] ATX-Style Headings
+- [x] Blockquotes
+- [x] Lists (ordered, unordered)
+- [x] Code blocks (using indention)
+- [x] Horizontal rules
+- [x] Verbatim HTML block (disabled by default)
+
+### Basic Markdown - Inline elements: ###
+- [x] Links
+- [x] Reference style links
+- [x] Emphasis *em*/**strong** (*Asterisk*, _Underscore_)
+- [x] `code`, also ``code containing `backticks` ``
+- [x] Images / reference style images
+- [x] <automatic links>
+- [x] backslash escapes
+- [x] Verbatim HTML inline (disabled by default)
+- [x] HTML escaping
+
+NOTE: Set the environment variable `MD_HTML=true` to enable verbatim HTML
+
+### Extensions - Block elements: ###
+- [x] Automatic <section>-wrapping (custom)
+-  ?  Heading identifiers (php md, pandoc)
+  - [x] Heading attributes (custom)
+- [x] Automatic heading identifiers (custom)
+- [x] Fenced code blocks (php md, pandoc)
+  - [x] Fenced code attributes
+- [x] Images (as block elements, <figure>-wrapped) (custom)
+  - [x] reference style block images
+- [/] Tables
+  -  ?  Simple table (pandoc)
+  -  ?  Multiline table (pandoc)
+  - [x] Grid table (pandoc)
+    - [x] Headerless
+  - [x] Pipe table (php md, pandoc)
+- [x] Line blocks (pandoc)
+- [x] Task lists (pandoc, custom)
+- [x] Definition lists (php md, pandoc)
+- [-] Numbered example lists (pandoc)
+- [-] Metadata blocks (pandoc)
+- [x] Metadata blocks (custom)
+- [x] Fenced Divs (pandoc)
+
+### Extensions - Inline elements: ###
+- [x] Ignore embedded_underscores (php md, pandoc)
+- [x] ~~strikeout~~ (pandoc)
+- [x] ^Superscript^ ~Subscript~ (pandoc)
+- [-] Bracketed spans (pandoc)
+  - [-] Inline attributes (pandoc)
+- [x] Image attributes (custom, pandoc inspired, not for reference style)
+- [x] Wiki style links [[PageName]] / [[PageName|Link Text]]
+- [-] TEX-Math (pandoc)
+-  ?  Footnotes (php md)
+-  ?  Abbreviations (php md)
+-  ?  "Curly quotes" (smartypants)
+- [ ] em-dashes (--) (smartypants old)
+-  ?  ... three-dot ellipsis (smartypants)
+- [-] en-dash (smartypants)
+- [ ] Automatic em-dash / en-dash
+- [x] Automatic -> Arrows <- (custom)
+
+Compatibility
+-------------
+Markdown.awk can run in GNU awk (`gawk`) and in Busybox awk. It is _not_ fully POSIX compliant and does not run in `mawk` or `nawk`. In particular it makes heavy use of the `gensub()` function and its ability to use paranthesized subexpressions in the replacement text. This feature is not available in the POSIX specified `sub()` and `gsub()` functions. Hence it cannot be replaced without effort.
+
+Tests
+-----
+[Link with Title](https://en.wikipedia.org/wiki/Markdown "Markdown in Wikipedia"), *emphasis*, **strong**, **strong containing *emphasis***, `inline code`, `` code with `backticks` ``. See more tests [here](./tests/).' \
+'<section class="h1" id="1:Markdown.awk"><h1>Markdown.awk<a class="anchor" href="#1:Markdown.awk"></a></h1>
+<section class="h2" id="1.1:Supported%20Features%20/%20TODO:"><h2>Supported Features / TODO:<a class="anchor" href="#1.1:Supported%20Features%20/%20TODO:"></a></h2>
+<ul>
+<li class="task done"><input type=checkbox disabled checked> done</li>
+<li class="task pending"><input type=checkbox disabled> todo</li>
+<li class="task negative"><input type=checkbox disabled> not planned</li>
+<li>?  unsure (whether to implement)</li>
+<li class="task partial"><input type=checkbox disabled> partial</li>
+</ul>
+<section class="h3" id="1.1.1:Basic%20Markdown%20-%20Block%20elements:"><h3>Basic Markdown - Block elements:<a class="anchor" href="#1.1.1:Basic%20Markdown%20-%20Block%20elements:"></a></h3>
+<ul>
+<li class="task done"><input type=checkbox disabled checked><p> Paragraphs</p>
+<ul>
+<li class="task done"><input type=checkbox disabled checked> Double space line breaks</li>
+</ul>
+</li>
+<li class="task done"><input type=checkbox disabled checked> Proper block element nesting</li>
+<li class="task done"><input type=checkbox disabled checked> Headings</li>
+<li class="task done"><input type=checkbox disabled checked> ATX-Style Headings</li>
+<li class="task done"><input type=checkbox disabled checked> Blockquotes</li>
+<li class="task done"><input type=checkbox disabled checked> Lists (ordered, unordered)</li>
+<li class="task done"><input type=checkbox disabled checked> Code blocks (using indention)</li>
+<li class="task done"><input type=checkbox disabled checked> Horizontal rules</li>
+<li class="task done"><input type=checkbox disabled checked> Verbatim HTML block (disabled by default)</li>
+</ul>
+</section><section class="h3" id="1.1.2:Basic%20Markdown%20-%20Inline%20elements:"><h3>Basic Markdown - Inline elements:<a class="anchor" href="#1.1.2:Basic%20Markdown%20-%20Inline%20elements:"></a></h3>
+<ul>
+<li class="task done"><input type=checkbox disabled checked> Links</li>
+<li class="task done"><input type=checkbox disabled checked> Reference style links</li>
+<li class="task done"><input type=checkbox disabled checked> Emphasis <em>em</em>/<strong>strong</strong> (<em>Asterisk</em>, <em>Underscore</em>)</li>
+<li class="task done"><input type=checkbox disabled checked> <code>code</code>, also <code>code containing `backticks`</code></li>
+<li class="task done"><input type=checkbox disabled checked> Images / reference style images</li>
+<li class="task done"><input type=checkbox disabled checked> &lt;automatic links&gt;</li>
+<li class="task done"><input type=checkbox disabled checked> backslash escapes</li>
+<li class="task done"><input type=checkbox disabled checked> Verbatim HTML inline (disabled by default)</li>
+<li class="task done"><input type=checkbox disabled checked> HTML escaping</li>
+</ul>
+<p>NOTE: Set the environment variable <code>MD_HTML=true</code> to enable verbatim HTML</p>
+
+</section><section class="h3" id="1.1.3:Extensions%20-%20Block%20elements:"><h3>Extensions - Block elements:<a class="anchor" href="#1.1.3:Extensions%20-%20Block%20elements:"></a></h3>
+<ul>
+<li class="task done"><input type=checkbox disabled checked> Automatic &lt;section&gt;-wrapping (custom)</li>
+<li><p> ?  Heading identifiers (php md, pandoc)</p>
+<ul>
+<li class="task done"><input type=checkbox disabled checked> Heading attributes (custom)</li>
+</ul>
+</li>
+<li class="task done"><input type=checkbox disabled checked> Automatic heading identifiers (custom)</li>
+<li class="task done"><input type=checkbox disabled checked><p> Fenced code blocks (php md, pandoc)</p>
+<ul>
+<li class="task done"><input type=checkbox disabled checked> Fenced code attributes</li>
+</ul>
+</li>
+<li class="task done"><input type=checkbox disabled checked><p> Images (as block elements, &lt;figure&gt;-wrapped) (custom)</p>
+<ul>
+<li class="task done"><input type=checkbox disabled checked> reference style block images</li>
+</ul>
+</li>
+<li class="task partial"><input type=checkbox disabled><p> Tables</p>
+<ul>
+<li> ?  Simple table (pandoc)</li>
+<li> ?  Multiline table (pandoc)</li>
+<li class="task done"><input type=checkbox disabled checked><p> Grid table (pandoc)</p>
+<ul>
+<li class="task done"><input type=checkbox disabled checked> Headerless</li>
+</ul>
+</li>
+<li class="task done"><input type=checkbox disabled checked> Pipe table (php md, pandoc)</li>
+</ul>
+</li>
+<li class="task done"><input type=checkbox disabled checked> Line blocks (pandoc)</li>
+<li class="task done"><input type=checkbox disabled checked> Task lists (pandoc, custom)</li>
+<li class="task done"><input type=checkbox disabled checked> Definition lists (php md, pandoc)</li>
+<li class="task negative"><input type=checkbox disabled> Numbered example lists (pandoc)</li>
+<li class="task negative"><input type=checkbox disabled> Metadata blocks (pandoc)</li>
+<li class="task done"><input type=checkbox disabled checked> Metadata blocks (custom)</li>
+<li class="task done"><input type=checkbox disabled checked> Fenced Divs (pandoc)</li>
+</ul>
+</section><section class="h3" id="1.1.4:Extensions%20-%20Inline%20elements:"><h3>Extensions - Inline elements:<a class="anchor" href="#1.1.4:Extensions%20-%20Inline%20elements:"></a></h3>
+<ul>
+<li class="task done"><input type=checkbox disabled checked> Ignore embedded_underscores (php md, pandoc)</li>
+<li class="task done"><input type=checkbox disabled checked> <del>strikeout</del> (pandoc)</li>
+<li class="task done"><input type=checkbox disabled checked> <sup>Superscript</sup> <sub>Subscript</sub> (pandoc)</li>
+<li class="task negative"><input type=checkbox disabled><p> Bracketed spans (pandoc)</p>
+<ul>
+<li class="task negative"><input type=checkbox disabled> Inline attributes (pandoc)</li>
+</ul>
+</li>
+<li class="task done"><input type=checkbox disabled checked> Image attributes (custom, pandoc inspired, not for reference style)</li>
+<li class="task done"><input type=checkbox disabled checked> Wiki style links <a href="PageName">PageName</a> / <a href="PageName">Link Text</a></li>
+<li class="task negative"><input type=checkbox disabled> TEX-Math (pandoc)</li>
+<li> ?  Footnotes (php md)</li>
+<li> ?  Abbreviations (php md)</li>
+<li> ?  &quot;Curly quotes&quot; (smartypants)</li>
+<li class="task pending"><input type=checkbox disabled> em-dashes (--) (smartypants old)</li>
+<li> ?  ... three-dot ellipsis (smartypants)</li>
+<li class="task negative"><input type=checkbox disabled> en-dash (smartypants)</li>
+<li class="task pending"><input type=checkbox disabled> Automatic em-dash / en-dash</li>
+<li class="task done"><input type=checkbox disabled checked> Automatic &rarr; Arrows &larr; (custom)</li>
+</ul>
+</section></section><section class="h2" id="1.2:Compatibility"><h2>Compatibility<a class="anchor" href="#1.2:Compatibility"></a></h2>
+<p>Markdown.awk can run in GNU awk (<code>gawk</code>) and in Busybox awk. It is <em>not</em> fully POSIX compliant and does not run in <code>mawk</code> or <code>nawk</code>. In particular it makes heavy use of the <code>gensub()</code> function and its ability to use paranthesized subexpressions in the replacement text. This feature is not available in the POSIX specified <code>sub()</code> and <code>gsub()</code> functions. Hence it cannot be replaced without effort.</p>
+
+</section><section class="h2" id="1.3:Tests"><h2>Tests<a class="anchor" href="#1.3:Tests"></a></h2>
+<p><a href="https://en.wikipedia.org/wiki/Markdown" title="Markdown in Wikipedia">Link with Title</a>, <em>emphasis</em>, <strong>strong</strong>, <strong>strong containing <em>emphasis</em></strong>, <code>inline code</code>, <code>code with `backticks`</code>. See more tests <a href="./tests/">here</a>.</p>
+</section></section>' \
+'Full Page (cgilite markdown)'
+
+assert 'Headline First Order
+====================
+
+Headline Second Order
+---------------------
+
+    Code Block
+    with indentation
+
+> Blockquote
+> ----------
+> like in an email
+
+### Headline 3rd order
+
+- unordered List
+1. with sub points
+     
+   sometimes longer ones
+
+2. which are ordered
+3. [ ] and have a Todo item
+- more list points
+  - and a sublist
+- [x] some of which ae done
+
+----------
++ A lazy, lazy, list
+item.
+
++ Another one; this looks
+bad but is legal.
+
+    Second paragraph of second
+list item.
+
+---------
+
+~~~ {.blue}
+Fenced Code Block
+# with verbatim Text
+`and an attribute`
+~~~
+
+| The limerick packs laughs anatomical
+| In space that is quite economical.
+|    But the *good* ones I'\''ve seen
+|    So seldom are *clean*
+| And the clean ones so seldom are comical
+
+| The Right Honorable Most Venerable and Righteous Samuel L.
+  Constable, Jr.
+| 200 Main St.
+| Berkeley, CA 94718
+
+Term 1
+
+:   This is a definition with two paragraphs. Lorem ipsum 
+    dolor sit amet, consectetuer adipiscing elit. Aliquam 
+    hendrerit mi posuere lectus.
+
+    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
+    vitae, risus.
+
+:   Second definition for term 1, also wrapped in a paragraph
+    because of the blank line preceding it.
+
+Term 2
+
+:   This definition has a code block, a blockquote and a list.
+
+        code block.
+
+    > block quote
+    > on two lines.
+
+    1.  first list item
+    2.  second list item' \
+'<section class="h1" id="1:Headline%20First%20Order"><h1>Headline First Order<a class="anchor" href="#1:Headline%20First%20Order"></a></h1>
+<section class="h2" id="1.1:Headline%20Second%20Order"><h2>Headline Second Order<a class="anchor" href="#1.1:Headline%20Second%20Order"></a></h2>
+<pre><code>Code Block
+with indentation</code></pre>
+<blockquote><section class="h2" id="1/0.1:Blockquote"><h2>Blockquote<a class="anchor" href="#1/0.1:Blockquote"></a></h2>
+<p>like in an email</p>
+</section></blockquote>
+
+<section class="h3" id="1.1.1:Headline%203rd%20order"><h3>Headline 3rd order<a class="anchor" href="#1.1.1:Headline%203rd%20order"></a></h3>
+<ul>
+<li>unordered List</li>
+</ul>
+<ol>
+<li><p>with sub points</p>
+
+<p>sometimes longer ones</p>
+</li>
+<li><p>which are ordered</p>
+</li>
+<li class="task pending"><input type=checkbox disabled><p> and have a Todo item</p>
+</li>
+</ol>
+<ul>
+<li><p>more list points</p>
+<ul>
+<li>and a sublist</li>
+</ul>
+</li>
+<li class="task done"><input type=checkbox disabled checked> some of which ae done</li>
+</ul>
+<hr>
+<ul>
+<li><p>A lazy, lazy, list
+item.</p>
+</li>
+<li><p>Another one; this looks
+bad but is legal.</p>
+
+<p>  Second paragraph of second
+list item.</p>
+</li>
+</ul>
+<hr>
+
+<pre><code class="blue">Fenced Code Block
+# with verbatim Text
+`and an attribute`</code></pre>
+<div class="line-block">The limerick packs laughs anatomical<br>
+In space that is quite economical.<br>
+   But the <em>good</em> ones I&#x27;ve seen<br>
+   So seldom are <em>clean</em><br>
+And the clean ones so seldom are comical</div>
+<div class="line-block">The Right Honorable Most Venerable and Righteous Samuel L. Constable, Jr.<br>
+200 Main St.<br>
+Berkeley, CA 94718</div>
+<dl>
+<dt>Term 1</dt>
+<dd><p>This is a definition with two paragraphs. Lorem ipsum 
+dolor sit amet, consectetuer adipiscing elit. Aliquam 
+hendrerit mi posuere lectus.</p>
+
+<p>Vestibulum enim wisi, viverra nec, fringilla in, laoreet
+vitae, risus.</p>
+</dd>
+<dd>Second definition for term 1, also wrapped in a paragraph
+because of the blank line preceding it.</dd>
+<dt>Term 2</dt>
+<dd><p>This definition has a code block, a blockquote and a list.</p>
+
+<pre><code>code block.</code></pre>
+<blockquote><p>block quote
+on two lines.</p></blockquote>
+
+<ol>
+<li> first list item</li>
+<li> second list item</li>
+</ol>
+</dd>
+</dl>
+</section></section></section>' \
+'Full Page (MD Tests)'
+
+assert '%css shellwiki.css
+
+Shellwiki
+=========
+Shellwiki is a Wiki and Content Management System with minimal dependencies. It can run on embedded devices, as well as full size web servers.  Its goals are:
+
+  - **easy deployment**
+
+    *ShellWiki* can run on any Unix-Like web server. It requires no
+    scripting languages beyound the regular (Bourne style) Unix
+    shell, `awk`, and `sed`, all of which can be providede by
+    `busybox`. It can be launched via `netcat`, `inetd`, `systemd`,
+    or any cgi capable webserver like `apache` or `lighttpd`.  
+    *ShellWiki* can run easily on embedded systems like OpenWRT or
+    RaspberryPi, and just as easily on internet web servers
+    providing multisite setups.
+
+  - **accessibility**
+
+    *ShellWiki* requires no browserside scripting. It aims to be rendered
+    in all web browsers including `w3m` and `links` besides graphical
+    browsers like `chromium` or `firefox`. It is as accessible on mobile
+    screens as on desktop computers.  
+    *ShellWiki* uses the well known `markdown` syntax for formatting and
+    aims to provide consistent UI controls for various use cases.
+
+  - **adaptability**
+
+    *ShellWiki* is extensible through plugins and provides theming and
+    styling capabilities that make it suitable not only as a wiki, but
+    also as a CMS, including access scopes for different authors and
+    stylisticly distinct subpages.
+
+  - **simplicity**
+
+    *ShellWiki* avoids complexity in both software design and user
+    interface. It aims to be secure and predictable. Extensions can
+    be written and modified by system administrators.  
+
+<<toc 2 2>>
+
+Features
+--------
+  - **Markdown Wiki Syntax**
+
+    The wiki syntax is based on [John Grubers Markdown](https://daringfireball.net/projects/markdown/)
+    with extensions inspired by [Pandoc](https://pandoc.org/MANUAL.html#pandocs-markdown),
+    [PHP Markdown Extra](https://michelf.ca/projects/php-markdown/extra/), and
+    [Github Flavored Markdown](https://github.github.com/gfm/).
+    Additional Macros are provided to enable functions like an automatic table of contents, listing of
+    sub pages, etc.
+
+    See [Markdown](/software/cgilite/markdown/)  
+    and [Macros](macros/)
+
+  - **Plain file Storage**
+
+    Pages and attachments are stored as plain files on disk. There is no need for a separate database
+    system.
+
+  - **Git revisioning**
+
+    If `git` is available on the web server, pages can be revisioned so that past versions can be
+    revisited. Optionally attachments can be revisioned too. Server administrators may use the git
+    archives to synchronise sites across servers by adding their own mechanics.
+
+  - **Multisite Installation**
+
+    Code and data directories are stricly separate on the server. Directory pathes are obtained from
+    environment variables, so that multiple sites can be served from the same installation directory.
+
+    See also: [Installation](installation/)
+
+  - **Semantic HTML5**
+
+    for accessible rendering of pages
+
+  - **Descriptive Page Names**
+
+    URLs of pages can be freely provided by the user. User access can be constrained to specific sub
+    pages. Within their access permissions users can move and rename pages as they like.
+
+  - **File Upload / Attachment**
+
+    While pages are merely text documents themselves, users can upload additional attachments and
+    link to them in a page. Images and media files can be embedded directly into a page.
+
+  - **Image scaling**
+
+    If `ImageMagick` is available on the web server, huge attachment images are automatically compressed
+    and scaled to HD resolution when referred to in a page.
+    Of course the original version can still be linked to.
+
+  - **Permissions via ACL**
+
+    Grant read/write access for pages and sub-pages
+
+  - **User provided CSS**
+
+    Aside from full theming in the installation directory, pages can be styled using CSS files
+    uploaded as attachments.
+
+  - **No reliance on Javascript**
+
+    Authors and visitors can use the site without being forced to run untrusted code.
+    The main theme still provides collapsible menus and a responsive layout.
+
+  - **Complete GDPR Compliance** without consent walls
+
+    Because shellwiki does not track page visitors and does not
+    serve cookies to visitors by default it does not need to coerce
+    visitors into handling GDPR "consent" forms.
+
+    (Login for authors still requires a session cookie)
+
+  - **True multilanguage capability**
+
+    - Pages can be translated
+    - Switching language does not require a cookie
+    - Fallback language for missing translations
+    - Users stay on a translated version, even if single page translations are missing
+
+  - **Full text indexing and search**
+
+    Shellwiki contains its own basic text indexer without external dependencies.
+
+  - **Extensibility** through
+
+    - [Themes](themes/)
+    - [Macros](macros/)
+    - [URL Handlers](handlers/)
+    - [Custom Syntax parsers](parsers/)
+
+Dependencies
+------------
+Shellwiki is based on [cgilite](/software/cgilite/), which is included in the installation. It is written in posix compliant shell script, and the markdown renderer is written in ~~posix compliant~~ AWK. The entire wiki system can run with nothing more than a busybox. In fact it can be served from the rescue shell in a Debian initrd, or from an OpenWRT router.
+
+**Its precise requirements are:**
+
+ - A Posix Shell (as provided by busybox, but bash is OK)
+ - An AWK interpreter (as provided by busybox, but GNU AWK is OK)
+   - `mawk` and `nawk` will currently not work
+ - inetd (as provided by busybox)
+
+   **or** any CGI-Capable web server
+
+ - _Optional:_ GIT for revisioning
+ - _Optional:_ ImageMagick for image compression
+ - _Optional:_ Sendmail for sending password reminders, etc.
+
+Installation
+------------
+Also see -> [[installation/]]
+
+You can try out shellwiki right now using busybox:
+
+    ~$ git clone https://git.plutz.net/git/shellwiki ~/shellwiki
+    ~$ _DATA=~/wikidata busybox nc -llp 1080 -e ~/shellwiki/index.cgi
+
+For additional examples, regarding permanent installation and configuration in webservers see [[installation/]].
+
+Syntax
+------
+The wiki syntax is based on John Grubers [Markdown](https://daringfireball.net/projects/markdown/) with extensions borrowed from [Pandoc](https://pandoc.org/MANUAL%202.html#pandocs-markdown) and [PHP Markdown Extra](https://michelf.ca/projects/php-markdown/extra/). The Markdown parser is provided by [Cgilite](/software/cgilite/) and its full documentation can be looked at [here](/software/cgilite/markdown/).
+
+<<include --nolink /[wiki]/editorhelp/>>
+
+Macros
+------
+Also see -> [[macros/]]
+
+In addition to the Markdown syntax, wiki pages can include Macros, which perform additional functions on a page, like generating an image gallery, including parts of other pages, etc. Macros make Shellwiki truly dynamic and flexible.
+
+For example you can include a table of content for the current page by including the line
+
+    <<toc>>
+
+in your page. Macros can receive additional parameters, which modify their behaviour.
+
+Macros are the most easy to write type of extension. See [Macros](macros/) for a full list of available macros.
+
+Themes
+------
+Also see -> [[theming/]]
+
+While Shellwiki supports plugins for [theming](dev-theming/), it'\''s apearance can mostly be configured by the user. Pages can be configured to use custom CSS files. In addition page headers and footers are themselves wiki pages which can be modified to add menus, custom logos, links, etc. The same goes for error pages.
+
+For an example, see the [technical pages](/[wiki]/) for this wiki.
+
+Multiple Languages
+------------------
+To enable a multilingual setup you must set a default language in your configuration environment:
+
+```
+export LANGUAGE_DEFAULT=en
+```
+
+Once this is the case, pagenames starting with a colon (`:`) will be considered translated versions of their parent pages. I.e. the pages `/`, `/:de`, and `/:fr` will serve as the default, german, and french home page respectively.
+
+The names of the languages can be arbitrary, but I recommend using [ISO-639](https://en.wikipedia.org/wiki/ISO_639-1) codes, because the code is used in the `lang=""` attribute of the pages top level html element. You can however make up non-standardised or fantastic language names as well.
+
+Links on each page will automatically be suffixed with the same language tag, so a visitor keeps browsing the same language without needing a cookie. Attachments should only be uploaded to the default language page, and attachment links in the translated pages will correctly point to the main page attachments. You can create a language menu on the header page, simply by linking to `./:en`, `./:es` , `./:fr`, etc.
+
+Header, footer, and error pages will be included from their respective language version, as will all macro includes, etc. Should a page not exist in a given language, the default page will be displayed instead. However, included elements will still be taken from the respective language version, possibly mixing languages between the selected user language and the default.
+
+### Constraints of the current implementation
+ - There can be only one default language, with no priority of different fallback languages
+ - Page URLs can currently not be translated. Doing so would require a model for manually assigning translated page names and would not be trivial to use.
+
+Developer Documentation
+-----------------------
+How to write:
+
+ - [Themes](dev-theming/)
+ - [Macros](dev-macros/)
+ - [Handlers](dev-handlers/)
+ - [Parsers](dev-parsers/)' \
+'<section class="h1" id="1:Shellwiki"><h1>Shellwiki<a class="anchor" href="#1:Shellwiki"></a></h1>
+<p>Shellwiki is a Wiki and Content Management System with minimal dependencies. It can run on embedded devices, as well as full size web servers.  Its goals are:</p>
+<ul>
+<li><p><strong>easy deployment</strong></p>
+
+<p><em>ShellWiki</em> can run on any Unix-Like web server. It requires no
+scripting languages beyound the regular (Bourne style) Unix
+shell, <code>awk</code>, and <code>sed</code>, all of which can be providede by
+<code>busybox</code>. It can be launched via <code>netcat</code>, <code>inetd</code>, <code>systemd</code>,
+or any cgi capable webserver like <code>apache</code> or <code>lighttpd</code>.<br>
+<em>ShellWiki</em> can run easily on embedded systems like OpenWRT or
+RaspberryPi, and just as easily on internet web servers
+providing multisite setups.</p>
+</li>
+<li><p><strong>accessibility</strong></p>
+
+<p><em>ShellWiki</em> requires no browserside scripting. It aims to be rendered
+in all web browsers including <code>w3m</code> and <code>links</code> besides graphical
+browsers like <code>chromium</code> or <code>firefox</code>. It is as accessible on mobile
+screens as on desktop computers.<br>
+<em>ShellWiki</em> uses the well known <code>markdown</code> syntax for formatting and
+aims to provide consistent UI controls for various use cases.</p>
+</li>
+<li><p><strong>adaptability</strong></p>
+
+<p><em>ShellWiki</em> is extensible through plugins and provides theming and
+styling capabilities that make it suitable not only as a wiki, but
+also as a CMS, including access scopes for different authors and
+stylisticly distinct subpages.</p>
+</li>
+<li><p><strong>simplicity</strong></p>
+
+<p><em>ShellWiki</em> avoids complexity in both software design and user
+interface. It aims to be secure and predictable. Extensions can
+be written and modified by system administrators.  </p>
+</li>
+</ul>
+<code class="macro">toc 2 2</code><section class="h2" id="1.1:Features"><h2>Features<a class="anchor" href="#1.1:Features"></a></h2>
+<ul>
+<li><p><strong>Markdown Wiki Syntax</strong></p>
+
+<p>The wiki syntax is based on <a href="https://daringfireball.net/projects/markdown/">John Grubers Markdown</a>
+with extensions inspired by <a href="https://pandoc.org/MANUAL.html#pandocs-markdown">Pandoc</a>,
+<a href="https://michelf.ca/projects/php-markdown/extra/">PHP Markdown Extra</a>, and
+<a href="https://github.github.com/gfm/">Github Flavored Markdown</a>.
+Additional Macros are provided to enable functions like an automatic table of contents, listing of
+sub pages, etc.</p>
+
+<p>See <a href="/software/cgilite/markdown/">Markdown</a><br>
+and <a href="macros/">Macros</a></p>
+</li>
+<li><p><strong>Plain file Storage</strong></p>
+
+<p>Pages and attachments are stored as plain files on disk. There is no need for a separate database
+system.</p>
+</li>
+<li><p><strong>Git revisioning</strong></p>
+
+<p>If <code>git</code> is available on the web server, pages can be revisioned so that past versions can be
+revisited. Optionally attachments can be revisioned too. Server administrators may use the git
+archives to synchronise sites across servers by adding their own mechanics.</p>
+</li>
+<li><p><strong>Multisite Installation</strong></p>
+
+<p>Code and data directories are stricly separate on the server. Directory pathes are obtained from
+environment variables, so that multiple sites can be served from the same installation directory.</p>
+
+<p>See also: <a href="installation/">Installation</a></p>
+</li>
+<li><p><strong>Semantic HTML5</strong></p>
+
+<p>for accessible rendering of pages</p>
+</li>
+<li><p><strong>Descriptive Page Names</strong></p>
+
+<p>URLs of pages can be freely provided by the user. User access can be constrained to specific sub
+pages. Within their access permissions users can move and rename pages as they like.</p>
+</li>
+<li><p><strong>File Upload / Attachment</strong></p>
+
+<p>While pages are merely text documents themselves, users can upload additional attachments and
+link to them in a page. Images and media files can be embedded directly into a page.</p>
+</li>
+<li><p><strong>Image scaling</strong></p>
+
+<p>If <code>ImageMagick</code> is available on the web server, huge attachment images are automatically compressed
+and scaled to HD resolution when referred to in a page.
+Of course the original version can still be linked to.</p>
+</li>
+<li><p><strong>Permissions via ACL</strong></p>
+
+<p>Grant read/write access for pages and sub-pages</p>
+</li>
+<li><p><strong>User provided CSS</strong></p>
+
+<p>Aside from full theming in the installation directory, pages can be styled using CSS files
+uploaded as attachments.</p>
+</li>
+<li><p><strong>No reliance on Javascript</strong></p>
+
+<p>Authors and visitors can use the site without being forced to run untrusted code.
+The main theme still provides collapsible menus and a responsive layout.</p>
+</li>
+<li><p><strong>Complete GDPR Compliance</strong> without consent walls</p>
+
+<p>Because shellwiki does not track page visitors and does not
+serve cookies to visitors by default it does not need to coerce
+visitors into handling GDPR &quot;consent&quot; forms.</p>
+
+<p>(Login for authors still requires a session cookie)</p>
+</li>
+<li><p><strong>True multilanguage capability</strong></p>
+<ul>
+<li>Pages can be translated</li>
+<li>Switching language does not require a cookie</li>
+<li>Fallback language for missing translations</li>
+<li>Users stay on a translated version, even if single page translations are missing</li>
+</ul>
+</li>
+<li><p><strong>Full text indexing and search</strong></p>
+
+<p>Shellwiki contains its own basic text indexer without external dependencies.</p>
+</li>
+<li><p><strong>Extensibility</strong> through</p>
+<ul>
+<li><a href="themes/">Themes</a></li>
+<li><a href="macros/">Macros</a></li>
+<li><a href="handlers/">URL Handlers</a></li>
+<li><a href="parsers/">Custom Syntax parsers</a></li>
+</ul>
+</li>
+</ul>
+</section><section class="h2" id="1.2:Dependencies"><h2>Dependencies<a class="anchor" href="#1.2:Dependencies"></a></h2>
+<p>Shellwiki is based on <a href="/software/cgilite/">cgilite</a>, which is included in the installation. It is written in posix compliant shell script, and the markdown renderer is written in <del>posix compliant</del> AWK. The entire wiki system can run with nothing more than a busybox. In fact it can be served from the rescue shell in a Debian initrd, or from an OpenWRT router.</p>
+
+<p><strong>Its precise requirements are:</strong></p>
+<ul>
+<li><p>A Posix Shell (as provided by busybox, but bash is OK)</p>
+</li>
+<li><p>An AWK interpreter (as provided by busybox, but GNU AWK is OK)</p>
+<ul>
+<li><code>mawk</code> and <code>nawk</code> will currently not work</li>
+</ul>
+</li>
+<li><p>inetd (as provided by busybox)</p>
+
+<p><strong>or</strong> any CGI-Capable web server</p>
+</li>
+<li><p><em>Optional:</em> GIT for revisioning</p>
+</li>
+<li><p><em>Optional:</em> ImageMagick for image compression</p>
+</li>
+<li><p><em>Optional:</em> Sendmail for sending password reminders, etc.</p>
+</li>
+</ul>
+</section><section class="h2" id="1.3:Installation"><h2>Installation<a class="anchor" href="#1.3:Installation"></a></h2>
+<p>Also see &rarr; <a href="installation/">installation/</a></p>
+
+<p>You can try out shellwiki right now using busybox:</p>
+
+<pre><code>~$ git clone https://git.plutz.net/git/shellwiki ~/shellwiki
+~$ _DATA=~/wikidata busybox nc -llp 1080 -e ~/shellwiki/index.cgi</code></pre>
+<p>For additional examples, regarding permanent installation and configuration in webservers see <a href="installation/">installation/</a>.</p>
+
+</section><section class="h2" id="1.4:Syntax"><h2>Syntax<a class="anchor" href="#1.4:Syntax"></a></h2>
+<p>The wiki syntax is based on John Grubers <a href="https://daringfireball.net/projects/markdown/">Markdown</a> with extensions borrowed from <a href="https://pandoc.org/MANUAL%202.html#pandocs-markdown">Pandoc</a> and <a href="https://michelf.ca/projects/php-markdown/extra/">PHP Markdown Extra</a>. The Markdown parser is provided by <a href="/software/cgilite/">Cgilite</a> and its full documentation can be looked at <a href="/software/cgilite/markdown/">here</a>.</p>
+
+<code class="macro">include --nolink /[wiki]/editorhelp/</code></section><section class="h2" id="1.5:Macros"><h2>Macros<a class="anchor" href="#1.5:Macros"></a></h2>
+<p>Also see &rarr; <a href="macros/">macros/</a></p>
+
+<p>In addition to the Markdown syntax, wiki pages can include Macros, which perform additional functions on a page, like generating an image gallery, including parts of other pages, etc. Macros make Shellwiki truly dynamic and flexible.</p>
+
+<p>For example you can include a table of content for the current page by including the line</p>
+
+<pre><code>&lt;&lt;toc&gt;&gt;</code></pre>
+<p>in your page. Macros can receive additional parameters, which modify their behaviour.</p>
+
+<p>Macros are the most easy to write type of extension. See <a href="macros/">Macros</a> for a full list of available macros.</p>
+
+</section><section class="h2" id="1.6:Themes"><h2>Themes<a class="anchor" href="#1.6:Themes"></a></h2>
+<p>Also see &rarr; <a href="theming/">theming/</a></p>
+
+<p>While Shellwiki supports plugins for <a href="dev-theming/">theming</a>, it&#x27;s apearance can mostly be configured by the user. Pages can be configured to use custom CSS files. In addition page headers and footers are themselves wiki pages which can be modified to add menus, custom logos, links, etc. The same goes for error pages.</p>
+
+<p>For an example, see the <a href="/[wiki]/">technical pages</a> for this wiki.</p>
+
+</section><section class="h2" id="1.7:Multiple%20Languages"><h2>Multiple Languages<a class="anchor" href="#1.7:Multiple%20Languages"></a></h2>
+<p>To enable a multilingual setup you must set a default language in your configuration environment:</p>
+
+<pre><code>export LANGUAGE_DEFAULT=en</code></pre>
+<p>Once this is the case, pagenames starting with a colon (<code>:</code>) will be considered translated versions of their parent pages. I.e. the pages <code>/</code>, <code>/:de</code>, and <code>/:fr</code> will serve as the default, german, and french home page respectively.</p>
+
+<p>The names of the languages can be arbitrary, but I recommend using <a href="https://en.wikipedia.org/wiki/ISO_639-1">ISO-639</a> codes, because the code is used in the <code>lang=""</code> attribute of the pages top level html element. You can however make up non-standardised or fantastic language names as well.</p>
+
+<p>Links on each page will automatically be suffixed with the same language tag, so a visitor keeps browsing the same language without needing a cookie. Attachments should only be uploaded to the default language page, and attachment links in the translated pages will correctly point to the main page attachments. You can create a language menu on the header page, simply by linking to <code>./:en</code>, <code>./:es</code> , <code>./:fr</code>, etc.</p>
+
+<p>Header, footer, and error pages will be included from their respective language version, as will all macro includes, etc. Should a page not exist in a given language, the default page will be displayed instead. However, included elements will still be taken from the respective language version, possibly mixing languages between the selected user language and the default.</p>
+
+<section class="h3" id="1.7.1:Constraints%20of%20the%20current%20implementation"><h3>Constraints of the current implementation<a class="anchor" href="#1.7.1:Constraints%20of%20the%20current%20implementation"></a></h3>
+<ul>
+<li>There can be only one default language, with no priority of different fallback languages</li>
+<li>Page URLs can currently not be translated. Doing so would require a model for manually assigning translated page names and would not be trivial to use.</li>
+</ul>
+</section></section><section class="h2" id="1.8:Developer%20Documentation"><h2>Developer Documentation<a class="anchor" href="#1.8:Developer%20Documentation"></a></h2>
+<p>How to write:</p>
+<ul>
+<li><a href="dev-theming/">Themes</a></li>
+<li><a href="dev-macros/">Macros</a></li>
+<li><a href="dev-handlers/">Handlers</a></li>
+<li><a href="dev-parsers/">Parsers</a></li>
+</ul>
+</section></section>' \
+'Full Page (ShellWiki)'
+
 printf '\nAll tests passed!\n'
-- 
2.39.5


From c46fa28d9b531c7fe1ffa5d1bbce39a4b71f6597 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Fri, 30 Aug 2024 14:38:22 +0200
Subject: [PATCH 06/16] avoid some recursion for better compatibility and
 performance

---
 markdown.awk | 458 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 250 insertions(+), 208 deletions(-)

diff --git a/markdown.awk b/markdown.awk
index 26fc11b..d1c8b5b 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -121,217 +121,259 @@ function URL ( text, sharp ) {
   return text;
 }
 
-function inline( line, LOCAL, len, text, code, href, guard ) {
-  if ( line ~ /^$/ ) {  # Recursion End
-    return "";
-
-  # omit processing of escaped characters
-  } else if ( line ~ /^\\./) {
-    return HTML(substr(line, 2, 1)) inline( substr(line, 3) );
-
-  # hard brakes
-  } else if ( match(line, /^  \n/) ) {
-    return "<br>\n" inline( substr(line, RLENGTH + 1) );
-
-  #  ``code spans``
-  } else if ( match( line, /^`+/) ) {
-    len = RLENGTH
-    guard = substr( line, 1, len )
-    if ( match(line, guard ".*" guard) ) {
-      code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1)
-      len = 2 * length(guard) + length(code)
-      #  strip single surrounding white spaces
-      gsub( /^ | $/, "", code)
-      #  escape HTML within code span
-      gsub( /&/, "\\&amp;", code ); gsub( /</, "\\&lt;", code ); gsub( />/, "\\&gt;", code );
-      return "<code>" code "</code>" inline( substr( line, len + 1 ) )
-    }
-
-  # Macros
-  } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) {
-    len = RLENGTH;
-    return "<code class=\"macro\">" HTML( substr( line, 3, len - 4 ) ) "</code>" inline(substr(line, len + 1));
-
-  # Wiki style links
-  } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) {
-    len = RLENGTH; href = text = substr(line, 1, len);
-    sub(/^\[\[/, "", href); sub(/(\|([^]]+))?\]\].*$/, "", href);
-    sub(/^\[\[([^]|]+)/, "", text); sub(/\]\].*$/, "", text); sub(/^\|/, "", text);
-    # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", href );
-    # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", text );
-    if ( ! text ) text = href;
-    return "<a href=\"" HTML(href) "\">" HTML(text) "</a>" inline( substr( line, len + 1) );
-
-  #  quick links ("automatic links" in md doc)
-  } else if ( match( line, /^<[a-zA-Z]+:\/\/([-\.[:alnum:]]+)(:[0-9]*)?(\/[^>]*)?>/ ) ) {
-    len = RLENGTH;
-    href = HTML( substr( line, 2, len - 2) );
-    return "<a href=\"" href "\">" href "</a>" inline( substr( line, len + 1) );
-
-  # quick link email
-  } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) {
-    len = RLENGTH;
-    href = HTML( substr( line, 2, len - 2) );
-    return "<a href=\"mailto:" href "\">" href "</a>" inline( substr( line, len + 1) );
-
-  # Verbatim inline HTML
-  } else if ( AllowHTML && match( line, /^(<!--([^-]|-[^-]|--[^>])*-->|<\?([^\?]|\?[^>])*\?>|<![A-Z][^>]*>|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) {
-    len = RLENGTH;
-    return substr( line, 1, len) inline(substr(line, len + 1));
-
-  # inline links
-  } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) {
-    len = RLENGTH;
-    text = href = title = substr( line, 1, len);
-    sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text);
-    sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href);
-    sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title);
-
-    if ( match(href, /^<.*>$/) ) { sub(/^</, "", href); sub(/>$/, "", href); }
-         if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); }
-    else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); }
-    else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); }
-
-    gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title);
-
-    return "<a href=\"" HTML(href) "\"" (title?" title=\"" HTML(title) "\"":"") ">" \
-           inline( text ) "</a>" inline( substr( line, len + 1) );
-
-  # reference style links
-  } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) {
-    len = RLENGTH; text = id = substr(line, 1, len);
-    sub(/\n.*$/, "", text); sub(/^\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text);
-    sub(/\n.*$/, "",   id); sub(/^\[([^]]+)\] ?\[/, "",   id); sub(/\].*$/, "",   id);
-    # text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, text );
-    # id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1,   id );
-    if ( ! id ) id = text;
-    if ( rl_href[id] && rl_title[id] ) {
-      return "<a href=\"" HTML(rl_href[id]) "\" title=\"" HTML(rl_title[id]) "\">" inline(text) "</a>" inline( substr( line, len + 1) );
-    } else if ( rl_href[id] ) {
-      return "<a href=\"" HTML(rl_href[id]) "\">" inline(text) "</a>" inline( substr( line, len + 1) );
-    } else {
-      return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) );
-    }
-
-  # inline images
-  } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) {
-    len = RLENGTH; text = href = title = attrib = substr( line, 1, len);
-
-    sub("^!\\[", "", text);
-    sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text);
-
-    sub("^!" lix "\\([\n\t ]*", "", href);
-    sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href);
-
-    sub("^!" lix "\\([\n\t ]*" lid, "", title);
-    sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title);
-    sub("^[\n\t ]+", "", title);
+function inline( line, LOCAL, len, text, code, href, guard, ret ) {
+  ret = "";
+  while (line !~ /^$/) {
+    # omit processing of escaped characters
+    if ( line ~ /^\\./) {
+      ret = ret HTML(substr(line, 2, 1)); line = substr(line, 3);
+      continue;
+
+    # hard breaks
+    } else if ( match(line, /^  \n/) ) {
+      ret = ret "<br>\n"; line = substr(line, RLENGTH + 1);
+      continue;
+
+    #  ``code spans`` (opened and closed by the same run of backticks)
+    } else if ( match( line, /^`+/) ) {
+      len = RLENGTH; guard = substr( line, 1, len )
+      if ( match(line, guard ".*" guard) ) {
+        code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1)
+        len = 2 * length(guard) + length(code)
+        #  strip single surrounding white spaces
+        gsub( /^ | $/, "", code)
+        #  escape HTML within code span
+        gsub( /&/, "\\&amp;", code ); gsub( /</, "\\&lt;", code ); gsub( />/, "\\&gt;", code );
+        ret = ret "<code>" code "</code>"; line = substr( line, len + 1 ); continue;
+      } else {  # no closing guard: keep the backticks literally so the loop always consumes input
+        ret = ret guard; line = substr( line, len + 1 ); continue;
+      }
 
-    sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib);
-    sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib);
+    # Macros
+    } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) {
+      len = RLENGTH;
+      ret = ret "<code class=\"macro\">" HTML( substr( line, 3, len - 4 ) ) "</code>"; line = substr(line, len + 1);
+      continue;
+
+    # Wiki style links
+    } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) {
+      len = RLENGTH; href = text = substr(line, 1, len);
+      sub(/^\[\[/, "", href); sub(/(\|([^]]+))?\]\].*$/, "", href);
+      sub(/^\[\[([^]|]+)/, "", text); sub(/\]\].*$/, "", text); sub(/^\|/, "", text);
+      # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", href );
+      # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", text );
+      if ( ! text ) text = href;
+      ret = ret "<a href=\"" HTML(href) "\">" HTML(text) "</a>"; line = substr( line, len + 1);
+      continue;
+
+    #  quick links ("automatic links" in md doc)
+    } else if ( match( line, /^<[a-zA-Z]+:\/\/([-\.[:alnum:]]+)(:[0-9]*)?(\/[^>]*)?>/ ) ) {
+      len = RLENGTH;
+      href = HTML( substr( line, 2, len - 2) );
+      ret = ret "<a href=\"" href "\">" href "</a>"; line = substr( line, len + 1);
+      continue;
+
+    # quick link email
+    # } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) {
+    } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9])?)*>/ ) ) {
+      len = RLENGTH;
+      href = HTML( substr( line, 2, len - 2) );
+      ret = ret "<a href=\"mailto:" href "\">" href "</a>"; line = substr( line, len + 1);
+      continue;
+
+    # Verbatim inline HTML
+    } else if ( AllowHTML && match( line, /^(<!--([^-]|-[^-]|--[^>])*-->|<\?([^\?]|\?[^>])*\?>|<![A-Z][^>]*>|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) {
+      len = RLENGTH;
+      ret = ret substr( line, 1, len); line =substr(line, len + 1);
+      continue;
+
+    # inline links
+    } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) {
+      len = RLENGTH;
+      text = href = title = substr( line, 1, len);
+      sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text);
+      sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href);
+      sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title);
+
+      if ( match(href, /^<.*>$/) ) { sub(/^</, "", href); sub(/>$/, "", href); }
+           if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); }
+      else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); }
+      else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); }
+
+      gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title);
+
+      ret = ret "<a href=\"" HTML(href) "\"" (title?" title=\"" HTML(title) "\"":"") ">" \
+             inline( text ) "</a>";
+      line = substr( line, len + 1);
+      continue;
+
+    # reference style links
+    } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) {
+      len = RLENGTH; text = id = substr(line, 1, len);
+      sub(/\n.*$/, "", text); sub(/^\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text);
+      sub(/\n.*$/, "",   id); sub(/^\[([^]]+)\] ?\[/, "",   id); sub(/\].*$/, "",   id);
+      # text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, text );
+      # id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1,   id );
+      if ( ! id ) id = text;
+
+      if ( rl_href[id] && rl_title[id] ) {
+        ret = ret "<a href=\"" HTML(rl_href[id]) "\" title=\"" HTML(rl_title[id]) "\">" inline(text) "</a>";
+        line = substr( line, len + 1);
+        continue;
+
+      } else if ( rl_href[id] ) {
+        ret = ret "<a href=\"" HTML(rl_href[id]) "\">" inline(text) "</a>"; line = substr( line, len + 1);
+        continue;
+
+      } else {
+        ret = ret "" HTML(substr(line, 1, len)); line = substr(line, len + 1);
+        continue;
+      }
 
-    if ( match(href, /^<.*>$/) ) { sub(/^</, "", href); sub(/>$/, "", href); }
-         if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); }
-    else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); }
-    else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); }
+    # inline images
+    } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) {
+      len = RLENGTH; text = href = title = attrib = substr( line, 1, len);
+
+      sub("^!\\[", "", text);
+      sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text);
+
+      sub("^!" lix "\\([\n\t ]*", "", href);
+      sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href);
+
+      sub("^!" lix "\\([\n\t ]*" lid, "", title);
+      sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title);
+      sub("^[\n\t ]+", "", title);
+
+      sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib);
+      sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib);
+
+      if ( match(href, /^<.*>$/) ) { sub(/^</, "", href); sub(/>$/, "", href); }
+           if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); }
+      else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); }
+      else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); }
+
+      gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href);
+      gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title);
+
+      ret = ret "<img src=\"" HTML(href) "\" alt=\"" HTML(text?text:title?title:href) "\"" \
+             (title?" title=\"" HTML(title) "\"":"") (attrib?" class=\"" HTML(attrib) "\"":"") \
+             ">";
+      line = substr( line, len + 1);
+      continue;
+
+    # reference style images: ![alt][id]; alt may be empty, an empty id falls back to alt
+    } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) {
+      len = RLENGTH; text = id = substr(line, 1, len);
+      sub(/\n.*$/, "", text); sub(/^!\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text);
+      sub(/\n.*$/, "",   id); sub(/^!\[([^]]*)\] ?\[/, "",   id); sub(/\].*$/, "",   id);
+      # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) );
+      #   id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) );
+      if ( ! id ) id = text;
+      if ( rl_href[id] && rl_title[id] ) {
+        ret = ret "<img src=\"" HTML(rl_href[id]) "\" alt=\"" HTML(text) "\" title=\"" HTML(rl_title[id]) "\">";
+        line = substr( line, len + 1);
+        continue;
+
+      } else if ( rl_href[id] ) {
+        ret = ret "<img src=\"" HTML(rl_href[id]) "\" alt=\"" HTML(text) "\">";
+        line = substr( line, len + 1);
+        continue;
+
+      } else {
+        ret = ret "" HTML(substr(line, 1, len)); line = substr(line, len + 1);
+        continue;
+      }
 
-    gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href);
-    gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title);
-
-    return "<img src=\"" HTML(href) "\" alt=\"" HTML(text?text:title?title:href) "\"" \
-           (title?" title=\"" HTML(title) "\"":"") (attrib?" class=\"" HTML(attrib) "\"":"") \
-           ">" inline( substr( line, len + 1) );
-
-  # reference style images
-  } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) {
-    len = RLENGTH; text = id = substr(line, 1, len);
-    sub(/\n.*$/, "", text); sub(/^!\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text);
-    sub(/\n.*$/, "",   id); sub(/^!\[([^]]+)\] ?\[/, "",   id); sub(/\].*$/, "",   id);
-    # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) );
-    #   id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) );
-    if ( ! id ) id = text;
-    if ( rl_href[id] && rl_title[id] ) {
-      return "<img src=\"" HTML(rl_href[id]) "\" alt=\"" HTML(text) "\" title=\"" HTML(rl_title[id]) "\">" \
-             inline( substr( line, len + 1) );
-    } else if ( rl_href[id] ) {
-      return "<img src=\"" HTML(rl_href[id]) "\" alt=\"" HTML(text) "\">" \
-             inline( substr( line, len + 1) );
+    #  ~~strikeout~~ (pandoc)
+    } else if ( match(line, /^~~([[:graph:]]|[[:graph:]]([^~]|~[^~])*[[:graph:]])~~/) ) {
+      len = RLENGTH;
+      ret = ret "<del>" inline( substr( line, 3, len - 4 ) ) "</del>"; line = substr( line, len + 1 );
+      continue;
+
+    #  ^superscript^ (pandoc)
+    } else if ( match(line, /^\^([^[:space:]^]|\\[ ^])+\^/) ) {
+      len = RLENGTH;
+      ret = ret "<sup>" inline( substr( line, 2, len - 2 ) ) "</sup>"; line = substr( line, len + 1 );
+      continue;
+
+    #  ~subscript~ (pandoc)
+    } else if ( match(line, /^~([^[:space:]~]|\\[ ~])+~/) ) {
+      len = RLENGTH;
+      ret = ret "<sub>" inline( substr( line, 2, len - 2 ) ) "</sub>"; line = substr( line, len + 1 );
+      continue;
+
+    # ignore embedded underscores (pandoc, php md)
+    } else if ( match(line, "^[[:alnum:]](__|_)") ) {
+      ret = ret HTML(substr( line, 1, RLENGTH)); line = substr(line, RLENGTH + 1);
+      continue;
+
+    #  __strong__$
+    } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__$") ) {
+      len = RLENGTH;
+      ret = ret "<strong>" inline( substr( line, 3, len - 4 ) ) "</strong>"; line = substr( line, len + 1 );
+      continue;
+
+    #  __strong__
+    } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__[[:space:][:punct:]]") ) {
+      len = RLENGTH;
+      ret = ret "<strong>" inline( substr( line, 3, len - 5 ) ) "</strong>"; line = substr( line, len);
+      continue;
+
+    #  **strong**
+    } else if ( match(line, "^\\*\\*(([^\\*[:space:]]|" iea ")|([^\\*[:space:]]|" iea ")(" na "|" iea ")*([^\\*[:space:]]|" iea "))\\*\\*") ) {
+      len = RLENGTH;
+      ret = ret "<strong>" inline( substr( line, 3, len - 4 ) ) "</strong>"; line = substr( line, len + 1 );
+      continue;
+
+    #  _em_$
+    } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_$") ) {
+      len = RLENGTH;
+      ret = ret "<em>" inline( substr( line, 2, len - 2 ) ) "</em>"; line = substr( line, len + 1 );
+      continue;
+
+    #  _em_
+    } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_[[:space:][:punct:]]") ) {
+      len = RLENGTH;
+      ret = ret "<em>" inline( substr( line, 2, len - 3 ) ) "</em>"; line = substr( line, len );
+      continue;
+
+    #  *em*
+    } else if ( match(line, "^\\*(([^\\*[:space:]]|" isa ")|([^\\*[:space:]]|" isa ")(" na "|" isa ")*([^\\*[:space:]]|" isa "))\\*") ) {
+      len = RLENGTH;
+      ret = ret "<em>" inline( substr( line, 2, len - 2 ) ) "</em>"; line = substr( line, len + 1 );
+      continue;
+
+    # Literal HTML entities
+    # } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) {
+    # mawk does not support repetition ranges
+    } else if ( match( line, /^&([a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?|#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) {
+      len = RLENGTH;
+      ret = ret substr( line, 1, len ); line = substr(line, len + 1);
+      continue;
+
+    # Arrows
+    } else if ( line ~ /^-->( |$)/) {  # ignore multidash-arrow
+      ret = ret "--&gt;"; line = substr(line, 4);
+      continue;
+    } else if ( line ~ /^<-( |$)/) {
+      ret = ret "&larr;"; line = substr(line, 3);
+      continue;
+    } else if ( line ~ /^->( |$)/) {
+      ret = ret "&rarr;"; line = substr(line, 3);
+      continue;
+
+    # Escape lone HTML character
+    } else if ( match( line, /^[&<>"']/) ) {
+      ret = ret HTML(substr(line, 1, 1)); line = substr(line, 2);
+      continue;
+
+    #  continue walk over string
     } else {
-      return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) );
+      ret = ret substr(line, 1, 1); line = substr(line, 2);
+      continue;
     }
-
-  #  ~~strikeout~~ (pandoc)
-  } else if ( match(line, /^~~([[:graph:]]|[[:graph:]]([^~]|~[^~])*[[:graph:]])~~/) ) {
-    len = RLENGTH;
-    return "<del>" inline( substr( line, 3, len - 4 ) ) "</del>" inline( substr( line, len + 1 ) );
-
-  #  ^superscript^ (pandoc)
-  } else if ( match(line, /^\^([^[:space:]^]|\\[ ^])+\^/) ) {
-    len = RLENGTH;
-    return "<sup>" inline( substr( line, 2, len - 2 ) ) "</sup>" inline( substr( line, len + 1 ) );
-
-  #  ~subscript~ (pandoc)
-  } else if ( match(line, /^~([^[:space:]~]|\\[ ~])+~/) ) {
-    len = RLENGTH;
-    return "<sub>" inline( substr( line, 2, len - 2 ) ) "</sub>" inline( substr( line, len + 1 ) );
-
-  # ignore embedded underscores (pandoc, php md)
-  } else if ( match(line, "^[[:alnum:]](__|_)") ) {
-    return HTML(substr( line, 1, RLENGTH)) inline( substr(line, RLENGTH + 1) );
-
-  #  __strong__$
-  } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__$") ) {
-    len = RLENGTH;
-    return "<strong>" inline( substr( line, 3, len - 4 ) ) "</strong>" inline( substr( line, len + 1 ) );
-
-  #  __strong__
-  } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__[[:space:][:punct:]]") ) {
-    len = RLENGTH;
-    return "<strong>" inline( substr( line, 3, len - 5 ) ) "</strong>" inline( substr( line, len) );
-
-  #  **strong**
-  } else if ( match(line, "^\\*\\*(([^\\*[:space:]]|" iea ")|([^\\*[:space:]]|" iea ")(" na "|" iea ")*([^\\*[:space:]]|" iea "))\\*\\*") ) {
-    len = RLENGTH;
-    return "<strong>" inline( substr( line, 3, len - 4 ) ) "</strong>" inline( substr( line, len + 1 ) );
-
-  #  _em_$
-  } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_$") ) {
-    len = RLENGTH;
-    return "<em>" inline( substr( line, 2, len - 2 ) ) "</em>" inline( substr( line, len + 1 ) );
-
-  #  _em_
-  } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_[[:space:][:punct:]]") ) {
-    len = RLENGTH;
-    return "<em>" inline( substr( line, 2, len - 3 ) ) "</em>" inline( substr( line, len ) );
-
-  #  *em*
-  } else if ( match(line, "^\\*(([^\\*[:space:]]|" isa ")|([^\\*[:space:]]|" isa ")(" na "|" isa ")*([^\\*[:space:]]|" isa "))\\*") ) {
-    len = RLENGTH;
-    return "<em>" inline( substr( line, 2, len - 2 ) ) "</em>" inline( substr( line, len + 1 ) );
-
-  # Literal HTML entities
-  # } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) {
-  # mawk does not support repitition ranges
-  } else if ( match( line, /^&([a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?|#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) {
-    len = RLENGTH;
-    return substr( line, 1, len ) inline(substr(line, len + 1));
-
-  # Arrows
-  } else if ( line ~ /^-->( |$)/) {  # ignore multidash-arrow
-    return "--&gt;" inline( substr(line, 4) );
-  } else if ( line ~ /^<-( |$)/) {
-    return "&larr;" inline( substr(line, 3) );
-  } else if ( line ~ /^->( |$)/) {
-    return "&rarr;" inline( substr(line, 3) );
-
-  # Escape lone HTML character
-  } else if ( match( line, /^[&<>"']/) ) {
-    return HTML(substr(line, 1, 1)) inline(substr(line, 2));
-
-  #  continue walk over string
-  } else {
-    return substr(line, 1, 1) inline( substr(line, 2) );
   }
+  return ret;
 }
 
 function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) {
@@ -364,14 +406,14 @@ function _nblock( block, LOCAL, sec, n ) {
   for ( n = blvl + 1; n in BL; n++) { delete BL[n]; }
 
   block = _block( block );
-  match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART),  HL);
+  match(hstack, /([0-9]+( [0-9]+)( [0-9]+)?( [0-9]+)?( [0-9]+)?( [0-9]+)?)$/); split( substr(hstack, RSTART),  HL);
   sec = ""; for ( n = 1; n <= 6; n++ ) { sec = sec (HL[n]?"</section>":""); }
 
-  sub("( +[0-9]+){6} *$", "", hstack); blvl--;
+  sub("( +[0-9]+)( +[0-9]+)?( +[0-9]+)?( +[0-9]+)?( +[0-9]+)?( +[0-9]+)? *$", "", hstack); blvl--;
   return block sec;
 }
 
-function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list ) {
+function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list, tmp ) {
   gsub( "(^\n+|\n+$)", "", block );
 
   if ( block == "" ) {
-- 
2.39.5


From b6f82bc119dd614862f61df3b1978cc5789ab31a Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Fri, 30 Aug 2024 16:30:00 +0200
Subject: [PATCH 07/16] avoid recursion in _block function to increase
 compatibility

---
 markdown.awk | 756 ++++++++++++++++++++++++++-------------------------
 1 file changed, 393 insertions(+), 363 deletions(-)

diff --git a/markdown.awk b/markdown.awk
index d1c8b5b..fc2f203 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -413,408 +413,438 @@ function _nblock( block, LOCAL, sec, n ) {
   return block sec;
 }
 
-function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list, tmp ) {
-  gsub( "(^\n+|\n+$)", "", block );
-
-  if ( block == "" ) {
-    return "";
-
-  # HTML #2 #3 #4 $5
-  } else if ( AllowHTML && match( block, /(^|\n) ? ? ?(<!--([^-]|-[^-]|--[^>])*(-->|$)|<\?([^\?]|\?[^>])*(\?>|$)|<![A-Z][^>]*(>|$)|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*(\]\]>|$))/) ) {
-    len = RLENGTH; st = RSTART;
-    return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
-
-  # HTML #6
-  } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
-    len = RLENGTH; st = RSTART;
-    return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
-
-  # HTML #1
-  } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<(script|pre|style)([[:space:]\n>]).*(<\/script>|<\/pre>|<\/style>|$)/) ) {
-    len = RLENGTH; st = RSTART;
-    match( tolower(substr(block, st, len)), /(<\/script>|<\/pre>|<\/style>)/);
-    len = RSTART + RLENGTH;
-    return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
-
-  # HTML #7
-  } else if ( AllowHTML && match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
-    len = RLENGTH; st = RSTART;
-    return substr(block, st, len) _block(substr(block, st + len));
-
-  # Metadata (custom, block starting with %something)
-  # Metadata is ignored but can be interpreted externally
-  } else if ( match(block, /^%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) {
-    len = RLENGTH; st = RSTART;
-    return  _block( substr( block, len + 1) );
+function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list, tmp, ret) {
+  ret = "";
+  while ( block != "" ) {
+    gsub( "(^\n+|\n+$)", "", block );
+
+    # HTML #2 #3 #4 $5
+    if ( AllowHTML && match( block, /(^|\n) ? ? ?(<!--([^-]|-[^-]|--[^>])*(-->|$)|<\?([^\?]|\?[^>])*(\?>|$)|<![A-Z][^>]*(>|$)|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*(\]\]>|$))/) ) {
+      len = RLENGTH; st = RSTART;
+      ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len);
+      continue;
+
+    # HTML #6
+    } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
+      len = RLENGTH; st = RSTART;
+      ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len);
+      continue;
+
+    # HTML #1
+    } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<(script|pre|style)([[:space:]\n>]).*(<\/script>|<\/pre>|<\/style>|$)/) ) {
+      len = RLENGTH; st = RSTART;
+      match( tolower(substr(block, st, len)), /(<\/script>|<\/pre>|<\/style>)/);
+      len = RSTART + RLENGTH;
+      ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len);
+      continue;
+
+    # HTML #7
+    } else if ( AllowHTML && match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
+      len = RLENGTH; st = RSTART;
+      ret = ret substr(block, st, len); block = substr(block, st + len);
+      continue;
+
+    # Metadata (custom, block starting with %something)
+    # Metadata is ignored but can be interpreted externally
+    } else if ( match(block, /^%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) {
+      len = RLENGTH; st = RSTART;
+      block = substr( block, len + 1);
+      continue;
  
-  # Blockquote (leading >)
-  } else if ( match( block, /^> /) ) {
-    match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match(block, /$/);
-    len = RLENGTH; st = RSTART;
-    text = substr(block, 1, st - 1); gsub( /(^|\n)> /, "\n", text );
-    text = _nblock( text ); gsub( /^\n|\n$/, "", text )
-    return "<blockquote>" text "</blockquote>\n\n" _block( substr(block, st + len) );
-
-  # Pipe Tables (pandoc / php md / gfm )
-  } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \
-                           "((\\|)?(:?-+:?[\\|+])+:?-+:?(\\|)?)\n" \
-                           "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) {
-    len = RLENGTH; st = RSTART;
-    #initialize empty arrays
-    split("", talign); split("", tarray);
-    cols = 0; cnt=0; ttext = "";
-
-    # table header and alignment
-    tmp = substr(block, 1, match(block, /(\n|$)/));
-    gsub( /(^|[^\\])\\\|/, "\\1\\&#x7C;", tmp );
-    gsub( /(^\||\|$)/, "", tmp)
-    split( tmp, tarray, /\|/);
-    block = substr(block, match(block, /(\n|$)/) + 1 );
-    tmp = substr(block, 1, match(block, /(\n|$)/));
-    gsub( /(^\||\|$)/, "", tmp );
-    cols = split( tmp , talign, /[+\|]/);
-    block = substr(block, match(block, /(\n|$)/) + 1 );
-
-    for( cnt = 1; cnt < cols; cnt++ ) {
-           if (match(talign[cnt], /:-+:/)) talign[cnt]="center";
-      else if (match(talign[cnt],  /-+:/)) talign[cnt]="right";
-      else if (match(talign[cnt],  /:-+/)) talign[cnt]="left";
-      else talign[cnt]="";
-    }
+    # Blockquote (leading >)
+    } else if ( match( block, /^> /) ) {
+      match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match(block, /$/);
+      len = RLENGTH; st = RSTART;
+      text = substr(block, 1, st - 1); gsub( /(^|\n)> /, "\n", text );
+      text = _nblock( text ); gsub( /^\n|\n$/, "", text )
+      ret = ret "<blockquote>" text "</blockquote>\n\n"; block = substr(block, st + len);
+      continue;
 
-    ttext = "<thead>\n<tr>"
-    for (cnt = 1; cnt < cols; cnt++)
-      ttext = ttext "<th align=\"" talign[cnt] "\">" inline(tarray[cnt]) "</th>"
-    ttext = ttext "</tr>\n</thead><tbody>\n"
+    # Pipe Tables (pandoc / php md / gfm )
+    } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \
+                             "((\\|)?(:?-+:?[\\|+])+:?-+:?(\\|)?)\n" \
+                             "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) {
+      len = RLENGTH; st = RSTART;
+      #initialize empty arrays
+      split("", talign); split("", tarray);
+      cols = 0; cnt=0; ttext = "";
 
-    while ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ){
+      # table header and alignment
       tmp = substr(block, 1, match(block, /(\n|$)/));
       gsub( /(^|[^\\])\\\|/, "\\1\\&#x7C;", tmp );
-      gsub( /(^\||\|$)/, "", tmp );
+      gsub( /(^\||\|$)/, "", tmp)
       split( tmp, tarray, /\|/);
       block = substr(block, match(block, /(\n|$)/) + 1 );
+      tmp = substr(block, 1, match(block, /(\n|$)/));
+      gsub( /(^\||\|$)/, "", tmp );
+      cols = split( tmp , talign, /[+\|]/);
+      block = substr(block, match(block, /(\n|$)/) + 1 );
+
+      for( cnt = 1; cnt < cols; cnt++ ) {
+             if (match(talign[cnt], /:-+:/)) talign[cnt]="center";
+        else if (match(talign[cnt],  /-+:/)) talign[cnt]="right";
+        else if (match(talign[cnt],  /:-+/)) talign[cnt]="left";
+        else talign[cnt]="";
+      }
 
-      ttext = ttext "<tr>"
+      ttext = "<thead>\n<tr>"
       for (cnt = 1; cnt < cols; cnt++)
-        ttext = ttext "<td align=\"" talign[cnt] "\">" inline(tarray[cnt]) "</td>"
-      ttext = ttext "</tr>\n"
-    }
-    return "<table>" ttext "</tbody></table>\n" _block(block);
-
-  # Grid Tables (pandoc)
-  # (with, and without header)
-  } else if ( match( block, "^\\+(-+\\+)+\n" \
-                            "(\\|([^\n]+\\|)+\n)+" \
-                            "(\\+(:?=+:?\\+)+)\n" \
-                           "((\\|([^\n]+\\|)+\n)+" \
-                             "\\+(-+\\+)+(\n|$))+" \
-                   ) || \
-              match( block, "^()()()" \
-                            "(\\+(:?-+:?\\+)+)\n" \
-                           "((\\|([^\n]+\\|)+\n)+" \
-                             "\\+(-+\\+)+(\n|$))+" \
-  ) ) {
-    len = RLENGTH; st = RSTART;
-    #initialize empty arrays
-    split("", talign); split("", tarray); split("", tread);
-    cols = 0; cnt=0; ttext = "";
-
-    # Column Count
-    tmp = block; sub( "(\n.*)*$", "", tmp);
-    cols = split( tmp, tread, /\+/) - 2;
-    # debug(" Cols: " gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block ));
-
-    # table alignment
-    match(block, "((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)");
-    split( substr(block, RSTART, RLENGTH) , talign, /\+/ );
-    # split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ );
-    # debug("Align: " gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ));
-
-    for (cnt = 1; cnt <= cols; cnt++) {
-           if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center";
-      else if (match(talign[cnt],  /(-+|=+):/)) talign[cnt]="right";
-      else if (match(talign[cnt], /:(-+|=+)/ )) talign[cnt]="left";
-      else talign[cnt]="";
-    }
+        ttext = ttext "<th align=\"" talign[cnt] "\">" inline(tarray[cnt]) "</th>"
+      ttext = ttext "</tr>\n</thead><tbody>\n"
 
-    if ( match(block, "^\\+(-+\\+)+\n" \
-                      "(\\|([^\n]+\\|)+\n)+" \
-                       "\\+(:?=+:?\\+)+\n" \
-                     "((\\|([^\n]+\\|)+\n)+" \
-                       "\\+(-+\\+)+(\n|$))+" \
-    ) ) {
-      # table header
-      block = substr(block, match(block, /(\n|$)/) + 1 );
-      while ( match(block, "^\\|([^\n]+\\|)+\n") ) {
+      while ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ){
         tmp = substr(block, 1, match(block, /(\n|$)/));
-        gsub( /\\\\/, "\\&#x5C;", tmp); gsub(/\\\|/, "\\&#x7C;", tmp);
+        gsub( /(^|[^\\])\\\|/, "\\1\\&#x7C;", tmp );
         gsub( /(^\||\|$)/, "", tmp );
-        split(tmp, tread, /\|/);
+        split( tmp, tarray, /\|/);
         block = substr(block, match(block, /(\n|$)/) + 1 );
-        for (cnt = 1; cnt <= cols; cnt++)
-          tarray[cnt] = tarray[cnt] "\n" tread[cnt];
-      }
 
-      ttext = "<thead>\n<tr>"
-      for (cnt = 1; cnt <= cols; cnt++)
-        ttext = ttext "<th align=\"" talign[cnt] "\">" _nblock(tarray[cnt]) "</th>"
-      ttext = ttext "</tr>\n</thead>"
-    }
+        ttext = ttext "<tr>"
+        for (cnt = 1; cnt < cols; cnt++)
+          ttext = ttext "<td align=\"" talign[cnt] "\">" inline(tarray[cnt]) "</td>"
+        ttext = ttext "</tr>\n"
+      }
+      ret = ret "<table>" ttext "</tbody></table>\n";
+      continue;
 
-    # table body
-    block = substr(block, match(block, /(\n|$)/) + 1 );
-    ttext = ttext "<tbody>\n"
+    # Grid Tables (pandoc)
+    # (with, and without header)
+    } else if ( match( block, "^\\+(-+\\+)+\n" \
+                              "(\\|([^\n]+\\|)+\n)+" \
+                              "(\\+(:?=+:?\\+)+)\n" \
+                             "((\\|([^\n]+\\|)+\n)+" \
+                               "\\+(-+\\+)+(\n|$))+" \
+                     ) || \
+                match( block, "^()()()" \
+                              "(\\+(:?-+:?\\+)+)\n" \
+                             "((\\|([^\n]+\\|)+\n)+" \
+                               "\\+(-+\\+)+(\n|$))+" \
+    ) ) {
+      len = RLENGTH; st = RSTART;
+      #initialize empty arrays
+      split("", talign); split("", tarray); split("", tread);
+      cols = 0; cnt=0; ttext = "";
+
+      # Column Count
+      tmp = block; sub( "(\n.*)*$", "", tmp);
+      cols = split( tmp, tread, /\+/) - 2;
+      # debug(" Cols: " gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block ));
+
+      # table alignment
+      match(block, "((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)");
+      split( substr(block, RSTART, RLENGTH) , talign, /\+/ );
+      # split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ );
+      # debug("Align: " gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ));
+
+      for (cnt = 1; cnt <= cols; cnt++) {
+             if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center";
+        else if (match(talign[cnt],  /(-+|=+):/)) talign[cnt]="right";
+        else if (match(talign[cnt], /:(-+|=+)/ )) talign[cnt]="left";
+        else talign[cnt]="";
+      }
 
-    while ( match(block, /^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){
-      split("", tarray);
-      while ( match(block, /^\|([^\n]+\|)+\n/) ) {
-        tmp = substr(block, 1, match(block, /(\n|$)/));
-        gsub( /\\\\/, "\\&#x5C;", tmp); gsub(/\\\|/, "\\&#x7C;", tmp);
-        gsub( /(^\||\|$)/, "", tmp);
-        split( tmp, tread, /\|/);
+      if ( match(block, "^\\+(-+\\+)+\n" \
+                        "(\\|([^\n]+\\|)+\n)+" \
+                         "\\+(:?=+:?\\+)+\n" \
+                       "((\\|([^\n]+\\|)+\n)+" \
+                         "\\+(-+\\+)+(\n|$))+" \
+      ) ) {
+        # table header
         block = substr(block, match(block, /(\n|$)/) + 1 );
+        while ( match(block, "^\\|([^\n]+\\|)+\n") ) {
+          tmp = substr(block, 1, match(block, /(\n|$)/));
+          gsub( /\\\\/, "\\&#x5C;", tmp); gsub(/\\\|/, "\\&#x7C;", tmp);
+          gsub( /(^\||\|$)/, "", tmp );
+          split(tmp, tread, /\|/);
+          block = substr(block, match(block, /(\n|$)/) + 1 );
+          for (cnt = 1; cnt <= cols; cnt++)
+            tarray[cnt] = tarray[cnt] "\n" tread[cnt];
+        }
+
+        ttext = "<thead>\n<tr>"
         for (cnt = 1; cnt <= cols; cnt++)
-          tarray[cnt] = tarray[cnt] "\n" tread[cnt];
+          ttext = ttext "<th align=\"" talign[cnt] "\">" _nblock(tarray[cnt]) "</th>"
+        ttext = ttext "</tr>\n</thead>"
       }
+
+      # table body
       block = substr(block, match(block, /(\n|$)/) + 1 );
+      ttext = ttext "<tbody>\n"
+
+      while ( match(block, /^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){
+        split("", tarray);
+        while ( match(block, /^\|([^\n]+\|)+\n/) ) {
+          tmp = substr(block, 1, match(block, /(\n|$)/));
+          gsub( /\\\\/, "\\&#x5C;", tmp); gsub(/\\\|/, "\\&#x7C;", tmp);
+          gsub( /(^\||\|$)/, "", tmp);
+          split( tmp, tread, /\|/);
+          block = substr(block, match(block, /(\n|$)/) + 1 );
+          for (cnt = 1; cnt <= cols; cnt++)
+            tarray[cnt] = tarray[cnt] "\n" tread[cnt];
+        }
+        block = substr(block, match(block, /(\n|$)/) + 1 );
 
-      ttext = ttext "<tr>"
-      for (cnt = 1; cnt <= cols; cnt++)
-        ttext = ttext "<td align=\"" talign[cnt] "\">" _nblock(tarray[cnt]) "</td>"
-      ttext = ttext "</tr>\n"
-    }
-    return "<table>" ttext "</tbody></table>\n" _nblock(block);
-
-  # Line Blocks (pandoc)
-  } else if ( match(block, /^\| [^\n]*(\n|$)(\| [^\n]*(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) {
-    len = RLENGTH; st = RSTART;
-
-    text = substr(block, 1, len); gsub(/\n[[:space:]]+/, " ", text);
-    gsub(/\n\| /, "\n", text); gsub(/^\| |\n$/, "", text);
-    text = inline(text); gsub(/\n/, "<br>\n", text);
-
-    return "<div class=\"line-block\">" text "</div>\n" _block( substr( block, len + 1) );
-
-  # Indented Code Block
-  } else if ( match(block, /^((    |\t)[^\n]*[^\n\t ][^\n]*(\n|$))((    |\t)[^\n]*(\n|$)|[\t ]*(\n|$))*/) ) {
-    len = RLENGTH; st = RSTART;
-
-    code = substr(block, 1, len);
-    gsub(/(^|\n)(    |\t)/, "\n", code);
-    gsub(/^\n|\n+$/, "", code);
-    return "<pre><code>" HTML( code ) "</code></pre>\n" \
-           _block( substr( block, len + 1 ) );
-
-  # Fenced Divs (pandoc, custom)
-  } else if ( match( block, /^(:::+)/ ) ) {
-    guard = substr( block, 1, RLENGTH ); attrib = code = block;
-    sub(/^[^\n]+\n/, "", code);
-    sub(/^:::+[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib);
-    # attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, attrib);
-    gsub(/[^a-zA-Z0-9_-]+/, " ", attrib);
-    gsub(/(^ | $)/, "", attrib);
-    if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) {
-      len = RLENGTH; st = RSTART;
-      return "<div class=\"" attrib "\">" _nblock( substr(code, 1, st - 1) ) "</div>\n" \
-             _block( substr( code, st + len ) );
-    } else if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) {
-      len = RLENGTH; st = RSTART;
-      return "<div>" _nblock( substr(code, 1, st - 1) ) "</div>\n" \
-             _block( substr( code, st + len ) );
-    } else {
-      match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ );
-      len = RLENGTH; st = RSTART;
-      return "<p>" inline( substr(block, 1, st - 1) ) "</p>\n" \
-             _block( substr(block, st + len) );
-    }
+        ttext = ttext "<tr>"
+        for (cnt = 1; cnt <= cols; cnt++)
+          ttext = ttext "<td align=\"" talign[cnt] "\">" _nblock(tarray[cnt]) "</td>"
+        ttext = ttext "</tr>\n"
+      }
+      return ret "<table>" ttext "</tbody></table>\n" _nblock(block);
 
-  # Fenced Code Block (pandoc)
-  } else if ( match( block, /^(~~~+|```+)/ ) ) {
-    guard = substr( block, 1, RLENGTH ); attrib = code = block;
-    sub(/^[^\n]+\n/, "", code);
-    sub(/^(~~~+|```+)[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib);
-    # attrib = gensub(/^(~~~+|```+)[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\2", 1, attrib);
-    gsub(/[^a-zA-Z0-9_-]+/, " ", attrib);
-    gsub(/(^ | $)/, "", attrib);
-    if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) {
+    # Line Blocks (pandoc)
+    } else if ( match(block, /^\| [^\n]*(\n|$)(\| [^\n]*(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) {
       len = RLENGTH; st = RSTART;
-      return "<pre><code class=\"" attrib "\">" HTML( substr(code, 1, st - 1) ) "</code></pre>\n" \
-             _block( substr( code, st + len ) );
-    } else if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) {
-      len = RLENGTH; st = RSTART;
-      return "<pre><code>" HTML( substr(code, 1, st - 1) ) "</code></pre>\n" \
-             _block( substr( code, st + len ) );
-    } else {
-      match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ );
+
+      text = substr(block, 1, len); gsub(/\n[[:space:]]+/, " ", text);
+      gsub(/\n\| /, "\n", text); gsub(/^\| |\n$/, "", text);
+      text = inline(text); gsub(/\n/, "<br>\n", text);
+
+      ret = ret "<div class=\"line-block\">" text "</div>\n"; block =  substr( block, len + 1);
+      continue;
+
+    # Indented Code Block
+    } else if ( match(block, /^((    |\t)[^\n]*[^\n\t ][^\n]*(\n|$))((    |\t)[^\n]*(\n|$)|[\t ]*(\n|$))*/) ) {
       len = RLENGTH; st = RSTART;
-      return "<p>" inline( substr(block, 1, st - 1) ) "</p>\n" \
-             _block( substr(block, st + len) );
-    }
 
-  # First Order Heading H1 + Attrib
-  } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n|$)/ ) ) {
-    len = RLENGTH; text = attrib = block;
-    sub(/([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "", text);
-    sub(/\}\n===+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib);
-    gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib);
+      code = substr(block, 1, len);
+      gsub(/(^|\n)(    |\t)/, "\n", code);
+      gsub(/^\n|\n+$/, "", code);
+      ret = ret "<pre><code>" HTML( code ) "</code></pre>\n"; block = substr( block, len + 1 );
+      continue;
 
-    return headline(1, text, attrib) _block( substr( block, len + 1 ) );
+    # Fenced Divs (pandoc, custom)
+    } else if ( match( block, /^(:::+)/ ) ) {
+      guard = substr( block, 1, RLENGTH ); attrib = code = block;
+      sub(/^[^\n]+\n/, "", code);
+      sub(/^:::+[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib);
+      # attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, attrib);
+      gsub(/[^a-zA-Z0-9_-]+/, " ", attrib);
+      gsub(/(^ | $)/, "", attrib);
+      if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) {
+        len = RLENGTH; st = RSTART;
+        ret = ret "<div class=\"" attrib "\">" _nblock( substr(code, 1, st - 1) ) "</div>\n";
+        block = substr( code, st + len );
+        continue;
 
-  # First Order Heading H1
-  } else if ( match( block, /^([^\n]+)\n===+(\n|$)/ ) ) {
-    len = RLENGTH; text = substr(block, 1, len);
-    sub(/\n===+(\n.*)?$/, "", text);
+      } else if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) {
+        len = RLENGTH; st = RSTART;
+        ret = ret "<div>" _nblock( substr(code, 1, st - 1) ) "</div>\n"; block = substr( code, st + len );
+        continue;
+
+      } else {
+        match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ );
+        len = RLENGTH; st = RSTART;
+        ret = ret "<p>" inline( substr(block, 1, st - 1) ) "</p>\n"; block = substr(block, st + len);
+        continue;
+      }
 
-    return headline(1, text, 0) _block( substr( block, len + 1 ) );
+    # Fenced Code Block (pandoc)
+    } else if ( match( block, /^(~~~+|```+)/ ) ) {
+      guard = substr( block, 1, RLENGTH ); attrib = code = block;
+      sub(/^[^\n]+\n/, "", code);
+      sub(/^(~~~+|```+)[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib);
+      # attrib = gensub(/^(~~~+|```+)[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\2", 1, attrib);
+      gsub(/[^a-zA-Z0-9_-]+/, " ", attrib);
+      gsub(/(^ | $)/, "", attrib);
+      if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) {
+        len = RLENGTH; st = RSTART;
+        ret = ret "<pre><code class=\"" attrib "\">" HTML( substr(code, 1, st - 1) ) "</code></pre>\n";
+        block = substr( code, st + len );
+        continue;
 
-  # Second Order Heading H2 + Attrib
-  } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n|$)/ ) ) {
-    len = RLENGTH; text = attrib = block;
-    sub(/([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "", text);
-    sub(/\}\n---+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib);
-    gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib);
+      } else if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) {
+        len = RLENGTH; st = RSTART;
+        ret = ret "<pre><code>" HTML( substr(code, 1, st - 1) ) "</code></pre>\n";
+        block = substr( code, st + len );
+        continue;
 
-    return headline(2, text, attrib) _block( substr( block, len + 1) );
+      } else {
+        match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ );
+        len = RLENGTH; st = RSTART;
+        ret = ret "<p>" inline( substr(block, 1, st - 1) ) "</p>\n"; block = substr(block, st + len);
+        continue;
+      }
 
-  # Second Order Heading H2
-  } else if ( match( block, /^([^\n]+)\n---+(\n|$)/ ) ) {
-    len = RLENGTH; text = substr(block, 1, len);
-    sub(/\n---+(\n.*)?$/, "", text);
+    # First Order Heading H1 + Attrib
+    } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n|$)/ ) ) {
+      len = RLENGTH; text = attrib = block;
+      sub(/([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "", text);
+      sub(/\}\n===+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib);
+      gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib);
 
-    return headline(2, text, 0) _block( substr( block, len + 1) );
+      ret = ret headline(1, text, attrib) ; block = substr( block, len + 1 );
+      continue;
 
-  # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib
-  } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) {
-    len = RLENGTH; text = attrib = substr(block, 1, len);
-    match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1;
+    # First Order Heading H1
+    } else if ( match( block, /^([^\n]+)\n===+(\n|$)/ ) ) {
+      len = RLENGTH; text = substr(block, 1, len);
+      sub(/\n===+(\n.*)?$/, "", text);
 
-    # sub(/^(##?#?#?#?#?)[ \t]*/, "", text);  # not working in mawk
-    text = substr(text, n + 1); sub(/^[ \t]*/, "", text);
-    sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text);
-    sub(/^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib);
-    sub(/\}(\n.*)?$/, "", attrib);
-    gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib);
+      ret = ret headline(1, text, 0) ; block = substr( block, len + 1 );
+      continue;
 
-    return headline( n, text, attrib ) _block( substr( block, len + 1) );
+    # Second Order Heading H2 + Attrib
+    } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n|$)/ ) ) {
+      len = RLENGTH; text = attrib = block;
+      sub(/([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "", text);
+      sub(/\}\n---+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib);
+      gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib);
 
-  # Nth Order Heading H1 H2 H3 H4 H5 H6
-  } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) {
-    len = RLENGTH; text = substr(block, 1, len);
-    match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1;
-    # sub(/^(##?#?#?#?#?)[ \t]+/, "", text);  # not working in mawk
-    text = substr(text, n + 1); sub(/^[ \t]*/, "", text);
-    sub(/[ \t]*#*(\n.*)?$/, "", text);
-
-    return headline( n, text, 0 ) _block( substr( block, len + 1) );
-
-  # block images (wrapped in <figure>)
-  } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n|$)") ) {
-    len = RLENGTH; text = href = title = attrib = substr( block, 1, len);
-
-    sub("^!\\[", "", text);
-    sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", text);
-
-    sub("^!" lix "\\([\n\t ]*", "", href);
-    sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", href);
-
-    sub("^!" lix "\\([\n\t ]*" lid, "", title);
-    sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", title);
-    sub("^[\n\t ]+", "", title);
-
-    sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib);
-    sub("(\n.*)?$", "", attrib);
-    sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib);
-
-    if ( match(href, /^<.*>$/) ) { sub(/^</, "", href); sub(/>$/, "", href); }
-         if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); }
-    else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); }
-    else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); }
-
-    gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href);
-
-    return "<figure data-src=\"" HTML(href) "\"" (attrib?" class=\"" HTML(attrib) "\"":"") ">" \
-           "<img src=\"" HTML(href) "\" alt=\"" HTML(text?text:title?title:href) "\"" \
-           (attrib?" class=\"" HTML(attrib) "\"":"") ">" \
-           (title?"<figcaption>" inline(title) "</figcaption>":"") \
-           "</figure>\n\n" \
-           _block( substr( block, len + 1) );
-
-  } else if ( match(block, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) {
-    len = RLENGTH; text = id = block;
-    sub(/(\n.*)?$/, "", text); sub( /^!\[/, "", text); sub(/\] ?\[([^\n]*)\]$/, "", text);
-    sub(/(\n.*)?$/, "",   id); sub( /^!\[([^\n]*)\] ?\[/, "",   id); sub(/\]$/, "",   id);
-    # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block);
-    #   id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block);
-    if ( ! id ) id = text;
-    if ( rl_href[id] && rl_title[id] ) {
-      return "<figure data-src=\"" HTML(rl_href[id]) "\">" \
-               "<img src=\"" HTML(rl_href[id]) "\" alt=\"" HTML(text) "\">" \
-               "<figcaption>" inline(rl_title[id]) "</figcaption>" \
-             "</figure>\n\n" \
-             _block( substr( block, len + 1) );
-    } else if ( rl_href[id] ) {
-      return "<figure data-src=\"" HTML(rl_href[id]) "\">" \
-               "<img src=\"" HTML(rl_href[id]) "\" alt=\"" HTML(text) "\">" \
-             "</figure>\n\n" \
-             _block( substr( block, len + 1) );
+      ret = ret headline(2, text, attrib) ; block = substr( block, len + 1);
+      continue;
+
+    # Second Order Heading H2
+    } else if ( match( block, /^([^\n]+)\n---+(\n|$)/ ) ) {
+      len = RLENGTH; text = substr(block, 1, len);
+      sub(/\n---+(\n.*)?$/, "", text);
+
+      ret = ret headline(2, text, 0) ; block = substr( block, len + 1);
+      continue;
+
+    # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib
+    } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) {
+      len = RLENGTH; text = attrib = substr(block, 1, len);
+      match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1;
+
+      # sub(/^(##?#?#?#?#?)[ \t]*/, "", text);  # not working in mawk
+      text = substr(text, n + 1); sub(/^[ \t]*/, "", text);
+      sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text);
+      sub(/^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib);
+      sub(/\}(\n.*)?$/, "", attrib);
+      gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib);
+
+      ret = ret headline( n, text, attrib ) ; block = substr( block, len + 1);
+      continue;
+
+    # Nth Order Heading H1 H2 H3 H4 H5 H6
+    } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) {
+      len = RLENGTH; text = substr(block, 1, len);
+      match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1;
+      # sub(/^(##?#?#?#?#?)[ \t]+/, "", text);  # not working in mawk
+      text = substr(text, n + 1); sub(/^[ \t]*/, "", text);
+      sub(/[ \t]*#*(\n.*)?$/, "", text);
+
+      ret = ret headline( n, text, 0 ) ; block = substr( block, len + 1);
+      continue;
+
+    # block images (wrapped in <figure>)
+    } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n|$)") ) {
+      len = RLENGTH; text = href = title = attrib = substr( block, 1, len);
+
+      sub("^!\\[", "", text);
+      sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", text);
+
+      sub("^!" lix "\\([\n\t ]*", "", href);
+      sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", href);
+
+      sub("^!" lix "\\([\n\t ]*" lid, "", title);
+      sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", title);
+      sub("^[\n\t ]+", "", title);
+
+      sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib);
+      sub("(\n.*)?$", "", attrib);
+      sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib);
+
+      if ( match(href, /^<.*>$/) ) { sub(/^</, "", href); sub(/>$/, "", href); }
+           if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); }
+      else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); }
+      else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); }
+
+      gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href);
+
+      ret = ret "<figure data-src=\"" HTML(href) "\"" (attrib?" class=\"" HTML(attrib) "\"":"") ">" \
+             "<img src=\"" HTML(href) "\" alt=\"" HTML(text?text:title?title:href) "\"" \
+             (attrib?" class=\"" HTML(attrib) "\"":"") ">" \
+             (title?"<figcaption>" inline(title) "</figcaption>":"") \
+             "</figure>\n\n";
+      block = substr( block, len + 1);
+      continue;
+
+    } else if ( match(block, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) {
+      len = RLENGTH; text = id = block;
+      sub(/(\n.*)?$/, "", text); sub( /^!\[/, "", text); sub(/\] ?\[([^\n]*)\]$/, "", text);
+      sub(/(\n.*)?$/, "",   id); sub( /^!\[([^\n]*)\] ?\[/, "",   id); sub(/\]$/, "",   id);
+      # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block);
+      #   id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block);
+      if ( ! id ) id = text;
+      if ( rl_href[id] && rl_title[id] ) {
+        ret = ret "<figure data-src=\"" HTML(rl_href[id]) "\">" \
+                 "<img src=\"" HTML(rl_href[id]) "\" alt=\"" HTML(text) "\">" \
+                 "<figcaption>" inline(rl_title[id]) "</figcaption>" \
+               "</figure>\n\n";
+        block = substr( block, len + 1);
+        continue;
+
+      } else if ( rl_href[id] ) {
+        ret = ret "<figure data-src=\"" HTML(rl_href[id]) "\">" \
+                 "<img src=\"" HTML(rl_href[id]) "\" alt=\"" HTML(text) "\">" \
+               "</figure>\n\n";
+        block = substr( block, len + 1);
+        continue;
+      } else {
+        ret = ret "<p>" HTML(substr(block, 1, len)) "</p>\n" ; block = substr(block, len + 1);
+        continue;
+      }
+
+    # Macros (standalone <<macro>> calls handled as block, so they are not wrapped in paragraph)
+    } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) {
+      len = RLENGTH; text = block;
+      sub(/^<</, "", text); sub(/>>(\n.*)?$/, "", text);
+      # text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block);
+      ret = ret "<code class=\"macro\">" HTML(text) "</code>" ; block = substr(block, len + 1);
+      continue;
+
+    # Definition list
+    } else if (match( block, "^(([ \t]*\n)*[^:\n \t][^\n]+\n" \
+                             "([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \
+                            "(([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \
+                             "|[^:\n \t][^\n]+(\n|$)" \
+                             "|( ? ? ?\t|  +)[^\n]+(\n|$)" \
+                             "|([ \t]*\n)+( ? ? ?\t|  +)[^\n]+(\n|$))*)+" \
+    )) {
+      list = substr( block, 1, RLENGTH); block = substr( block, RLENGTH + 1);
+      ret = ret "<dl>\n" _dlist( list ) "</dl>\n";
+      continue;
+
+    # Unordered list types
+    } else if ( text = _startlist( block, "ul", "-",   "([+*â¢]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
+      return ret text;
+    } else if ( text = _startlist( block, "ul", "\\+", "([-*â¢]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
+      return ret text;
+    } else if ( text = _startlist( block, "ul", "\\*", "([-+â¢]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
+      return ret text;
+    } else if ( text = _startlist( block, "ul", "â¢", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
+      return ret text;
+
+    # Ordered list types
+    } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*â¢]|#\\.|[0-9]+\\)|#\\))") ) {
+      return ret text;
+    } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*â¢]|[0-9]+\\.|#\\.|#\\))") ) {
+      return ret text;
+    } else if ( text = _startlist( block, "ol", "#\\.", "([-+*â¢]|[0-9]+\\.|[0-9]+\\)|#\\))") ) {
+      return ret text;
+    } else if ( text = _startlist( block, "ol", "#\\)", "([-+*â¢]|[0-9]+\\.|#\\.|[0-9]+\\))") ) {
+      return ret text;
+
+    # Split paragraphs
+    } else if ( match( block, /(^|\n)[[:space:]]*(\n|$)/) ) {
+      len = RLENGTH; st = RSTART;
+      ret = ret _block( substr(block, 1, st - 1) ) "\n"; block = substr(block, st + len);
+      continue;
+
+    # Horizontal rule
+    # } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) {
+    } else if ( match( block, /(^|\n) ? ? ?((\* *)(\* *)(\* *)(\* *)*|(- *)(- *)(- *)(- *)*|(_ *)(_ *)(_ *)(_ *)*)($|\n)/) ) {
+      len = RLENGTH; st = RSTART;
+      ret = ret _block(substr(block, 1, st - 1)) "<hr>\n"; block = substr(block, st + len);
+      continue;
+
+    # Plain paragraph
     } else {
-      return "<p>" HTML(substr(block, 1, len)) "</p>\n" _block( substr(block, len + 1) );
+      return ret "<p>" inline(block) "</p>\n";
     }
-
-  # Macros (standalone <<macro>> calls handled as block, so they are not wrapped in paragraph)
-  } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) {
-    len = RLENGTH; text = block;
-    sub(/^<</, "", text); sub(/>>(\n.*)?$/, "", text);
-    # text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block);
-    return "<code class=\"macro\">" HTML(text) "</code>" _block(substr(block, len + 1) );
-
-  # Definition list
-  } else if (match( block, "^(([ \t]*\n)*[^:\n \t][^\n]+\n" \
-                           "([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \
-                          "(([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \
-                           "|[^:\n \t][^\n]+(\n|$)" \
-                           "|( ? ? ?\t|  +)[^\n]+(\n|$)" \
-                           "|([ \t]*\n)+( ? ? ?\t|  +)[^\n]+(\n|$))*)+" \
-  )) {
-    list = substr( block, 1, RLENGTH); block = substr( block, RLENGTH + 1);
-    return "<dl>\n" _dlist( list ) "</dl>\n" _block( block );
-
-  # Unordered list types
-  } else if ( text = _startlist( block, "ul", "-",   "([+*â¢]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
-    return text;
-  } else if ( text = _startlist( block, "ul", "\\+", "([-*â¢]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
-    return text;
-  } else if ( text = _startlist( block, "ul", "\\*", "([-+â¢]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
-    return text;
-  } else if ( text = _startlist( block, "ul", "â¢", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
-    return text;
-
-  # Ordered list types
-  } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*â¢]|#\\.|[0-9]+\\)|#\\))") ) {
-    return text;
-  } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*â¢]|[0-9]+\\.|#\\.|#\\))") ) {
-    return text;
-  } else if ( text = _startlist( block, "ol", "#\\.", "([-+*â¢]|[0-9]+\\.|[0-9]+\\)|#\\))") ) {
-    return text;
-  } else if ( text = _startlist( block, "ol", "#\\)", "([-+*â¢]|[0-9]+\\.|#\\.|[0-9]+\\))") ) {
-    return text;
-
-  # Split paragraphs
-  } else if ( match( block, /(^|\n)[[:space:]]*(\n|$)/) ) {
-    len = RLENGTH; st = RSTART;
-    return _block( substr(block, 1, st - 1) ) "\n" \
-           _block( substr(block, st + len) );
-
-  # Horizontal rule
-  # } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) {
-  } else if ( match( block, /(^|\n) ? ? ?((\* *)(\* *)(\* *)(\* *)*|(- *)(- *)(- *)(- *)*|(_ *)(_ *)(_ *)(_ *)*)($|\n)/) ) {
-    len = RLENGTH; st = RSTART;
-    return _block(substr(block, 1, st - 1)) "<hr>\n" _block(substr(block, st + len));
-
-  # Plain paragraph
-  } else {
-    return "<p>" inline(block) "</p>\n";
   }
+  return ret;
 }
 
 function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, it, text) {
-- 
2.39.5


From 98df52bf69b6f2d838264902de21369013602102 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Fri, 30 Aug 2024 19:18:04 +0200
Subject: [PATCH 08/16] modify regexes for use in mawk

---
 markdown.awk | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/markdown.awk b/markdown.awk
index fc2f203..6e3febe 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -701,23 +701,25 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
       ret = ret headline(2, text, 0) ; block = substr( block, len + 1);
       continue;
 
-    # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib
-    } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) {
+    # # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib
+    # } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{[a-zA-Z \t-]*\}(\n|$)/ ) ) {
+    } else if ( match( block, /^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*\{[\ta-zA-Z -]*\}(\n|$)/ ) ) {
       len = RLENGTH; text = attrib = substr(block, 1, len);
       match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1;
-
       # sub(/^(##?#?#?#?#?)[ \t]*/, "", text);  # not working in mawk
       text = substr(text, n + 1); sub(/^[ \t]*/, "", text);
       sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text);
-      sub(/^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib);
+
+      sub(/^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*\{/, "", attrib);
       sub(/\}(\n.*)?$/, "", attrib);
       gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib);
 
-      ret = ret headline( n, text, attrib ) ; block = substr( block, len + 1);
+      ret = ret headline( n, text, attrib ); block = substr( block, len + 1);
       continue;
 
     # Nth Order Heading H1 H2 H3 H4 H5 H6
-    } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) {
+    # } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) {
+    } else if ( match( block, /^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*(\n|$)/ ) ) {
       len = RLENGTH; text = substr(block, 1, len);
       match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1;
       # sub(/^(##?#?#?#?#?)[ \t]+/, "", text);  # not working in mawk
@@ -854,10 +856,12 @@ function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, it
                                    "|[^\n \t][^\n]+(\n|$))*" ) ) {
     st = RSTART; len = RLENGTH; list = substr( block, st, len);
 
-    sub("^\n", "", list); match(list, "^ ? ? ?"); indent = RLENGTH;
-    it = ""; while ( indent > 0 ) { it = it " ?"; indent--; }
+    sub("^\n", "", list); match(list, "^(   |  | |)"); indent = RLENGTH;
     # gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list);
-    gsub( "(^|\n)" it, "\n", list); sub("^\n", "", list);
+    # emulate greedy range matcher for mawk
+    it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; }
+    sub(/\|$/, ")?", it); sub(/^\($/, "", it);
+    gsub( "(^|\n)" it, "\n", list ); sub("^\n", "", list);
 
     text = substr(block, 1, st - 1); block = substr(block, st + len);
     if (match(text, /\n[[:space:]]*\n/)) return 0;
@@ -875,7 +879,6 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, it, task) {
   if ( match(block, "^([ \t]*\n)*$")) return;
 
   match(block, "^" mark "[ \t]"); indent = RLENGTH;
-  it = ""; while ( indent > 0 ) { it = it " ?"; indent--; }
 
   sub("^" mark "[ \t]", "", block);
 
@@ -886,6 +889,9 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, it, task) {
   text = substr(block, 1, st); block = substr(block, st + 1);
 
   # gsub("\n {0," indent "}", "\n", text);
+  # emulate greedy range matcher for mawk
+  it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; }
+  sub(/\|$/, ")?", it); sub(/^\($/, "", it);
   gsub("\n" it, "\n", text);
 
   task = match( text, /^\[ \]/   ) ? "<li class=\"task pending\"><input type=checkbox disabled>"      : \
@@ -915,9 +921,11 @@ function _dlist (block, LOCAL, len, st, text, indent, it, p) {
     len = RLENGTH; text = substr(block, 1, len);
     sub( "^([ \t]*\n)*", "", text);
     match(text, "^ ? ? ?:(\t| +)"); indent = RLENGTH;
-    it = ""; while ( indent > 0 ) { it = it " ?"; indent--; }
     sub( "^ ? ? ?:(\t| +)", "", text);
     # gsub( "(^|\n) {0," indent "}", "\n", text );
+    # emulate greedy range matcher for mawk
+    it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; }
+    sub(/\|$/, ")?", it); sub(/^\($/, "", it);
     gsub( "(^|\n)" it, "\n", text );
 
     text = _nblock(text);
-- 
2.39.5


From 42028f563c652ea0cbdd069940b09e58771ea4e5 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Fri, 30 Aug 2024 21:42:54 +0200
Subject: [PATCH 09/16] improved tests for nested emphasis

---
 tests-markdown.sh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests-markdown.sh b/tests-markdown.sh
index ff56c02..2bbcf5d 100755
--- a/tests-markdown.sh
+++ b/tests-markdown.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-runtimes="gawk bawk goawk"
+runtimes="gawk bawk mawk goawk"
 
 BR='
 '
@@ -54,6 +54,13 @@ assert '***strem***' '<p><strong><em>strem</em></strong></p>' "strong em"
 assert '***str**em*' '<p><em><strong>str</strong>em</em></p>' "em strong"
 assert '_**strem**_' '<p><em><strong>strem</strong></em></p>' "em strong"
 
+assert '*foo**str**bar**str**qua*' '<p><em>foo<strong>str</strong>bar<strong>str</strong>qua</em></p>' 'em strong asterisk'
+assert '**foo*em*bar*em*qua**' '<p><strong>foo<em>em</em>bar<em>em</em>qua</strong></p>' 'strong em asterisk'
+
+assert '_foo__str__bar__str__qua_' '<p><em>foo__str__bar__str__qua</em></p>' 'em embedded underscore'
+assert '__foo_em_bar_em_qua__' '<p><strong>foo_em_bar_em_qua</strong></p>' 'strong embedded underscore'
+assert '_**str**foo**str**_' '<p><em><strong>str</strong>foo<strong>str</strong></em></p>' 'em strong mixed'
+
 assert '_foo_-> bar' '<p><em>foo</em>&rarr; bar</p>' 'arrow'
 assert '`_foo_-> bar`' '<p><code>_foo_-&gt; bar</code></p>' 'arrow'
 assert '<!-- comment --> <- comment' '<p>&lt;!-- comment --&gt; &larr; comment</p>' 'arrow'
-- 
2.39.5


From 898d470f90e4055d0bcfe616bc009cca8d7f5692 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Fri, 30 Aug 2024 21:59:59 +0200
Subject: [PATCH 10/16] optimized emphasis regex for performance in mawk

---
 markdown.awk | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/markdown.awk b/markdown.awk
index 6e3febe..6e1440c 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -307,6 +307,7 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) {
       ret = ret HTML(substr( line, 1, RLENGTH)); line = substr(line, RLENGTH + 1);
       continue;
 
+    # strong / em matchers use pre match pattern to make processing cheaper
     #  __strong__$
     } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__$") ) {
       len = RLENGTH;
@@ -320,7 +321,7 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) {
       continue;
 
     #  **strong**
-    } else if ( match(line, "^\\*\\*(([^\\*[:space:]]|" iea ")|([^\\*[:space:]]|" iea ")(" na "|" iea ")*([^\\*[:space:]]|" iea "))\\*\\*") ) {
+    } else if ( match(line, "^\\*\\*(([^*[:space:]]|" iea ")|([^*[:space:]]|" iea ")(" na "|" iea ")*([^*[:space:]]|" iea "))\\*\\*") ) {
       len = RLENGTH;
       ret = ret "<strong>" inline( substr( line, 3, len - 4 ) ) "</strong>"; line = substr( line, len + 1 );
       continue;
@@ -338,7 +339,7 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) {
       continue;
 
     #  *em*
-    } else if ( match(line, "^\\*(([^\\*[:space:]]|" isa ")|([^\\*[:space:]]|" isa ")(" na "|" isa ")*([^\\*[:space:]]|" isa "))\\*") ) {
+    } else if ( match(line, "^\\*(([^*[:space:]]|" isa ")|([^*[:space:]]|" isa ")(" na "|" isa ")*([^*[:space:]]|" isa "))\\*") ) {
       len = RLENGTH;
       ret = ret "<em>" inline( substr( line, 2, len - 2 ) ) "</em>"; line = substr( line, len + 1 );
       continue;
@@ -944,12 +945,12 @@ BEGIN {
   # hls = "0 0 0 0 0 0";
 
   # Universal Patterns
-  nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*"    # not underline (except when escaped)
-  na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*"  # not asterisk (except when escaped)
-  ieu =  "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_"                 # inner <em> (underline)
-  isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__"                # inner <strong> (underline)
-  iea =    "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*"     # inner <em> (asterisk)
-  isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*"  # inner <strong> (asterisk)
+  nu = "([^_\\\\]|\\\\.|_[[:alnum:]])"  # not underline (except when escaped, or inside a word)
+  na = "([^*\\\\]|\\\\.)"               # not asterisk (except when escaped)
+  ieu =  "_([^_[:space:]]|[^_[:space:]]" nu "*[^_[:space:]])_"                 # inner <em> (underline)
+  isu = "__([^_[:space:]]|[^_[:space:]]" nu "*[^_[:space:]])__"                # inner <strong> (underline)
+  iea =    "\\*([^*[:space:]]|[^*[:space:]]" na "*[^*[:space:]])\\*"     # inner <em> (asterisk)
+  isa = "\\*\\*([^*[:space:]]|[^*[:space:]]" na "*[^*[:space:]])\\*\\*"  # inner <strong> (asterisk)
 
   lix="\\[(\\\\[^\n]|[^]\n\\\\[])*\\]"  # link text
   lid="(<(\\\\[^\n]|[^\n<>\\\\])*>|(\\\\.|[^()\"'\\\\])+|([^<\n\t ()\\\\]|\\\\[^\n])(\\\\[\n]|[^\n\t \\(\\)\\\\])*)"  # link dest
-- 
2.39.5


From 9b019ee3d845d1edac5a6a33ebf385b120f136f8 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Sat, 31 Aug 2024 19:24:38 +0200
Subject: [PATCH 11/16] simpler regex for matching email links (again, for
 portability)

---
 markdown.awk      | 2 +-
 tests-markdown.sh | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/markdown.awk b/markdown.awk
index 6e1440c..ad8c438 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -175,7 +175,7 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) {
 
     # quick link email
     # } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) {
-    } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9-]?[a-zA-Z0-9])?)*>/ ) ) {
+    } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@([a-zA-Z0-9]\.[a-zA-Z0-9]|[a-zA-Z0-9-])+>/ ) ) {
       len = RLENGTH;
       href = HTML( substr( line, 2, len - 2) );
       ret = ret "<a href=\"mailto:" href "\">" href "</a>"; line = substr( line, len + 1);
diff --git a/tests-markdown.sh b/tests-markdown.sh
index 2bbcf5d..a834391 100755
--- a/tests-markdown.sh
+++ b/tests-markdown.sh
@@ -80,6 +80,9 @@ assert '<https://de.wikipedia.org>' "<p><a href=\"https://de.wikipedia.org\">htt
 assert '<http://de.wikipedia.org>' "<p><a href=\"http://de.wikipedia.org\">http://de.wikipedia.org</a></p>" "automatic link"
 # assert '<//de.wikipedia.org>' "<p><a href=\"//de.wikipedia.org\">http://de.wikipedia.org</a></p>" "automatic link"
 
+assert '<hello&goodbye@sub-test.example.com>' "<p><a href=\"mailto:hello&amp;goodbye@sub-test.example.com\">hello&amp;goodbye@sub-test.example.com</a></p>" "automatic link, email"
+# assert '<hällö&guttbei@sub-test.example.com>' "<p><a href=\"mailto:hällö&amp;guttbei@sub-test.example.com\">hällö&amp;guttbei@sub-test.example.com</a></p>" "automatic link, email"
+
 # Inline Links
 assert '[Wikipedia](http://de.wikipedia.org)' "<p><a href=\"http://de.wikipedia.org\">Wikipedia</a></p>" "inline link"
 assert '[Wikipedia](http://de.wikipedia.org "Online Encyclopedia")' "<p><a href=\"http://de.wikipedia.org\" title=\"Online Encyclopedia\">Wikipedia</a></p>" "inline link"
-- 
2.39.5


From 32b4555b66c086805df962e68070956c4c1780d7 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Sat, 31 Aug 2024 21:53:16 +0200
Subject: [PATCH 12/16] split/modify some regexes for compatibility with old
 mawk implementations

---
 markdown.awk | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/markdown.awk b/markdown.awk
index ad8c438..356a987 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -347,7 +347,11 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) {
     # Literal HTML entities
     # } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) {
     # mawk does not support repitition ranges
-    } else if ( match( line, /^&([a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?|#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) {
+    } else if ( match( line, /^&[a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?;/) ) {
+      len = RLENGTH;
+      ret = ret substr( line, 1, len ); line = substr(line, len + 1);
+      continue;
+    } else if ( match( line, /^&(#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) {
       len = RLENGTH;
       ret = ret substr( line, 1, len ); line = substr(line, len + 1);
       continue;
@@ -425,8 +429,14 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
       ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len);
       continue;
 
-    # HTML #6
-    } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
+    # HTML #6 (part1)
+    } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
+      len = RLENGTH; st = RSTART;
+      ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len);
+      continue;
+
+    # HTML #6 (part2)
+    } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
       len = RLENGTH; st = RSTART;
       ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len);
       continue;
@@ -516,8 +526,7 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
                              "((\\|([^\n]+\\|)+\n)+" \
                                "\\+(-+\\+)+(\n|$))+" \
                      ) || \
-                match( block, "^()()()" \
-                              "(\\+(:?-+:?\\+)+)\n" \
+                match( block, "^(\\+(:?-+:?\\+)+)\n" \
                              "((\\|([^\n]+\\|)+\n)+" \
                                "\\+(-+\\+)+(\n|$))+" \
     ) ) {
@@ -857,7 +866,7 @@ function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, it
                                    "|[^\n \t][^\n]+(\n|$))*" ) ) {
     st = RSTART; len = RLENGTH; list = substr( block, st, len);
 
-    sub("^\n", "", list); match(list, "^(   |  | |)"); indent = RLENGTH;
+    sub("^\n", "", list); match(list, "^(   |  | )?"); indent = RLENGTH;
     # gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list);
     # emulate greedy range matcher for mawk
     it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; }
-- 
2.39.5


From 290cc67245726ca72bb8450162cb2adc5a8c78d1 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Wed, 9 Oct 2024 07:10:39 +0200
Subject: [PATCH 13/16] bugfix: avoid endless loops for some incomplete matches

---
 markdown.awk      | 15 +++++++--------
 tests-markdown.sh | 13 +++++++++++--
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/markdown.awk b/markdown.awk
index 356a987..bef97d1 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -372,11 +372,10 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) {
       ret = ret HTML(substr(line, 1, 1)); line = substr(line, 2);
       continue;
 
-    #  continue walk over string
-    } else {
-      ret = ret substr(line, 1, 1); line = substr(line, 2);
-      continue;
-    }
+    }  # inline patterns end
+
+    # continue walk over string
+    ret = ret substr(line, 1, 1); line = substr(line, 2);
   }
   return ret;
 }
@@ -851,10 +850,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code,
       ret = ret _block(substr(block, 1, st - 1)) "<hr>\n"; block = substr(block, st + len);
       continue;
 
+    }  # block patterns end
+
     # Plain paragraph
-    } else {
-      return ret "<p>" inline(block) "</p>\n";
-    }
+    return ret "<p>" inline(block) "</p>\n";
   }
   return ret;
 }
diff --git a/tests-markdown.sh b/tests-markdown.sh
index a834391..6df4224 100755
--- a/tests-markdown.sh
+++ b/tests-markdown.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-runtimes="gawk bawk mawk goawk"
+runtimes="gawk busybox mawk goawk"
 
 BR='
 '
@@ -9,7 +9,7 @@ fail() { printf '%s\n' "$@"; exit 1; }
 
 awk() { /bin/awk "$@"; }
 md_gawk() { gawk -f markdown.awk "$@"; }
-md_bawk() { busybox awk -f markdown.awk "$@"; }
+md_busybox() { busybox awk -f markdown.awk "$@"; }
 md_mawk() { mawk -f markdown.awk "$@"; }
 md_goawk() { goawk -f markdown.awk "$@"; }
 
@@ -45,6 +45,7 @@ assert '~sub~' '<p><sub>sub</sub></p>' "subscript"
 
 assert "foo  ${BR}bar" "<p>foo<br>${BR}bar</p>" 'double space line break'
 assert '```&copy;```' "<p><code>&amp;copy;</code></p>" "code span escape"
+assert '````' "<pre><code>````</code></pre>" "empty code span"
 
 assert '_emphasized text_' '<p><em>emphasized text</em></p>' "em"
 assert '_emphasized_text_' '<p><em>emphasized_text</em></p>' "em"
@@ -190,6 +191,14 @@ not be
 but &amp;shy; &lt;escaped&gt;</code></pre>' \
 "fenced code block"
 
+assert 'foobar
+````
+foobar' \
+'<p>foobar
+````
+foobar</p>' \
+"Open Fence"
+
 # Block Images
 assert '![Testbild](Test Bild.jpg)' \
 '<figure data-src="Test Bild.jpg"><img src="Test Bild.jpg" alt="Testbild"></figure>' \
-- 
2.39.5


From ac4031bc57b6ae56ecaba5af2b0f9cfd5a13ceb2 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Sun, 6 Apr 2025 12:03:07 +0200
Subject: [PATCH 14/16] allow `check` function for field indexes

---
 db23.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/db23.sh b/db23.sh
index e8a0d64..8ee6f4f 100755
--- a/db23.sh
+++ b/db23.sh
@@ -31,8 +31,12 @@ DB2() {
     open|load) file="$1"
       cat "$file" || return 1
       ;;
-    check|contains) key="$(STRING "$1")" val=''
+    check|contains) key="$(STRING "$1")"  seq="${2:-1}"
       val="${data##*"${BR}${key}"	}" val="${val%%"${BR}"*}"
+      [ "$val" = '' ] && return 1 || val="${val}	"
+      while [ $seq -gt 1 ]; do
+        seq=$((seq - 1)) val="${val#*	}"
+      done
       [ "$val" = '' ] && return 1
       ;;
     count) key="$(STRING "$1")" val='' seq=0
-- 
2.39.5


From b8dae070a1652d8a46a8eb81eaf4fc82a1a3b8de Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Mon, 5 May 2025 11:39:14 +0200
Subject: [PATCH 15/16] updated copyright info

---
 markdown.awk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/markdown.awk b/markdown.awk
index bef97d1..90dddbf 100755
--- a/markdown.awk
+++ b/markdown.awk
@@ -5,7 +5,7 @@
 # Meant to support all features of John Grubers basic Markdown
 # + a number of common extensions, mostly inspired by Pandoc Markdown
 
-# Copyright 2021 - 2023 Paul Hänsch
+# Copyright 2021 - 2024 Paul Hänsch
 # 
 # Permission to use, copy, modify, and/or distribute this software for any
 # purpose with or without fee is hereby granted, provided that the above
-- 
2.39.5


From 0c39114d102cb1d523b2b7b63f7d9caba4cf441c Mon Sep 17 00:00:00 2001
From: =?utf8?q?Paul=20H=C3=A4nsch?= <paul@plutz.net>
Date: Mon, 26 May 2025 21:30:20 +0200
Subject: [PATCH 16/16] bugfix: prevent content duplication when deleting
 nonexistent key

---
 db23.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/db23.sh b/db23.sh
index 8ee6f4f..b7ab548 100755
--- a/db23.sh
+++ b/db23.sh
@@ -65,8 +65,12 @@ DB2() {
     delete|remove) key="$(STRING "$1")"
       val="${data#*"${BR}${key}"	*"${BR}"}"
       key="${data%"${BR}${key}"	*"${BR}"*}"
-      [ "${key}${BR}${val}" = "${data}" ] && return 1
-      printf '%s' "${key#"${BR}"}${BR}${val%"${BR}"}"
+      if [ "${val}" = "${data}" ]; then
+        printf %s\\n "${data}"
+        return 1
+      else
+        printf '%s' "${key#"${BR}"}${BR}${val%"${BR}"}"
+      fi
       ;;
     set|store) key="$(STRING "$1")" val=""
       shift 1
-- 
2.39.5


Col 1 -	Col 2 +
Col 1\ +	Col\|2	Col 3
" inline(tarray[cnt]) "
" inline(tarray[cnt]) "