X-Git-Url: http://git.plutz.net/?a=blobdiff_plain;f=markdown.awk;h=6143b73dea4ccd07e4262867480152d4ad85b028;hb=211f2eec02232cfeab5fa2865a8bc5123e17a20f;hp=57a04e986703b122bc1d0f2a629ab0c03cf792cf;hpb=8e79399f2cf87e6d8715b7daeb62ee39f3436a6b;p=cgilite diff --git a/markdown.awk b/markdown.awk index 57a04e9..6143b73 100755 --- a/markdown.awk +++ b/markdown.awk @@ -5,6 +5,20 @@ # Meant to support all features of John Grubers basic Markdown # + a number of common extensions, mostly inspired by Pandoc Markdown +# Copyright 2021 - 2023 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + # Supported Features / TODO: # ========================== # [x] done [ ] todo [-] not planned ? unsure @@ -277,9 +291,9 @@ function inline( line, LOCAL, len, text, code, href, guard ) { return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); # Macros - } else if ( AllowMacros && match( line, /^<<([^>]|>[^>])+>>/) ) { + } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) { len = RLENGTH; - return macro( substr( line, 3, len - 4 ) ) inline(substr(line, len + 1)); + return "" HTML( substr( line, 3, len - 4 ) ) "" inline(substr(line, len + 1)); # Verbatim inline HTML } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { @@ -388,7 +402,7 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, # Pipe Tables (pandoc / php md / gfm ) } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \ - "((\\|)?:?(-+:?[\\|+])+:?-+:?(\\|)?)\n" \ + "((\\|)?(:?-+:?[\\|+])+:?-+:?(\\|)?)\n" \ "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) { len = RLENGTH; st = RSTART; #initialize empty arrays @@ -674,10 +688,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, } # Macros (standalone <> calls handled as block, so they are not wrapped in paragraph) - } else if ( AllowMacros && match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/) ) { + } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) { len = RLENGTH; text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block); - return macro(text) _block(substr(block, len + 1) ); + return "" HTML(text) "" _block(substr(block, len + 1) ); # Definition list } else if (match( block, "^(([ \t]*\n)*[^:\n \t][^\n]+\n" \ @@ -690,39 +704,25 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, list = substr( block, 1, RLENGTH); block = substr( block, RLENGTH + 1); return "\n
\n" _dlist( list ) "
\n" _block( block ); - # Unordered list - } else if ( match( block, "(^|\n) ? ? ?[-+*][ \t][^\n]+(\n|$)" \ - "(([ \t]*\n)* ? ? ?[-+*][ \t][^\n]+(\n|$)" \ - "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ - "|[^\n \t][^\n]+(\n|$))*" ) ) { - st = RSTART; len = RLENGTH; list = substr( block, RSTART, RLENGTH); - sub("^\n", "", list); match(list, "^ ? ? ?[-+*]"); indent = RLENGTH; - gsub( "(^|\n) {0," indent - 1 "}", "\n", list); sub("^\n", "", list); - - text = substr(block, 1, st - 1); block = substr(block, st + len); - if (match( list, "\n([0-9]+\\.|#\\.)[ \t]" )) { - block = substr(list, RSTART + 1) block; - list = substr(list, 1, RSTART); - } - - return _block( text ) "\n" _block( block ); - - # Ordered list - } else if ( match( block, "(^|\n) ? ? ?([0-9]+\\.|#\\.)[ \t][^\n]+(\n|$)" \ - "(([ \t]*\n)* ? ? ?([0-9]+\\.|#\\.)[ \t][^\n]+(\n|$)" \ - "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ - "|[^\n \t][^\n]+(\n|$))*" ) ) { - st = RSTART; len = RLENGTH; list = substr( block, RSTART, RLENGTH); - sub("^\n", "", list); match(list, "^ ? ? ?[0-9#]"); indent = RLENGTH; - gsub( "(^|\n) {0," indent - 1 "}", "\n", list); sub("^\n", "", list); - - text = substr(block, 1, st - 1); block = substr(block, st + len); - if (match( list, "\n[-+*][ \t]" )) { - block = substr(list, RSTART + 1) block; - list = substr(list, 1, RSTART); - } - - return _block( text ) "
    \n" _list( list, "([0-9]+\\.|#\\.)" ) "
\n" _block( block ); + # Unordered list types + } else if ( text = _startlist( block, "ul", "-", "([+*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "\\+", "([-*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "\\*", "([-+•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "•", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + + # Ordered list types + } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*•]|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*•]|[0-9]+\\.|#\\.|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ol", "#\\.", "([-+*•]|[0-9]+\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ol", "#\\)", "([-+*•]|[0-9]+\\.|#\\.|[0-9]+\\))") ) { + return text; # Split paragraphs } else if ( match( block, /(^|\n)[[:space:]]*(\n|$)/) ) { @@ -741,17 +741,38 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, } } +function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, text) { + if (match( block, "(^|\n) ? ? ?" mark "[ \t][^\n]+(\n|$)" \ + "(([ \t]*\n)* ? ? ?" mark "[ \t][^\n]+(\n|$)" \ + "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ + "|[^\n \t][^\n]+(\n|$))*" ) ) { + st = RSTART; len = RLENGTH; list = substr( block, st, len); + + sub("^\n", "", list); match(list, "^ ? ? ?"); indent = RLENGTH; + gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list); + + text = substr(block, 1, st - 1); block = substr(block, st + len); + if (match(text, /\n[[:space:]]*\n/)) return 0; + if (match(text, "(^|\n) ? ? ?" exclude "[ \t][^\n]+")) return 0; + if (match( list, "\n" exclude "[ \t]" )) { + block = substr(list, RSTART + 1) block; + list = substr(list, 1, RSTART); + } + + return _block( text ) "<" type ">\n" _list( list, mark ) "\n" _block( block ); + } else return 0; +} + function _list (block, mark, p, LOCAL, len, st, text, indent, task) { if ( match(block, "^([ \t]*\n)*$")) return; + match(block, "^" mark "[ \t]"); indent = RLENGTH; sub("^" mark "[ \t]", "", block); + if (match(block, /\n[ \t]*\n/)) p = 1; - match( block, "\n" mark "[ \t][^\n]+(\n|$)" \ - "(([ \t]*\n)* ? ? ?" mark "[ \t][^\n]+(\n|$)" \ - "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ - "|[^\n \t][^\n]+(\n|$))*"); - (RLENGTH == -1) ? st = length(block) + 1 : st = RSTART; + match( block, "\n" mark "[ \t][^\n]+(\n|$)" ); + st = (RLENGTH == -1) ? length(block) + 1 : RSTART; text = substr(block, 1, st); block = substr(block, st + 1); gsub("\n {0," indent "}", "\n", text); @@ -761,7 +782,7 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, task) { match( text, /^\[\/\]/ ) ? "
  • " : \ match( text, /^\[\?\]/ ) ? "
  • " : \ match( text, /^\[[xX]\]/) ? "
  • " : "
  • "; - sub(/^\[[-? /xX]\]/, "", text); + sub(/^\[[-? \/xX]\]/, "", text); text = _nblock( text ); if ( ! p && match( text, "^

    (]|\n$" ))