From: Paul Hänsch Date: Fri, 30 Aug 2024 17:18:04 +0000 (+0200) Subject: modify regexes for use in mawk X-Git-Url: https://git.plutz.net/?a=commitdiff_plain;h=98df52bf69b6f2d838264902de21369013602102;p=cgilite modify regexes for use in mawk --- diff --git a/markdown.awk b/markdown.awk index fc2f203..6e3febe 100755 --- a/markdown.awk +++ b/markdown.awk @@ -701,23 +701,25 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, ret = ret headline(2, text, 0) ; block = substr( block, len + 1); continue; - # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib - } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) { + # # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib + # } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{[a-zA-Z \t-]*\}(\n|$)/ ) ) { + } else if ( match( block, /^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*\{[\ta-zA-Z -]*\}(\n|$)/ ) ) { len = RLENGTH; text = attrib = substr(block, 1, len); match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1; - # sub(/^(##?#?#?#?#?)[ \t]*/, "", text); # not working in mawk text = substr(text, n + 1); sub(/^[ \t]*/, "", text); sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text); - sub(/^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib); + + sub(/^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*\{/, "", attrib); sub(/\}(\n.*)?$/, "", attrib); gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); - ret = ret headline( n, text, attrib ) ; block = substr( block, len + 1); + ret = ret headline( n, text, attrib ); block = substr( block, len + 1); continue; # Nth Order Heading H1 H2 H3 H4 H5 H6 - } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) { + # } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) { + } else if ( match( block, /^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*(\n|$)/ ) ) { len = RLENGTH; text = substr(block, 1, len); match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1; # sub(/^(##?#?#?#?#?)[ \t]+/, "", text); # not working in mawk @@ -854,10 +856,12 @@ function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, it "|[^\n \t][^\n]+(\n|$))*" ) ) { st = RSTART; len = RLENGTH; list = substr( block, st, len); - sub("^\n", "", list); match(list, "^ ? ? ?"); indent = RLENGTH; - it = ""; while ( indent > 0 ) { it = it " ?"; indent--; } + sub("^\n", "", list); match(list, "^( | | |)"); indent = RLENGTH; # gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list); - gsub( "(^|\n)" it, "\n", list); sub("^\n", "", list); + # emulate greedy range matcher for mawk + it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } + sub(/\|$/, ")?", it); sub(/^\($/, "", it); + gsub( "(^|\n)" it, "\n", list ); sub("^\n", "", list); text = substr(block, 1, st - 1); block = substr(block, st + len); if (match(text, /\n[[:space:]]*\n/)) return 0; @@ -875,7 +879,6 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, it, task) { if ( match(block, "^([ \t]*\n)*$")) return; match(block, "^" mark "[ \t]"); indent = RLENGTH; - it = ""; while ( indent > 0 ) { it = it " ?"; indent--; } sub("^" mark "[ \t]", "", block); @@ -886,6 +889,9 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, it, task) { text = substr(block, 1, st); block = substr(block, st + 1); # gsub("\n {0," indent "}", "\n", text); + # emulate greedy range matcher for mawk + it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } + sub(/\|$/, ")?", it); sub(/^\($/, "", it); gsub("\n" it, "\n", text); task = match( text, /^\[ \]/ ) ? "
  • " : \ @@ -915,9 +921,11 @@ function _dlist (block, LOCAL, len, st, text, indent, it, p) { len = RLENGTH; text = substr(block, 1, len); sub( "^([ \t]*\n)*", "", text); match(text, "^ ? ? ?:(\t| +)"); indent = RLENGTH; - it = ""; while ( indent > 0 ) { it = it " ?"; indent--; } sub( "^ ? ? ?:(\t| +)", "", text); # gsub( "(^|\n) {0," indent "}", "\n", text ); + # emulate greedy range matcher for mawk + it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } + sub(/\|$/, ")?", it); sub(/^\($/, "", it); gsub( "(^|\n)" it, "\n", text ); text = _nblock(text);