From 32b4555b66c086805df962e68070956c4c1780d7 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Paul=20H=C3=A4nsch?= Date: Sat, 31 Aug 2024 21:53:16 +0200 Subject: [PATCH] split/modify some regexes for compatibility with old mawk implementations --- markdown.awk | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/markdown.awk b/markdown.awk index ad8c438..356a987 100755 --- a/markdown.awk +++ b/markdown.awk @@ -347,7 +347,11 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) { # Literal HTML entities # } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) { # mawk does not support repitition ranges - } else if ( match( line, /^&([a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?|#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) { + } else if ( match( line, /^&[a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?;/) ) { + len = RLENGTH; + ret = ret substr( line, 1, len ); line = substr(line, len + 1); + continue; + } else if ( match( line, /^&(#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) { len = RLENGTH; ret = ret substr( line, 1, len ); line = substr(line, len + 1); continue; @@ -425,8 +429,14 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len); continue; - # HTML #6 - } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { + # HTML #6 (part1) + } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { + len = RLENGTH; st = RSTART; + ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len); + continue; + + # HTML #6 (part2) + } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { len = RLENGTH; st = RSTART; ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len); continue; @@ -516,8 +526,7 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, "((\\|([^\n]+\\|)+\n)+" \ "\\+(-+\\+)+(\n|$))+" \ ) || \ - match( block, "^()()()" \ - "(\\+(:?-+:?\\+)+)\n" \ + match( block, "^(\\+(:?-+:?\\+)+)\n" \ "((\\|([^\n]+\\|)+\n)+" \ "\\+(-+\\+)+(\n|$))+" \ ) ) { @@ -857,7 +866,7 @@ function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, it "|[^\n \t][^\n]+(\n|$))*" ) ) { st = RSTART; len = RLENGTH; list = substr( block, st, len); - sub("^\n", "", list); match(list, "^( | | |)"); indent = RLENGTH; + sub("^\n", "", list); match(list, "^( | | )?"); indent = RLENGTH; # gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list); # emulate greedy range matcher for mawk it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } -- 2.39.2