From cc08744f66835d9ad6e9ec7f2c3fa9ed17e712ba Mon Sep 17 00:00:00 2001 From: =?utf8?q?Paul=20H=C3=A4nsch?= Date: Wed, 30 Aug 2023 15:38:15 +0200 Subject: [PATCH] reduce use of non-posix gensub --- markdown.awk | 69 +++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/markdown.awk b/markdown.awk index 86657ad..eb40210 100755 --- a/markdown.awk +++ b/markdown.awk @@ -107,7 +107,7 @@ function URL ( text, sharp ) { return text; } -function inline( line, LOCAL, len, code, href, guard ) { +function inline( line, LOCAL, len, text, code, href, guard ) { nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # not underline (except when escaped) na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # not asterisk (except when escaped) ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner (underline) @@ -134,7 +134,7 @@ function inline( line, LOCAL, len, code, href, guard ) { code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1) len = 2 * length(guard) + length(code) # strip single surrounding white spaces - code = gensub( /^ | $/, "", "g" , code) + gsub( /^ | $/, "", code) # escape HTML within code span gsub( /&/, "\\&", code ); gsub( //, "\\>", code ); return "" code "" inline( substr( line, len + 1 ) ) @@ -310,7 +310,7 @@ function inline( line, LOCAL, len, code, href, guard ) { } function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) { - split( gensub( /^(.* )?([0-9]+( [0-9]+){5})$/, "\\2" ,"1", hstack), HL); + match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); for ( n = hlvl; n <= 6; n++ ) { sec = sec (HL[n]?"":""); } HL[hlvl]++; for ( n = hlvl + 1; n <= 6; n++) { HL[n] = 0;} @@ -319,8 +319,8 @@ function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) { hid = hid HL[1]; for ( n = 2; n <= hlvl; n++) { hid = hid "." HL[n] ; } hid = hid ":" URL(htxt, 1); - hstack = gensub( /^(.* )?([0-9]+( [0-9]+){5})$/, "\\1" ,"1", hstack) \ - HL[1] " " HL[2] " " HL[3] " " HL[4] " " HL[5] " " HL[6]; + sub(/([0-9]+( [0-9]+){5})$/, "", hstack); + hstack = hstack HL[1] " " HL[2] " " HL[3] " " HL[4] " " HL[5] " " HL[6]; return sec "
" \ "" inline( htxt ) \ @@ -337,7 +337,7 @@ function _nblock( block, LOCAL, sec, n ) { for ( n = blvl + 1; n in BL; n++) { delete BL[n]; } block = _block( block ); - split( gensub( /^(.* )?([0-9]+( [0-9]+){5})$/, "\\2" ,"1", hstack), HL); + match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); sec = ""; for ( n = 1; n <= 6; n++ ) { sec = sec (HL[n]?"
":""); } sub("( +[0-9]+){6} *$", "", hstack); blvl--; @@ -382,8 +382,9 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, } else if ( match( block, /^> /) ) { match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match(block, /$/); len = RLENGTH; st = RSTART; - return "
" gensub( /^\n|\n$/, "", "g", _nblock( gensub( /(^|\n)> /, "\n", "g", substr(block, 1, st - 1) ) ) ) "
\n\n" \ - _block( substr(block, st + len) ); + text = substr(block, 1, st - 1); gsub( /(^|\n)> /, "\n", text ); + text = _nblock( text ); gsub( /^\n|\n$/, "", text ) + return "
" text "
\n\n" _block( substr(block, st + len) ); # Pipe Tables (pandoc / php md / gfm ) } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \ @@ -516,12 +517,12 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, # Line Blocks (pandoc) } else if ( match(block, /^\| [^\n]*(\n|$)(\| [^\n]*(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { len = RLENGTH; st = RSTART; - text = substr(block, 1, len); - gsub(/\n[[:space:]]+/, " ", text); - gsub(/\n\| /, "\n", text); - gsub(/^\| |\n$/, "", text); - return "
" gensub(/\n/, "
\n", "g", inline( text )) "
\n" \ - _block( substr( block, len + 1) ); + + text = substr(block, 1, len); gsub(/\n[[:space:]]+/, " ", text); + gsub(/\n\| /, "\n", text); gsub(/^\| |\n$/, "", text); + text = inline(text); gsub(/\n/, "
\n", text); + + return "
" text "
\n" _block( substr( block, len + 1) ); # Indented Code Block } else if ( match(block, /^( |\t)( *\t*[^ \t\n]+ *\t*)+(\n|$)(( |\t)[^\n]+(\n|$)|[ \t]*(\n|$))*/) ) { @@ -570,53 +571,55 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, # First Order Heading H1 + Attrib } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n|$)/ ) ) { - len = RLENGTH; - text = gensub(/^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "\\1", 1, block) - attrib = gensub(/^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "\\3", 1, block) + len = RLENGTH; text = attrib = block; + sub(/([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "", text); + sub(/\}\n===+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); return headline(1, text, attrib) _block( substr( block, len + 1 ) ); # First Order Heading H1 } else if ( match( block, /^([^\n]+)\n===+(\n|$)/ ) ) { - len = RLENGTH; - text = gensub(/^([^\n]+)\n===+(\n.*)?$/, "\\1", 1, block) + len = RLENGTH; text = substr(block, 1, len); + sub(/\n===+(\n.*)?$/, "", text); return headline(1, text, 0) _block( substr( block, len + 1 ) ); # Second Order Heading H2 + Attrib } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n|$)/ ) ) { - len = RLENGTH; - text = gensub(/^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "\\1", 1, block) - attrib = gensub(/^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "\\3", 1, block) + len = RLENGTH; text = attrib = block; + sub(/([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "", text); + sub(/\}\n---+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); return headline(2, text, attrib) _block( substr( block, len + 1) ); # Second Order Heading H2 } else if ( match( block, /^([^\n]+)\n---+(\n|$)/ ) ) { - len = RLENGTH; - text = gensub(/^([^\n]+)\n---+(\n.*)?$/, "\\1", 1, block) + len = RLENGTH; text = substr(block, 1, len); + sub(/\n---+(\n.*)?$/, "", text); return headline(2, text, 0) _block( substr( block, len + 1) ); # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) { - len = RLENGTH; - n = gensub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "\\1", 1, block); - text = gensub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "\\2", 1, block); - attrib = gensub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "\\5", 1, block); + len = RLENGTH; text = attrib = substr(block, 1, len); + match(block, /^#{1,6}/); n = RLENGTH; + + sub(/^(#{1,6})[ \t]*/, "", text); sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text); + sub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib); + sub(/\})(\n.*)?$/, "", attrib); gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); - return headline( length(n), text, attrib ) _block( substr( block, len + 1) ); + return headline( n, text, attrib ) _block( substr( block, len + 1) ); # Nth Order Heading H1 H2 H3 H4 H5 H6 } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) { - len = RLENGTH; - n = gensub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n.*)?$/, "\\1", 1, block); - text = gensub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n.*)?$/, "\\2", 1, block); + len = RLENGTH; text = substr(block, 1, len); + match(block, /^#{1,6}/); n = RLENGTH; + sub(/^(#{1,6})[ \t]*/, "", text); sub(/[ \t]*#*(\n.*)?$/, "", text); - return headline( length(n), text, 0 ) _block( substr( block, len + 1) ); + return headline( n, text, 0 ) _block( substr( block, len + 1) ); # block images (wrapped in
) } else if ( match(block, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n|$)/) ) { -- 2.39.2