From 364b5f4157c8cd7e37b3b1dcc35edf747dc9d879 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Paul=20H=C3=A4nsch?= Date: Tue, 4 Nov 2025 19:23:00 +0100 Subject: [PATCH] markdown code cleanup --- markdown.awk | 55 ++-------------------------------------------------- 1 file changed, 2 insertions(+), 53 deletions(-) diff --git a/markdown.awk b/markdown.awk index b079bbe..d02c497 100755 --- a/markdown.awk +++ b/markdown.awk @@ -106,21 +106,6 @@ function HTML ( text ) { return text; } -function URL ( text ) { - gsub( /&/, "%26", text ); - gsub( /"/, "%22", text ); - gsub( /'/, "%27", text ); - gsub( /`/, "%60", text ); - gsub( /\?/, "%3F", text ); - gsub( /#/, "%23", text ); - gsub( /\[/, "%5B", text ); - gsub( /\]/, "%5D", text ); - gsub( / /, "%20", text ); - gsub( / /, "%09", text ); - gsub( /\\/, "%5C", text ); - return text; -} - function inline( line, LOCAL, len, text, code, href, guard, ret ) { ret = ""; while (line !~ /^$/) { @@ -160,8 +145,6 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) { len = RLENGTH; href = text = substr(line, 1, len); sub(/^\[\[/, "", href); sub(/(\|([^]]+))?\]\].*$/, "", href); sub(/^\[\[([^]|]+)/, "", text); sub(/\]\].*$/, "", text); sub(/^\|/, "", text); - # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", href ); - # sub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", text ); if ( ! 
text ) text = href; ret = ret "" HTML(text) ""; line = substr( line, len + 1); continue; @@ -174,7 +157,6 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) { continue; # quick link email - # } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>/ ) ) { } else if ( match( line, /^<[a-zA-Z0-9.!#$%&'\''*+\/=?^_`{|}~-]+@([a-zA-Z0-9]\.[a-zA-Z0-9]|[a-zA-Z0-9-])+>/ ) ) { len = RLENGTH; href = HTML( substr( line, 2, len - 2) ); @@ -212,8 +194,6 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) { len = RLENGTH; text = id = substr(line, 1, len); sub(/\n.*$/, "", text); sub(/^\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text); sub(/\n.*$/, "", id); sub(/^\[([^]]+)\] ?\[/, "", id); sub(/\].*$/, "", id); - # text = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, text ); - # id = gensub(/^\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, id ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { @@ -266,8 +246,6 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) { len = RLENGTH; text = id = substr(line, 1, len); sub(/\n.*$/, "", text); sub(/^!\[/, "", text); sub(/\] ?\[([^\n]*)\].*$/, "", text); sub(/\n.*$/, "", id); sub(/^!\[([^]]+)\] ?\[/, "", id); sub(/\].*$/, "", id); - # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); - # id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { ret = ret "\"""; @@ -345,8 +323,7 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) { continue; # Literal HTML entities - # } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) { - # mawk does not support repitition ranges + # mawk does not support repetition ranges (e.g. 
"xyz{1,10}") } else if ( match( line, /^&[a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?;/) ) { len = RLENGTH; ret = ret substr( line, 1, len ); line = substr(line, len + 1); @@ -384,7 +361,6 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) { } function headline( hlvl, htxt, attrib, LOCAL, sec, n, hid, hid2, HL) { - # match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); match(hstack, /([0-9]+( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+))$/); split( substr(hstack, RSTART), HL); for ( n = hlvl; n <= 6; n++ ) { sec = sec (HL[n]?"":""); } @@ -392,12 +368,8 @@ function headline( hlvl, htxt, attrib, LOCAL, sec, n, hid, hid2, HL) { hid = ""; for ( n = 2; n <= blvl; n++) { hid = hid BL[n] "/"; } hid = hid HL[1]; for ( n = 2; n <= hlvl; n++) { hid = hid "." HL[n] ; } - hid = hid ":" HTML(htxt); # anchor for TOC - # hid2 = ":" HTML(htxt); # anchor for permalink - # while ( headings[hid2] ) { n = n ? 
2 : n + 1; hid2 = ":" HTML(htxt) "/" n; } - # headings[hid2] = true; + hid = hid ":" HTML(htxt); # anchor for TOC and permalink - # sub(/([0-9]+( [0-9]+){5})$/, "", hstack); sub(/([0-9]+( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+)( [0-9]+))$/, "", hstack); hstack = hstack HL[1] " " HL[2] " " HL[3] " " HL[4] " " HL[5] " " HL[6]; @@ -543,13 +515,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, # Column Count tmp = block; sub( "(\n.*)*$", "", tmp); cols = split( tmp, tread, /\+/) - 2; - # debug(" Cols: " gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block )); # table alignment match(block, "((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)"); split( substr(block, RSTART, RLENGTH) , talign, /\+/ ); - # split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ ); - # debug("Align: " gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block )); for (cnt = 1; cnt <= cols; cnt++) { if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center"; @@ -632,7 +601,6 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, guard = substr( block, 1, RLENGTH ); attrib = code = block; sub(/^[^\n]+\n/, "", code); sub(/^:::+[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib); - # attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, attrib); gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) { @@ -658,7 +626,6 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, guard = substr( block, 1, RLENGTH ); attrib = code = block; sub(/^[^\n]+\n/, "", code); sub(/^(~~~+|```+)[ \t]*\{?[ \t]*/, "", attrib); sub(/\}?[ \t]*\n.*$/, "", attrib); - # attrib = gensub(/^(~~~+|```+)[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\2", 1, attrib); gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); if ( match(code, "(^|\n)" guard "+(\n|$)" ) && attrib ) { @@ 
-718,11 +685,9 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, continue; # # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib - # } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{[a-zA-Z \t-]*\}(\n|$)/ ) ) { } else if ( match( block, /^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*\{[\ta-zA-Z -]*\}(\n|$)/ ) ) { len = RLENGTH; text = attrib = substr(block, 1, len); match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1; - # sub(/^(##?#?#?#?#?)[ \t]*/, "", text); # not working in mawk text = substr(text, n + 1); sub(/^[ \t]*/, "", text); sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text); @@ -734,11 +699,9 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, continue; # Nth Order Heading H1 H2 H3 H4 H5 H6 - # } else if ( match( block, /^(##?#?#?#?#?)[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) { } else if ( match( block, /^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*(\n|$)/ ) ) { len = RLENGTH; text = substr(block, 1, len); match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1; - # sub(/^(##?#?#?#?#?)[ \t]+/, "", text); # not working in mawk text = substr(text, n + 1); sub(/^[ \t]*/, "", text); sub(/[ \t]*#*(\n.*)?$/, "", text); @@ -782,8 +745,6 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, len = RLENGTH; text = id = block; sub(/(\n.*)?$/, "", text); sub( /^!\[/, "", text); sub(/\] ?\[([^\n]*)\]$/, "", text); sub(/(\n.*)?$/, "", id); sub( /^!\[([^\n]*)\] ?\[/, "", id); sub(/\]$/, "", id); - # text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block); - # id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { ret = ret "
" \ @@ -808,7 +769,6 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) { len = RLENGTH; text = block; sub(/^<>(\n.*)?$/, "", text); - # text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block); ret = ret "" HTML(text) "" ; block = substr(block, len + 1); continue; @@ -851,7 +811,6 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, continue; # Horizontal rule - # } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) { } else if ( match( block, /(^|\n) ? ? ?((\* *)(\* *)(\* *)(\* *)*|(- *)(- *)(- *)(- *)*|(_ *)(_ *)(_ *)(_ *)*)($|\n)/) ) { len = RLENGTH; st = RSTART; ret = ret _block(substr(block, 1, st - 1)) "
\n"; block = substr(block, st + len); @@ -873,7 +832,6 @@ function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, it st = RSTART; len = RLENGTH; list = substr( block, st, len); sub("^\n", "", list); match(list, "^( | | )?"); indent = RLENGTH; - # gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list); # emulate greedy range matcher for mawk it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } sub(/\|$/, ")?", it); sub(/^\($/, "", it); @@ -904,7 +862,6 @@ function _list (block, mark, p, LOCAL, len, st, text, indent, it, task) { st = (RLENGTH == -1) ? length(block) + 1 : RSTART; text = substr(block, 1, st); block = substr(block, st + 1); - # gsub("\n {0," indent "}", "\n", text); # emulate greedy range matcher for mawk it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } sub(/\|$/, ")?", it); sub(/^\($/, "", it); @@ -938,7 +895,6 @@ function _dlist (block, LOCAL, len, st, text, indent, it, p) { sub( "^([ \t]*\n)*", "", text); match(text, "^ ? ? ?:(\t| +)"); indent = RLENGTH; sub( "^ ? ? ?:(\t| +)", "", text); - # gsub( "(^|\n) {0," indent "}", "\n", text ); # emulate greedy range matcher for mawk it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; } sub(/\|$/, ")?", it); sub(/^\($/, "", it); @@ -957,7 +913,6 @@ BEGIN { file = ""; rl_href[""] = ""; rl_title[""] = ""; if (ENVIRON["MD_HTML"] == "true") { AllowHTML = "true"; } HL[1] = 0; HL[2] = 0; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0; - # hls = "0 0 0 0 0 0"; # Universal Patterns nu = "([^_\\\\]|\\\\.|_[[:alnum:]])" # not underline (except when escaped, or inside a word) @@ -981,7 +936,6 @@ BEGIN { # Fill array of reference links f = file; rl_id; re_reflink = "(^|\n) ? ? ?\\[([^]\n]+)\\]: ([^ \t\n]+)(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))?(\n|$)"; - # /(^|\n) ? ? 
?\[([^]\n]+)\]: ([^ \t\n]+)(\n?[ \t]+("([^"]+)"|'([^']+)'|\(([^)]+)\)))?(\n|$)/ while ( match(f, re_reflink ) ) { tt = th = ti = substr(f, RSTART, RLENGTH); f = substr(f, RSTART + RLENGTH); sub("(^|\n) ? ? ?\\[", "", ti); sub("\\]: ([^ \t\n]+)(\n?[ \t]+(\"([^\"]+)\"|'([^']+)'|\\(([^)]+)\\)))?(\n.*)?$", "", ti); @@ -990,16 +944,11 @@ BEGIN { sub("(^|\n) ? ? ?\\[([^]\n]+)\\]: ([^ \t\n]+)", "", tt); sub("^\n?[ \t]+", "", tt); sub("(\n.*)?$", "", tt); } else { tt = ""; } rl_id = ti; rl_href[rl_id] = th; rl_title[rl_id] = tt; - # rl_id = gensub( re_reflink, "\\2", 1, substr(f, RSTART, RLENGTH) ); - # rl_href[rl_id] = gensub( re_reflink, "\\3", 1, substr(f, RSTART, RLENGTH) ); - # rl_title[rl_id] = gensub( re_reflink, "\\5", 1, substr(f, RSTART, RLENGTH) ); - # f = substr(f, RSTART + RLENGTH); rl_title[rl_id] = substr( rl_title[rl_id], 2, length(rl_title[rl_id]) - 2 ); if ( rl_href[rl_id] ~ /<.*>/ ) rl_href[rl_id] = substr( rl_href[rl_id], 2, length(rl_href[rl_id]) - 2 ); } # Clear reflinks from File while( gsub(re_reflink, "\n", file ) ); - # for (n in rl_href) { debug(n " | " rl_href[n] " | " rl_title[n] ); } # Run Block Processing -> The Actual Markdown! printf "%s", _nblock( file ); -- 2.39.5