From 8f60061afcd1d37a725fe2f9ea84c17e66e41254 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Paul=20H=C3=A4nsch?= Date: Thu, 18 Dec 2025 07:31:25 +0100 Subject: [PATCH] unified attribute parsing --- markdown.awk | 42 ++++++++++++++++++++++-------------------- tests-markdown.sh | 2 +- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/markdown.awk b/markdown.awk index a9a6e2b..d1f6616 100755 --- a/markdown.awk +++ b/markdown.awk @@ -211,21 +211,20 @@ function inline( line, LOCAL, len, text, code, href, guard, ret ) { } # inline images - } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) { + } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)("rattr")?") ) { len = RLENGTH; text = href = title = attrib = substr( line, 1, len); sub("^!\\[", "", text); - sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)("rattr")?$", "", text); sub("^!" lix "\\([\n\t ]*", "", href); - sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)("rattr")?$", "", href); sub("^!" lix "\\([\n\t ]*" lid, "", title); - sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title); + sub("[\n\t ]*\\)("rattr")?$", "", title); sub("^[\n\t ]+", "", title); - sub("^!" 
lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); - sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + attrib = _attr(attrib); if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } @@ -649,11 +648,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, } # First Order Heading H1 + Attrib - } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n|$)/ ) ) { + } else if ( match( block, "^([^\n]+)([ \t]*"rattr")\n===+(\n|$)" ) ) { len = RLENGTH; text = attrib = block; sub(/([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "", text); - sub(/\}\n===+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); - gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + attrib = _attr(attrib); ret = ret headline(1, text, attrib) ; block = substr( block, len + 1 ); continue; @@ -667,11 +665,10 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, continue; # Second Order Heading H2 + Attrib - } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n|$)/ ) ) { + } else if ( match( block, "^([^\n]+)([ \t]*"rattr")\n---+(\n|$)" ) ) { len = RLENGTH; text = attrib = block; sub(/([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "", text); - sub(/\}\n---+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); - gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + attrib = _attr(attrib); ret = ret headline(2, text, attrib) ; block = substr( block, len + 1); continue; @@ -685,15 +682,13 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, continue; # # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib - } else if ( match( block, /^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*\{[\ta-zA-Z -]*\}(\n|$)/ ) ) { + } else if ( match( block, "^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*"rattr"(\n|$)" ) ) { len 
= RLENGTH; text = attrib = substr(block, 1, len); match(block, /^##?#?#?#?#?[^#]/); n = RLENGTH - 1; text = substr(text, n + 1); sub(/^[ \t]*/, "", text); sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text); - sub(/^##?#?#?#?#?[^#\n]([^\n#]|#[^\t\n# ]|#[\t ]+[^\t\n ])+#*[\t ]*\{/, "", attrib); - sub(/\}(\n.*)?$/, "", attrib); - gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + attrib = _attr(attrib); ret = ret headline( n, text, attrib ); block = substr( block, len + 1); continue; @@ -709,7 +704,7 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, continue; # block images (wrapped in
) - } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n|$)") ) { + } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)("rattr")?(\n|$)") ) { len = RLENGTH; text = href = title = attrib = substr( block, 1, len); sub("^!\\[", "", text); @@ -722,9 +717,7 @@ function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", title); sub("^[\n\t ]+", "", title); - sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); - sub("(\n.*)?$", "", attrib); - sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + attrib = _attr(attrib); if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } @@ -908,6 +901,13 @@ function _dlist (block, LOCAL, len, st, text, indent, it, p) { } } +function _attr (attrib) { + if ( sub(/.*\{[ \t.#]*/, "", attrib) ) if ( sub(/[ \t]*\}(\n.*)?$/, "", attrib) ) { + gsub(/[ \t.#]+/, " ", attrib); + return attrib; + } else return ""; +} + BEGIN { # Global Vars file = ""; rl_href[""] = ""; rl_title[""] = ""; @@ -922,6 +922,8 @@ BEGIN { iea = "\\*([^*[:space:]]|[^*[:space:]]" na "*[^*[:space:]])\\*" # inner (asterisk) isa = "\\*\\*([^*[:space:]]|[^*[:space:]]" na "*[^*[:space:]])\\*\\*" # inner (asterisk) + rattr = "\\{[ \t]*([.#]?([a-zA-Z0-9]+-)*[a-zA-Z0-9]+[ \t]+)*([.#]?([a-zA-Z0-9]+-)*[a-zA-Z0-9]+)[ \t]*\\}" + lix="\\[(\\\\[^\n]|[^]\n\\\\[])*\\]" # link text lid="(<(\\\\[^\n]|[^\n<>\\\\])*>|(\\\\.|[^()\"'\\\\])+|([^<\n\t ()\\\\]|\\\\[^\n])(\\\\[\n]|[^\n\t \\(\\)\\\\])*)" # link dest lit="(\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'|\\((\\\\.|[^\\(\\)\\\\])*\\))" # link text diff --git a/tests-markdown.sh b/tests-markdown.sh index bee2e37..3794321 100755 --- a/tests-markdown.sh +++ b/tests-markdown.sh @@ -93,7 +93,7 @@ assert '[Wikipedia]( "Online Encyclopedia")' "

Testbild

' "inline image" assert ' ![Testbild](Test Bild.jpg "German Television *test* image ca. 1994")' '

Testbild

' "inline image" assert ' ![Testbild *ARD*](Test Bild.jpg){tv ard function-check}' '

Testbild *ARD*

' "inline image" -# assert ' ![Testbild *ARD*](Test Bild.jpg){#tv .ard .function-check}' '

Testbild *ARD*

' "inline image id/classes" +assert ' ![Testbild *ARD*](Test Bild.jpg){#tv .ard .function-check}' '

Testbild *ARD*

' "inline image id/classes" assert '[![Wikipedia](wikilogo.png)]()'\ '

Wikipedia

'\ -- 2.39.5