From: Paul Hänsch Date: Mon, 15 Apr 2024 11:05:26 +0000 (+0200) Subject: Merge commit 'c16fa9412ca9760759fa0357278c3bf8e065959d' X-Git-Url: http://git.plutz.net/?a=commitdiff_plain;h=f0b9c81f51c55f990356f087721f0743706f4867;hp=efc3ea0ce4c4d8efbe95d138e18c266ee2547975;p=confetti Merge commit 'c16fa9412ca9760759fa0357278c3bf8e065959d' --- diff --git a/cgilite/cgilite.awk b/cgilite/cgilite.awk new file mode 100644 index 0000000..f16ed6a --- /dev/null +++ b/cgilite/cgilite.awk @@ -0,0 +1,158 @@ +#!/bin/env awk -f + +function PATH( str, seg, out ) { + while ( str ) { + seg = str; + sub( /\/.*$/, "", seg); + sub( /^[^\/]*\//, "", str); + + if ( seg == ".." ) sub(/\/[^\/]*\/?$/, "", out); + else if ( seg ~ /^\.?$/) sub(/\/?$/, "/", out); + else sub(/\/?$/, "/" seg, out); + + if (seg == str) break; + } + if (!(str && out)) sub(/\/?$/,"/" out); + return out; +} + +function HEX_DECODE( pfx, inp, out, n, k ) { + k = length(pfx); + gsub(/[].*+?^${}()|\\[]/,"\\\\&",pfx); + while ( inp ) if ( n = match(inp, pfx "[0-9a-fA-F][0-9a-fA-F]") ) { + out = out substr(inp, 1, n - 1); + inp = substr(inp, n + k); + if (inp ~ /^[0-9]/) n = 16 * substr(inp, 1, 1); + else if (inp ~ /^[aA]/) n = 160; + else if (inp ~ /^[bB]/) n = 176; + else if (inp ~ /^[cC]/) n = 192; + else if (inp ~ /^[dD]/) n = 208; + else if (inp ~ /^[eE]/) n = 224; + else if (inp ~ /^[fF]/) n = 240; + if (inp ~ /^.[0-9]/) n += substr(inp, 2, 1); + else if (inp ~ /^.[aA]/) n += 10; + else if (inp ~ /^.[bB]/) n += 11; + else if (inp ~ /^.[cC]/) n += 12; + else if (inp ~ /^.[dD]/) n += 13; + else if (inp ~ /^.[eE]/) n += 14; + else if (inp ~ /^.[fF]/) n += 15; + out = out sprintf("%c", n); + inp = substr(inp, 3); + } else { + out = out inp; + break; + } + return out; +} + +function HTML( text ) { + gsub( /&/, "\\&", text ); + gsub( //, "\\>", text ); + gsub( /"/, "\\"", text ); + gsub( /'/, "\\'", text ); + gsub( /\[/, "\\[", text ); + gsub( /\]/, "\\]", text ); + gsub( /\r/, "\\ ", text ); + gsub( /\n/, "\\ ", text ); + gsub( /\\/, "\\\", text ); + return text; +} + +function URL( text ) { + gsub( /&/, "%26", text ); + gsub( /"/, "%22", text ); + gsub( /'/, "%27", text ); + gsub( /`/, "%60", text ); + gsub( /\?/, "%3F", text ); + gsub( /#/, "%23", text ); + gsub( /\[/, "%5B", text ); + gsub( /\]/, "%5D", text ); + gsub( / /, "%20", text ); + gsub( /\t/, "%09", text ); + gsub( /\r/, "%0D", text ); + gsub( /\n/, "%0A", text ); + gsub( /%/, "%25", text ); + gsub( /\\/, "%5C", text ); + return text; +} + +function _cgilite_urldecode( str, arr, spl, form, k, n, key) { + if (! spl) spl="&" + split(str, form, spl); + for ( k in form ) { + key = form[k]; sub(/=.*$/, "", key); + sub(/^[^=]*=/, "", form[k]); + if ( key in arr ) { + n = 1; while ( (key, n) in arr ) n++; + arr[key,n] = HEX_DECODE( "%", form[k]); + } else { + arr[key] = HEX_DECODE( "%", form[k]); + } + } +} + +function _cgilite_request( key, val) { + # Read request from client connection + + # Read Headers + getline; REQUEST_METHOD = $1; REQUEST_URI = $2; SERVER_PROTOCOL = $3; + while ( getline ) { + if ($0 ~ /^\r?$/) break; + else if ($0 ~ /^[a-zA-Z][0-9a-zA-Z_-]+: .*/) { + key = toupper($0); + sub(/:.*$/, "", key); + gsub(/-/, "_", key); + _HEADER[key] = $0; + sub(/^[^:]:[\t ]*/, "", _HEADER[key]); + sub(/[\t ]*\r?$/, "", _HEADER[key]); + } + } + CONTENT_LENGTH = _HEADER["CONTENT_LENGTH"]; + CONTENT_TYPE = _HEADER["CONTENT_TYPE"]; + + PATH_INFO = REQUEST_URI; gsub(/\?.*$/, "", PATH_INFO) + PATH_INFO = PATH( HEX_DECODE( "%", PATH_INFO ) ); + QUERY_STRING = REQUEST_URI; + if ( !gsub(/^[^?]+\?/, "", QUERY_STRING) ) QUERY_STRING = ""; + + # Set up _GET[]-Array + _cgilite_urldecode(QUERY_STRING, _GET); + + if ( _HEADER["CONTENT_TYPE"] == "application/x-www-form-urlencoded" \ + && _HEADER["CONTENT_LENGTH"] ) { + # Set up _POST[]-Array + + val = ""; key = "head -c " _HEADER["CONTENT_LENGTH"]; + while (key |getline) val = val $0; close(key); + _cgilite_urldecode(val, _POST); + } + + if ( _HEADER["COOKIE"] ) { + # Set up _COOKIE[]-Array + _cgilite_urldecode(_HEADER["COOKIE"], _COOKIE, "; ?"); + } + + if ( _HEADER["REFERER"] ) { + key = HEADER["REFERER"]; + if (! sub(/^[^\?]+?/, "", key)) key = "" + _cgilite_urldecode(key, _REF); + } + +} + +function _cgilite_headers() { + # Import request data from webserver environment variables +} + +BEGIN { + REQUEST_METHOD=""; REQUEST_URI=""; SERVER_PROTOCOL=""; + PATH_INFO=""; QUERY_STRING=""; CONTENT_LENGTH=""; CONTENT_TYPE=""; + split("", _GET); split("", _POST); split("", _REF); + split("", _HEADER); split("", _COOKIE); + + if ( ENVIRON["REQUEST_METHOD"] ) + _cgilite_headers(); + else + _cgilite_request(); +} diff --git a/cgilite/cgilite.sh b/cgilite/cgilite.sh index b51ee8e..b2467c3 100755 --- a/cgilite/cgilite.sh +++ b/cgilite/cgilite.sh @@ -1,22 +1,21 @@ #!/bin/sh -# Copyright 2017 - 2021 Paul Hänsch -# # This is CGIlite. # A collection of posix shell functions for writing CGI scripts. + +# Copyright 2017 - 2023 Paul Hänsch # -# CGIlite is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# CGIlite is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. # -# You should have received a copy of the GNU Affero General Public License -# along with CGIlite. If not, see . +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. [ -n "$include_cgilite" ] && return 0 # guard set after webserver part @@ -85,8 +84,14 @@ HEX_DECODE(){ # will be copied to the output literally while [ "$in" ]; do + [ "$pfx" ] || case $in in + [0-9a-fA-F][0-9a-fA-F]*):;; + ?*) out="${out}${in%%"${in#?}"}" + in="${in#?}"; continue;; + esac + case $in in - "$pfx"[0-9a-fA-F][0-9a-fA-F]*) in="${in#${pfx}}";; + "$pfx"[0-9a-fA-F][0-9a-fA-F]*) in="${in#"${pfx}"}";; \\*) in="${in#?}"; out="${out}\\\\"; continue;; %*) in="${in#?}"; out="${out}%%"; continue;; *) att="${in%%"${pfx}"*}"; att="${att%%%*}"; att="${att%%\\*}" @@ -307,6 +312,7 @@ URL(){ \&*) out="${out}%26"; str="${str#?}";; \"*) out="${out}%22"; str="${str#?}";; \'*) out="${out}%27"; str="${str#?}";; + \`*) out="${out}%60"; str="${str#?}";; \?*) out="${out}%3F"; str="${str#?}";; \#*) out="${out}%23"; str="${str#?}";; \[*) out="${out}%5B"; str="${str#?}";; diff --git a/cgilite/common.css b/cgilite/common.css index 65c28f3..30c3942 100644 --- a/cgilite/common.css +++ b/cgilite/common.css @@ -29,6 +29,7 @@ a { font-style: italic; text-decoration: underline; color: #068; + word-break: break-word; } a.button { font-style: inherit; @@ -47,8 +48,19 @@ b, strong { font-weight: bolder; } tt, code, var, samp, kbd { font-family: monospace; } kbd { font-style: italic; } -ul, ol { margin-left: 1.125em; } +blockquote { + background-color: #EEE; + margin: .5em 0; + padding: 1em 2em; + white-space: pre-line; +} + +ul, ol { padding-left: 1.5em; } dl dt { font-weight: bolder; } +dl dd { + margin: 0 2em; + background-color: #EEE; +} table th { font-weight: bold; } li p + ul, li p + ol { @@ -68,7 +80,10 @@ h4, h5, h6, form legend { margin-bottom: .25em; } -h1 { font-size: 1.5em; } +h1 { + text-align: center; + font-size: 1.5em; +} h2 { font-size: 1.125em; } select, input, button, textarea, a.button { @@ -101,6 +116,7 @@ input + label { margin-left: .375em; } +input[type="search"] + button.search, input.search + button.search { width: 2.5em; color: transparent; @@ -109,7 +125,9 @@ input.search + button.search { border-left: none; border-radius: 0 2pt 2pt 0; white-space: nowrap; + overflow: hidden; } +input[type="search"] + button.search:before, input.search + button.search:before { content: '\1f50d'; color: #000; @@ -119,8 +137,6 @@ input.search + button.search:before { @media print { @page { margin: 20mm; } - h1 { text-align: center; } - h1, h2, h3, h4, h5, h6, form legend { page-break-inside: avoid; page-break-after: avoid; @@ -168,4 +184,8 @@ input[type=radio].tab ~ *.tab { box-shadow: .125em .125em .125em #888; } +/* Markdown line-block */ +.line-block { white-space: pre-wrap; } +.line-block br { display: none; } + /* ======= End Common Styles ======= */ diff --git a/cgilite/db23.sh b/cgilite/db23.sh new file mode 100755 index 0000000..e8a0d64 --- /dev/null +++ b/cgilite/db23.sh @@ -0,0 +1,114 @@ +#!/bin/sh + +# Copyright 2023, 2024 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +[ -n "$include_db23" ] && return 0 +include_db23="$0" + +. "${_EXEC:-.}/cgilite/storage.sh" + +DB2() { + local call data file key val seq + data="${BR}${1}${BR}" call="$2" + shift 2 + + case $call in + new|discard) + printf '' + ;; + open|load) file="$1" + cat "$file" || return 1 + ;; + check|contains) key="$(STRING "$1")" val='' + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + [ "$val" = '' ] && return 1 + ;; + count) key="$(STRING "$1")" val='' seq=0 + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + [ "$val" = '' ] || val="${val} " + while [ "$val" != '' ]; do + seq=$((seq + 1)) val="${val#* }" + done + printf "%i\n" "$seq" + [ $seq = 0 ] && return 1 + ;; + get) key="$(STRING "$1")" seq="${2:-1}" + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + [ "$val" = '' ] && return 1 || val="${val} " + while [ $seq -gt 1 ]; do + seq=$((seq - 1)) val="${val#* }" + done + [ "$val" = '' ] && return 1 + UNSTRING "${val%% *}" + ;; + iterate|raw) key="$(STRING "$1")" + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + [ "$val" = '' ] && return 1 + printf '%s\n' $val + ;; + delete|remove) key="$(STRING "$1")" + val="${data#*"${BR}${key}" *"${BR}"}" + key="${data%"${BR}${key}" *"${BR}"*}" + [ "${key}${BR}${val}" = "${data}" ] && return 1 + printf '%s' "${key#"${BR}"}${BR}${val%"${BR}"}" + ;; + set|store) key="$(STRING "$1")" val="" + shift 1 + val="$(for v in "$@"; do STRING "$v"; printf \\t; done)" + if [ "${data#*"${BR}${key}" *}" != "$data" ]; then + data="${data%"${BR}${key}" *"${BR}"*}${BR}${key} ${val% }${BR}${data#*"${BR}${key}" *"${BR}"}" + data="${data#"${BR}"}" data="${data%"${BR}"}" + else + data="${data#"${BR}"}${key} ${val% }${BR}" + data="${data#"${BR}"}" + fi + printf %s\\n "${data}" + ;; + append) key="$(STRING "$1")" val="" + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + if [ "$val" = '' ]; then + printf %s\\n "${data}" + return 1 + else + shift 1 + val="${val}$(for v in "$@"; do printf \\t; STRING "$v"; done)" + data="${data%"${BR}${key}" *"${BR}"*}${BR}${key} ${val% }${BR}${data#*"${BR}${key}" *"${BR}"}" + data="${data#"${BR}"}" data="${data%"${BR}"}" + printf %s\\n "${data}" + fi + ;; + flush|save|write) file="$1" + data="${data#"${BR}"}" data="${data%"${BR}"}" + printf '%s\n' "$data" >"$file" || return 1 + ;; + esac + return 0 +} + +DB3() { + # wrapper function that allows easyer use of DB2 + # by always keeping file data in $db3_data + + case "$1" in + new|discard|open|load|delete|remove|set|store|append) + db3_data="$(DB2 "$db3_data" "$@")" + return "$?" + ;; + get|count|check|contains|iterate|raw|flush|save|write) + DB2 "$db3_data" "$@" + return "$?" + ;; + esac +} diff --git a/cgilite/file.sh b/cgilite/file.sh index 0d1f4ea..c66b17d 100755 --- a/cgilite/file.sh +++ b/cgilite/file.sh @@ -1,21 +1,18 @@ #!/bin/sh -# Copyright 2016 - 2019 Paul Hänsch -# -# This file is part of cgilite. +# Copyright 2016 - 2024 Paul Hänsch # -# cgilite is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. # -# cgilite is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with cgilite. If not, see . +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. [ -n "$include_fileserve" ] && return 0 include_fileserve="$0" @@ -48,24 +45,32 @@ file_type(){ } FILE(){ - local file file_size file_date http_date cachedate range mime - file="$1" mime="$2" + local file="$1" mime="$2" + local file_size file_date http_date cachedate range if ! [ -f "$file" ]; then printf 'Content-Length: 0\r\nStatus: 404 Not Found\r\n\r\n' - exit 0 + return 0 elif ! [ -r "$file" ]; then printf 'Content-Length: 0\r\nStatus: 403 Forbidden\r\n\r\n' - exit 0 + return 0 fi - file_size="$(stat -Lc %s "$file")" - file_date="$(stat -Lc %Y "$file")" + read file_size file_date <<-EOF + $(stat -Lc "%s %Y" "$file") + EOF http_date="$(date -ud "@$file_date" +"%a, %d %b %Y %T GMT")" + + [ ! "$HTTP_IF_MODIFIED_SINCE" -a "$cgilite_headers" ] \ + && HTTP_IF_MODIFIED_SINCE="$(HEADER If-Modified-Since)" + [ ! "$HTTP_RANGE" -a "$cgilite_headers" ] \ + && HTTP_RANGE="$(HEADER Range)" + cachedate="$( # Parse the allowable date formats from Section 3.3.1 of # https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html - HEADER If-Modified-Since \ + # HEADER If-Modified-Since \ + printf %s "$HTTP_IF_MODIFIED_SINCE" \ | sed -E 's;^[^ ]+, ([0-9]{2}) (...) ([0-9]{4}) (..:..:..) GMT$;\3-\2-\1 \4;; s;^[^ ]+, ([0-9]{2})-(...)-([789][0-9]) (..:..:..) GMT$;19\3-\2-\1 \4;; s;^[^ ]+, ([0-9]{2})-(...)-([0-6][0-9]) (..:..:..) GMT$;20\3-\2-\1 \4;; @@ -76,14 +81,25 @@ FILE(){ | xargs -r0 date +%s -ud 2>&- )" - range="$(HEADER Range |sed -nE 's;^bytes=([0-9]+-[0-9]*|-[0-9]+)$;\1;p;q;')" + range="${HTTP_RANGE#bytes=}" case "$range" in - *-) range="${range}$((file_size - 1))";; - -*) [ ${range#-} -le $file_size ] \ - && range="$((file_size - ${range#-}))-$((file_size - 1))" \ - || range="0-$((file_size - 1))";; - *-*) [ ${range#*-} -ge $file_size ] \ - && range="${range%-*}-$((file_size - 1))";; + *[!0-9]*-*|*-*[!0-9]*) + range="" + ;; + *-) + range="${range}$((file_size - 1))" + ;; + -*) + [ ${range#-} -le $file_size ] \ + && range="$((file_size - ${range#-}))-$((file_size - 1))" \ + || range="0-$((file_size - 1))" + ;; + *-*) + [ ${range#*-} -ge $file_size ] \ + && range="${range%-*}-$((file_size - 1))" + ;; + *) range="" + ;; esac if [ "$file_date" -lt "$cachedate" ] 2>&-; then diff --git a/cgilite/html-sh.sed b/cgilite/html-sh.sed index 8d7b61c..1a0f2b4 100755 --- a/cgilite/html-sh.sed +++ b/cgilite/html-sh.sed @@ -1,5 +1,19 @@ #!/bin/sed -nEf +# Copyright 2018 - 2019 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + :Escapes s,\\\\,\\,g; s,\\&,\&,g; s,\\<,\<,g; s,\\>,\>,g; diff --git a/cgilite/json.sh b/cgilite/json.sh new file mode 100755 index 0000000..12afdc4 --- /dev/null +++ b/cgilite/json.sh @@ -0,0 +1,360 @@ +#!/bin/sh + +[ -n "$include_json" ] && return 0 +include_json="$0" + +. "${_EXEC:-.}/cgilite/db23.sh" + +# debug(){ [ $# -gt 0 ] && printf '%s\n' "$@" >&2 || tee -a /dev/stderr; } + +json_except() { + printf '%s\n' "$@" >&2; + printf 'Exc: %s\n' "$json_document" >&2 +} + +json_space() { + while true; do case "$json_document" in + [" ${BR}${CR} "]*) json_document="${json_document#?}";; + *) break ;; + esac; done +} + +json_string() { + local string json_document="$json_document" end=0 + + json_space + case $json_document in + \"*) json_document="${json_document#?}" + ;; + *) json_except "Expected string specifyer starting with (\")" + return 1 + ;; + esac + while [ "$json_document" ]; do case $json_document in + \\?*) + string="${string}${json_document%"${json_document#??}"}" + json_document="${json_document#??}" + ;; + \"*) + json_document="${json_document#?}" + end=1 + break + ;; + *) + string="${string}${json_document%"${json_document#?}"}" + json_document="${json_document#?}" + ;; + esac; done + + if [ $end -eq 0 ]; then + json_except "Document ended mid-string" + return 1 + fi + + printf "%s %s\n" "$(STRING "$string")" "$json_document" +} + +json_key() { + local key json_document="$json_document" + + json_space + case $json_document in + \"*) + key="$(json_string)" || return 1 + json_document="${key#* }" + key="${key%% *}" + ;; + *) json_except "Expected key specifyer starting with '\"'" + return 1 + ;; + esac + json_space + case $json_document in + :*) json_document="${json_document#?}" + ;; + *) json_except "Expected value separator \":\"" + return 1 + ;; + esac + + printf '%s %s\n' "$key" "$json_document" +} + +json_number() { + local number json_document="$json_document" + + json_space + number="${json_document%%[" ${BR}${CR} ,}]"]*}" + json_document="${json_document#"$number"}" + if ! number="$(printf %f "$number")"; then + json_except "Invalid number format" + return 1 + fi + + printf '%s %s\n' "${number%.000000}" "$json_document" +} + +json_array() { + local struct="$(DB2 "" new)" value json_document="$json_document" + + json_space + case $json_document in + "["*) json_document="${json_document#?}" + ;; + *) json_except "Expected array starting with \"[\"" + return 1 + ;; + esac + + json_space + case $json_document in + "]"*) + printf "%s %s\n" "" "${json_document#?}" + return 0 + ;; + esac + + while :; do + json_space + + value="$(json_value)" || return 1 + json_document="${value#* }" + value="$(UNSTRING "${value%% *}")" + + struct="$(DB2 "$struct" append "@" "$value")" \ + || struct="$(DB2 "$struct" set "@" "$value")" + + json_space + case $json_document in + ,*) json_document="${json_document#?}" + ;; + "]"*) json_document="${json_document#?}" + break + ;; + *) json_except "Unexpected character mid-array" + return 1 + ;; + esac + done + + printf "%s %s\n" "$(STRING "$struct")" "$json_document" +} + +json_object() { + local struct="$(DB2 "" new)" key value json_document="$json_document" + + json_space + case $json_document in + "{"*) json_document="${json_document#?}" + ;; + *) json_except "Expected object starting with \"{\"" + return 1 + ;; + esac + + json_space + case $json_document in + "}"*) + printf "%s %s\n" "" "${json_document#?}" + return 0 + ;; + esac + + while :; do + json_space + + key="$(json_key)" || return 1 + json_document="${key#* }" + key="$(UNSTRING "${key%% *}")" + + value="$(json_value)" || return 1 + json_document="${value#* }" + value="$(UNSTRING "${value%% *}")" + + struct="$(DB2 "$struct" set "$key" "$value")" + + json_space + case $json_document in + ,*) json_document="${json_document#?}" + ;; + "}"*) json_document="${json_document#?}" + break + ;; + *) json_except "Unexpected character mid-object" + return 1 + ;; + esac + done + + printf "%s %s\n" "$(STRING "$struct")" "$json_document" +} + +json_value() { + local value json_document="$json_document" + json_type="" + + json_space + case $json_document in + \"*) + value="$(json_string)" || return 1 + json_document="${value#* }" + value="str:${value%% *}" + json_type=string + ;; + [+-.0-9]*) + value="$(json_number)" || return 1 + json_document="${value#* }" + value="num:${value%% *}" + json_type=number + ;; + "{"*) + value="$(json_object)" || return 1 + json_document="${value#* }" + value="obj:${value%% *}" + json_type=object + ;; + "["*) + value="$(json_array)" || return 1 + json_document="${value#* }" + value="arr:${value%% *}" + json_type=array + ;; + null*) + json_document="${json_document#null}" + value="null" + json_type=null + ;; + true*) + json_document="${json_document#true}" + value="true" + json_type=boolean + ;; + false*) + json_document="${json_document#false}" + value="false" + json_type=boolean + ;; + esac + + printf "%s %s\n" "$value" "$json_document" +} + +json_load() { + local json_document="$1" json + + json_value |UNSTRING +} + +json_get() { + local json="$1" jpath="${2#.}" key idx + json_type='' + + case $json in + str:*) json_type="string";; + arr:*) json_type="array";; + obj:*) json_type="object";; + num:*) json_type="number";; + true|false) + json_type="boolean";; + null) json_type="null";; + esac + + case $jpath in + "") + printf %s\\n "${json#???:}" + return 0 + ;; + "["[0-9]*"]"*) + idx="${jpath%%"]"*}" idx="${idx#"["}" + jpath="${jpath#"["*"]"}" + ;; + "['"*"']"*) + key="${jpath%%"']"*}" key="${key#"['"}" + jpath="${jpath#"['"*"']"}" + ;; + "$"*) + jpath="${jpath#?}" + ;; + *) key="${jpath%%[".["]*}" + jpath="${jpath#"$key"}" + ;; + esac + + if [ "$key" -a "$json_type" = object ]; then + if ! json="$(DB2 "${json#obj:}" get "$key")"; then + debug "Key not found: \"$key\"" + return 1 + fi + elif [ "$idx" -a "$json_type" = array ]; then + if ! json="$(DB2 "${json#arr:}" get @ "$(( idx + 1 ))")"; then + debug "Array index not found: \"$idx\"" + return 1 + fi + elif [ "$key" ]; then + debug "Cannot select key (\"$key\") from value of type \"$json_type\"" + return 1 + elif [ "$idx" ]; then + debug "Cannot select index ($idx) from value of type \"$json_type\"" + return 1 + fi + json_get "$json" "$jpath" + return $? +} + +json_dump_string() { + local in="$1" out='' + while [ "$in" ]; do case $in in + \\*) out="${out}\\\\"; in="${in#\\}" ;; + "$BR"*) out="${out}\\n"; in="${in#${BR}}" ;; + "$CR"*) out="${out}\\r"; in="${in#${CR}}" ;; + " "*) out="${out}\\t"; in="${in# }" ;; + \"*) out="${out}\\\""; in="${in#\"}" ;; + *) out="${out}${in%%[\\${CR}${BR} \"]*}"; in="${in#"${in%%[\\${BR}${CR} \"]*}"}" ;; + esac; done + printf '"%s"' "${out}" +} + +json_dump_array() { + local json="$1" value out='' + + for value in $(DB2 "$json" iterate @); do + out="${out},$(json_dump "$(UNSTRING "$value")")" + done + printf '[%s]' "${out#,}" +} + +json_dump_object() { + local json="$1" key value out='' + + while read -r key value; do + out="${out},$(json_dump_string "$(UNSTRING "$key")"):$(json_dump "$(UNSTRING "$value")")" + done <<-EOF + ${json} + EOF + printf '{%s}' "${out#,}" +} + +json_dump() { + local json="$1" + + case $json in + str:*) + json_dump_string "${json#str:}" + ;; + arr:*) + json_dump_array "${json#arr:}" + ;; + obj:*) + json_dump_object "${json#obj:}" + ;; + num:*) + printf "${json#num:}" + ;; + true|false|null) + printf %s\\n "$json" + ;; + *) + json_dump_string "${json}" + ;; + esac +} diff --git a/cgilite/markdown.awk b/cgilite/markdown.awk index 44d4e0d..34879d2 100755 --- a/cgilite/markdown.awk +++ b/cgilite/markdown.awk @@ -5,6 +5,20 @@ # Meant to support all features of John Grubers basic Markdown # + a number of common extensions, mostly inspired by Pandoc Markdown +# Copyright 2021 - 2023 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + # Supported Features / TODO: # ========================== # [x] done [ ] todo [-] not planned ? unsure @@ -38,18 +52,24 @@ # # Extensions - Block elements: # ---------------------------- +# - [x] Automatic
-wrapping (custom) # - ? Heading identifiers (php md, pandoc) +# - [x] Heading attributes (custom) +# - [ ]
terminates section # - [x] Automatic heading identifiers (custom) # - [x] Fenced code blocks (php md, pandoc) # - [x] Fenced code attributes +# - [x] Images (as block elements,
-wrapped) (custom) +# - [x] reference style block images # - [/] Tables # - ? Simple table (pandoc) # - ? Multiline table (pandoc) # - [x] Grid table (pandoc) -# - [x] Pipe table (php md pandoc) +# - [x] Headerless +# - [x] Pipe table (php md, pandoc) # - [x] Line blocks (pandoc) # - [x] Task lists (pandoc, custom) -# - [ ] Definition lists (php md, pandoc) +# - [x] Definition lists (php md, pandoc) # - [-] Numbered example lists (pandoc) # - [-] Metadata blocks (pandoc) # - [x] Metadata blocks (custom) @@ -62,7 +82,7 @@ # - [x] ^Superscript^ ~Subscript~ (pandoc) # - [-] Bracketed spans (pandoc) # - [-] Inline attributes (pandoc) -# - [x] Image attributes (custom, pandoc inspired, inline only) +# - [x] Image attributes (custom, pandoc inspired, not for reference style) # - [x] Wiki style links [[PageName]] / [[PageName|Link Text]] # - [-] TEX-Math (pandoc) # - ? Footnotes (php md) @@ -72,7 +92,7 @@ # - ? ... three-dot ellipsis (smartypants) # - [-] en-dash (smartypants) # - [ ] Automatic em-dash / en-dash -# - [ ] Automatic -> Arrows <- +# - [x] Automatic -> Arrows <- (custom) function debug(text) { printf "\n---\n%s\n---\n", text > "/dev/stderr"; } @@ -86,24 +106,32 @@ function HTML ( text ) { return text; } -function inline( line, LOCAL, len, code, href, guard ) { - nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # not underline (except when escaped) - na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # not asterisk (except when escaped) - ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner (underline) - isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__" # inner (underline) - iea = "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*" # inner (asterisk) - isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*" # inner (asterisk) +function URL ( text, sharp ) { + gsub( /&/, "%26", text ); + gsub( /"/, "%22", text ); + gsub( /'/, "%27", text ); + gsub( /`/, "%60", text ); + gsub( /\?/, "%3F", text ); + if (sharp) gsub( /#/, "%23", text ); + gsub( /\[/, "%5B", text ); + gsub( /\]/, "%5D", text ); + gsub( / /, "%20", text ); + gsub( / /, "%09", text ); + gsub( /\\/, "%5C", text ); + return text; +} +function inline( line, LOCAL, len, text, code, href, guard ) { if ( line ~ /^$/ ) { # Recursion End return ""; - # omit processing of escaped characters - } else if ( line ~ /^\\[]\\`\*_\{\}\(\)#\+-\.![]/) { - return substr(line, 2, 1) inline( substr(line, 3) ); + # omit processing of escaped characters + } else if ( line ~ /^\\./) { + return HTML(substr(line, 2, 1)) inline( substr(line, 3) ); # hard brakes } else if ( match(line, /^ \n/) ) { - return "
\n" inline( substr(line, RLENGTH + 1) ); + return "
\n" inline( substr(line, RLENGTH + 1) ); # ``code spans`` } else if ( match( line, /^`+/) ) { @@ -113,17 +141,22 @@ function inline( line, LOCAL, len, code, href, guard ) { code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1) len = 2 * length(guard) + length(code) # strip single surrounding white spaces - code = gensub( / (.*) /, "\\1", "1" , code) + gsub( /^ | $/, "", code) # escape HTML within code span gsub( /&/, "\\&", code ); gsub( //, "\\>", code ); return "" code "" inline( substr( line, len + 1 ) ) } + # Macros + } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) { + len = RLENGTH; + return "" HTML( substr( line, 3, len - 4 ) ) "" inline(substr(line, len + 1)); + # Wiki style links - } else if ( match( line, /^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/) ) { + } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) { len = RLENGTH; - href = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\1", 1, substr(line, 1, len) ); - text = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\3", 1, substr(line, 1, len) ); + href = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", 1, substr(line, 1, len) ); + text = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", 1, substr(line, 1, len) ); if ( ! text ) text = href; return "" HTML(text) "" inline( substr( line, len + 1) ); @@ -139,17 +172,28 @@ function inline( line, LOCAL, len, code, href, guard ) { href = HTML( substr( line, 2, len - 2) ); return "" href "" inline( substr( line, len + 1) ); + # Verbatim inline HTML + } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { + len = RLENGTH; + return substr( line, 1, len) inline(substr(line, len + 1)); + # inline links - } else if ( match(line, /^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) { + } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) { len = RLENGTH; - text = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\1", 1, substr(line, 1, len) ); - href = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\2", 1, substr(line, 1, len) ); - title = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\4", 1, substr(line, 1, len) ); - if ( title ) { - return "" inline( text ) "" inline( substr( line, len + 1) ); - } else { - return "" inline( text ) "" inline( substr( line, len + 1) ); - } + text = href = title = substr( line, 1, len); + sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text); + sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href); + sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + return "" \ + inline( text ) "" inline( substr( line, len + 1) ); # reference style links } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) { @@ -166,32 +210,46 @@ function inline( line, LOCAL, len, code, href, guard ) { } # inline images - } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) { - len = RLENGTH; - text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) ); - href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) ); - title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) ); - attrib = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) ); - if ( title && attrib ) { - return "\""" inline( substr( line, len + 1) ); - } else if ( title ) { - return "\""" inline( substr( line, len + 1) ); - } else if ( attrib ) { - return "\""" inline( substr( line, len + 1) ); - } else { - return "\""" inline( substr( line, len + 1) ); - } + } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) { + len = RLENGTH; text = href = title = attrib = substr( line, 1, len); + + sub("^!\\[", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text); + + sub("^!" lix "\\([\n\t ]*", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href); + + sub("^!" lix "\\([\n\t ]*" lid, "", title); + sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title); + sub("^[\n\t ]+", "", title); + + sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); + sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); + gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + return "\""" inline( substr( line, len + 1) ); # reference style images - } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\]/ ) ) { + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) { len = RLENGTH; - text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); - id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); + text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { - return "\""" inline( substr( line, len + 1) ); + return "\""" \ + inline( substr( line, len + 1) ); } else if ( rl_href[id] ) { - return "\""" inline( substr( line, len + 1) ); + return "\""" \ + inline( substr( line, len + 1) ); } else { return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); } @@ -245,21 +303,19 @@ function inline( line, LOCAL, len, code, href, guard ) { len = RLENGTH; return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - # Macros - } else if ( AllowMacros && match( line, /^<<([^>]|>[^>])+>>/) ) { - len = RLENGTH; - return macro( substr( line, 3, len - 4 ) ) inline(substr(line, len + 1)); - - # Verbatim inline HTML - } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { - len = RLENGTH; - return substr( line, 1, len) inline(substr(line, len + 1)); - # Literal HTML entities } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) { len = RLENGTH; return substr( line, 1, len ) inline(substr(line, len + 1)); + # Arrows + } else if ( line ~ /^-->( |$)/) { # ignore multidash-arrow + return "-->" inline( substr(line, 4) ); + } else if ( line ~ /^<-( |$)/) { + return "←" inline( substr(line, 3) ); + } else if ( line ~ /^->( |$)/) { + return "→" inline( substr(line, 3) ); + # Escape lone HTML character } else if ( match( line, /^[&<>"']/) ) { return HTML(substr(line, 1, 1)) inline(substr(line, 2)); @@ -270,8 +326,43 @@ function inline( line, LOCAL, len, code, href, guard ) { } } -function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib ) { - gsub( /^\n+|\n+$/, "", block ); +function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) { + match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); + + for ( n = hlvl; n <= 6; n++ ) { sec = sec (HL[n]?"
":""); } + HL[hlvl]++; for ( n = hlvl + 1; n <= 6; n++) { HL[n] = 0;} + + hid = ""; for ( n = 2; n <= blvl; n++) { hid = hid BL[n] "/"; } + hid = hid HL[1]; for ( n = 2; n <= hlvl; n++) { hid = hid "." HL[n] ; } + hid = hid ":" URL(htxt, 1); + + sub(/([0-9]+( [0-9]+){5})$/, "", hstack); + hstack = hstack HL[1] " " HL[2] " " HL[3] " " HL[4] " " HL[5] " " HL[6]; + + return sec "
" \ + "" inline( htxt ) \ + "" \ + "\n"; +} + +# Nested Block, resets heading counters +function _nblock( block, LOCAL, sec, n ) { + hstack = hstack " 0 0 0 0 0 0"; + + # Block Level + blvl++; BL[blvl]++; + for ( n = blvl + 1; n in BL; n++) { delete BL[n]; } + + block = _block( block ); + match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); + sec = ""; for ( n = 1; n <= 6; n++ ) { sec = sec (HL[n]?"
":""); } + + sub("( +[0-9]+){6} *$", "", hstack); blvl--; + return block sec; +} + +function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list ) { + gsub( "(^\n+|\n+$)", "", block ); if ( block == "" ) { return ""; @@ -300,7 +391,7 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib # Metadata (custom, block starting with %something) # Metadata is ignored but can be interpreted externally - } else if ( match(block, /^%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { + } else if ( match(block, /^%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { len = RLENGTH; st = RSTART; return _block( substr( block, len + 1) ); @@ -308,12 +399,13 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib } else if ( match( block, /^> /) ) { match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match(block, /$/); len = RLENGTH; st = RSTART; - return "
\n" _block( gensub( /(^|\n)> /, "\n", "g", substr(block, 1, st - 1) ) ) "
\n\n" \ - _block( substr(block, st + len) ); + text = substr(block, 1, st - 1); gsub( /(^|\n)> /, "\n", text ); + text = _nblock( text ); gsub( /^\n|\n$/, "", text ) + return "
" text "
\n\n" _block( substr(block, st + len) ); # Pipe Tables (pandoc / php md / gfm ) } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \ - "((\\|)?:?(-+:?[\\|+])+:?-+:?(\\|)?)\n" \ + "((\\|)?(:?-+:?[\\|+])+:?-+:?(\\|)?)\n" \ "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) { len = RLENGTH; st = RSTART; #initialize empty arrays @@ -359,46 +451,65 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib return "" ttext "
\n" _block(block); # Grid Tables (pandoc) - } else if ( match(block, "^\\+(-+\\+)+\n" \ - "(\\|([^\n]+\\|)+\n)+" \ - "\\+(:?=+:?\\+)+\n" \ - "((\\|([^\n]+\\|)+\n)+" \ - "\\+(-+\\+)+(\n|$))+" \ - ) ) { + # (with, and without header) + } else if ( match( block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "(\\+(:?=+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) || \ + match( block, "^()()()" \ + "(\\+(:?-+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) ) { len = RLENGTH; st = RSTART; #initialize empty arrays split("", talign); split("", tarray); split("", tread); cols = 0; cnt=0; ttext = ""; - # table header and alignment - block = substr(block, match(block, /(\n|$)/) + 1 ); - while ( match(block, "^\\|([^\n]+\\|)+\n") ) { - cols = split( gensub( /(^\||\|$)/, "", "g", \ - gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - )), tread, /\|/); - block = substr(block, match(block, /(\n|$)/) + 1 ); - for (cnt = 1; cnt < cols; cnt++) - tarray[cnt] = tarray[cnt] "\n" tread[cnt]; - } + # Column Count + cols = split( gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block), tread, /\+/) - 2; + # debug(" Cols: " gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block )); - cols = split( \ - gensub( /(^\+|\+$)/, "", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - ), talign, /\+/); - block = substr(block, match(block, /(\n|$)/) + 1 ); + # table alignment + split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ ); + # debug("Align: " gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block )); - for (cnt = 1; cnt < cols; cnt++) { - if (match(talign[cnt], /:=+:/)) talign[cnt]="center"; - else if (match(talign[cnt], /=+:/)) talign[cnt]="right"; - else if (match(talign[cnt], /:=+/ )) talign[cnt]="left"; + for (cnt = 1; cnt <= cols; cnt++) { + if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center"; + else if (match(talign[cnt], /(-+|=+):/)) talign[cnt]="right"; + else if (match(talign[cnt], /:(-+|=+)/ )) talign[cnt]="left"; else talign[cnt]=""; } - ttext = "\n" - for (cnt = 1; cnt < cols; cnt++) - ttext = ttext "" _block(tarray[cnt]) "" - ttext = ttext "\n\n" + if ( match(block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "\\+(:?=+:?\\+)+\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) ) { + # table header + block = substr(block, match(block, /(\n|$)/) + 1 ); + while ( match(block, "^\\|([^\n]+\\|)+\n") ) { + split( gensub( /(^\||\|$)/, "", "g", \ + gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ + substr(block, 1, match(block, /(\n|$)/)) \ + )), tread, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + for (cnt = 1; cnt <= cols; cnt++) + tarray[cnt] = tarray[cnt] "\n" tread[cnt]; + } + + ttext = "\n" + for (cnt = 1; cnt <= cols; cnt++) + ttext = ttext "" _nblock(tarray[cnt]) "" + ttext = ttext "\n" + } + + # table body + block = substr(block, match(block, /(\n|$)/) + 1 ); + ttext = ttext "\n" while ( match(block, /^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){ split("", tarray); @@ -408,27 +519,27 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib substr(block, 1, match(block, /(\n|$)/)) \ )), tread, /\|/); block = substr(block, match(block, /(\n|$)/) + 1 ); - for (cnt = 1; cnt < cols; cnt++) + for (cnt = 1; cnt <= cols; cnt++) tarray[cnt] = tarray[cnt] "\n" tread[cnt]; } block = substr(block, match(block, /(\n|$)/) + 1 ); ttext = ttext "" - for (cnt = 1; cnt < cols; cnt++) - ttext = ttext "" _block(tarray[cnt]) "" + for (cnt = 1; cnt <= cols; cnt++) + ttext = ttext "" _nblock(tarray[cnt]) "" ttext = ttext "\n" } - return "" ttext "
\n" _block(block); + return "" ttext "
\n" _nblock(block); # Line Blocks (pandoc) } else if ( match(block, /^\| [^\n]*(\n|$)(\| [^\n]*(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { len = RLENGTH; st = RSTART; - code = substr(block, 1, len); - gsub(/\n[[:space:]]+/, " ", code); - gsub(/\n\| /, "\n", code); - gsub(/^\| |\n$/, "", code); - return "
" gensub(/\n/, "
\n", "g", inline( code )) "
\n" \ - _block( substr( block, len + 1) ); + + text = substr(block, 1, len); gsub(/\n[[:space:]]+/, " ", text); + gsub(/\n\| /, "\n", text); gsub(/^\| |\n$/, "", text); + text = inline(text); gsub(/\n/, "
\n", text); + + return "
" text "
\n" _block( substr( block, len + 1) ); # Indented Code Block } else if ( match(block, /^( |\t)( *\t*[^ \t\n]+ *\t*)+(\n|$)(( |\t)[^\n]+(\n|$)|[ \t]*(\n|$))*/) ) { @@ -442,13 +553,13 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib # Fenced Divs (pandoc, custom) } else if ( match( block, /^(:::+)/ ) ) { guard = substr( block, 1, RLENGTH ); - code = gensub(/^[^\n]+\n/, "", 1, block); + code = block; sub(/^[^\n]+\n/, "", code); attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, block); gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { len = RLENGTH; st = RSTART; - return "
" _block( substr(code, 1, st - 1) ) "
\n" \ + return "
" _nblock( substr(code, 1, st - 1) ) "
\n" \ _block( substr( code, st + len ) ); } else { match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ ); @@ -475,60 +586,147 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib _block( substr(block, st + len) ); } - # Unordered list - } else if ( match( block, "^ ? ? ?[-+*][ \t]+[^\n]+(\n|$)" \ - "(([ \t]*\n)* ? ? ?[-+*][ \t]+[^\n]+(\n|$)" \ - "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ - "|[^\n]+(\n|$))*" ) ) { - list = substr( block, 1, RLENGTH); - block = substr( block, RLENGTH + 1); - indent = length( gensub(/[-+*][ \t]+[^\n]+.*$/, "", 1, list) ); - - gsub("(^|\n) {0," indent "}", "\n", list); - return "\n
    \n" _list( substr(list, 2) ) "
\n" _block( block ); - - # Ordered list - } else if ( match( block, "^ ? ? ?([0-9]+|#)\\.[ \t]+[^\n]+(\n|$)" \ - "(([ \t]*\n)* ? ? ?([0-9]+|#)\\.[ \t]+[^\n]+(\n|$)" \ - "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ - "|[^\n]+(\n|$))*" ) ) { - list = substr( block, 1, RLENGTH); - block = substr( block, RLENGTH + 1); - indent = length( gensub(/([0-9]+|#)\.[ \t]+[^\n]+.*$/, "", 1, list) ); - - gsub("(^|\n) {0," indent "}", "\n", list); - return "\n
    \n" _list( substr(list, 2) ) "
\n" _block( block ); - - # First Order Heading - } else if ( match( block, /^[^\n]+\n===+(\n|$)/ ) ) { - len = RLENGTH; - HL[1]++; HL[2] = 0; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0; - return "

" \ - inline( gensub( /\n.*$/, "", "g", block ) ) \ - "

\n\n" \ - _block( substr( block, len + 1 ) ); + # First Order Heading H1 + Attrib + } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n|$)/ ) ) { + len = RLENGTH; text = attrib = block; + sub(/([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "", text); + sub(/\}\n===+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); - # Second Order Heading - } else if ( match( block, /^[^\n]+\n---+(\n|$)/ ) ) { - len = RLENGTH; - HL[2]++; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0; - return "

" \ - inline( gensub( /\n.*$/, "", "g", block ) ) \ - "

\n\n" \ + return headline(1, text, attrib) _block( substr( block, len + 1 ) ); + + # First Order Heading H1 + } else if ( match( block, /^([^\n]+)\n===+(\n|$)/ ) ) { + len = RLENGTH; text = substr(block, 1, len); + sub(/\n===+(\n.*)?$/, "", text); + + return headline(1, text, 0) _block( substr( block, len + 1 ) ); + + # Second Order Heading H2 + Attrib + } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n|$)/ ) ) { + len = RLENGTH; text = attrib = block; + sub(/([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "", text); + sub(/\}\n---+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + + return headline(2, text, attrib) _block( substr( block, len + 1) ); + + # Second Order Heading H2 + } else if ( match( block, /^([^\n]+)\n---+(\n|$)/ ) ) { + len = RLENGTH; text = substr(block, 1, len); + sub(/\n---+(\n.*)?$/, "", text); + + return headline(2, text, 0) _block( substr( block, len + 1) ); + + # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib + } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) { + len = RLENGTH; text = attrib = substr(block, 1, len); + match(block, /^#{1,6}/); n = RLENGTH; + + sub(/^(#{1,6})[ \t]*/, "", text); sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text); + sub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib); + sub(/\})(\n.*)?$/, "", attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + + return headline( n, text, attrib ) _block( substr( block, len + 1) ); + + # Nth Order Heading H1 H2 H3 H4 H5 H6 + } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) { + len = RLENGTH; text = substr(block, 1, len); + match(block, /^#{1,6}/); n = RLENGTH; + sub(/^(#{1,6})[ \t]*/, "", text); sub(/[ \t]*#*(\n.*)?$/, "", text); + + return headline( n, text, 0 ) _block( substr( block, len + 1) ); + + # block images (wrapped in
) + } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n|$)") ) { + len = RLENGTH; text = href = title = attrib = substr( block, 1, len); + + sub("^!\\[", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", text); + + sub("^!" lix "\\([\n\t ]*", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", href); + + sub("^!" lix "\\([\n\t ]*" lid, "", title); + sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", title); + sub("^[\n\t ]+", "", title); + + sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); + sub("(\n.*)?$", "", attrib); + sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); + + return "
" \ + "\""" \ + (title?"
" inline(title) "
":"") \ + "
\n\n" \ _block( substr( block, len + 1) ); - # Nth Order Heading - } else if ( match( block, /^#{1,6}[ \t]*[^\n]+([ \t]*#*)(\n|$)/ ) ) { + # reference style images (block) + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) { len = RLENGTH; - hlvl = length( gensub( /^(#{1,6}).*$/, "\\1", "g", block ) ); - htxt = gensub(/^#{1,6}[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[^\n#])+)([ \t]*#*)(\n.*)?$/, "\\1", 1, block); - HL[hlvl]++; for ( n = hlvl + 1; n < 7; n++) { HL[n] = 0;} - hid = HL[1]; for ( n = 2; n <= hlvl; n++) { hid = hid "." HL[n] ; } - return "" inline( htxt ) \ - "\n\n" \ - _block( substr( block, len + 1) ); + text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block); + id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); + if ( ! id ) id = text; + if ( rl_href[id] && rl_title[id] ) { + return "
" \ + "\""" \ + "
" inline(rl_title[id]) "
" \ + "
\n\n" \ + _block( substr( block, len + 1) ); + } else if ( rl_href[id] ) { + return "
" \ + "\""" \ + "
\n\n" \ + _block( substr( block, len + 1) ); + } else { + return "

" HTML(substr(block, 1, len)) "

\n" _block( substr(block, len + 1) ); + } + + # Macros (standalone <> calls handled as block, so they are not wrapped in paragraph) + } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) { + len = RLENGTH; + text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block); + return "" HTML(text) "" _block(substr(block, len + 1) ); + + # Definition list + } else if (match( block, "^(([ \t]*\n)*[^:\n \t][^\n]+\n" \ + "([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ + "(([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ + "|[^:\n \t][^\n]+(\n|$)" \ + "|( ? ? ?\t| +)[^\n]+(\n|$)" \ + "|([ \t]*\n)+( ? ? ?\t| +)[^\n]+(\n|$))*)+" \ + )) { + list = substr( block, 1, RLENGTH); block = substr( block, RLENGTH + 1); + return "\n
\n" _dlist( list ) "
\n" _block( block ); + + # Unordered list types + } else if ( text = _startlist( block, "ul", "-", "([+*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "\\+", "([-*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "\\*", "([-+•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "•", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + + # Ordered list types + } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*•]|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*•]|[0-9]+\\.|#\\.|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ol", "#\\.", "([-+*•]|[0-9]+\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ol", "#\\)", "([-+*•]|[0-9]+\\.|#\\.|[0-9]+\\))") ) { + return text; # Split paragraphs } else if ( match( block, /(^|\n)[[:space:]]*(\n|$)/) ) { @@ -539,7 +737,7 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib # Horizontal rule } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) { len = RLENGTH; st = RSTART; - return _block(substr(block, 1, st - 1)) "
\n" _block(substr(block, st + len)); + return _block(substr(block, 1, st - 1)) "
\n" _block(substr(block, st + len)); # Plain paragraph } else { @@ -547,52 +745,78 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib } } -function _list( block, last, LOCAL, p) { - if ( ! length(block) ) return ""; - gsub(/^([-+*]|[0-9]+\.|#\.)( ? ? ?|\t)/, "", block) +function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, text) { + if (match( block, "(^|\n) ? ? ?" mark "[ \t][^\n]+(\n|$)" \ + "(([ \t]*\n)* ? ? ?" mark "[ \t][^\n]+(\n|$)" \ + "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ + "|[^\n \t][^\n]+(\n|$))*" ) ) { + st = RSTART; len = RLENGTH; list = substr( block, st, len); + + sub("^\n", "", list); match(list, "^ ? ? ?"); indent = RLENGTH; + gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list); + + text = substr(block, 1, st - 1); block = substr(block, st + len); + if (match(text, /\n[[:space:]]*\n/)) return 0; + if (match(text, "(^|\n) ? ? ?" exclude "[ \t][^\n]+")) return 0; + if (match( list, "\n" exclude "[ \t]" )) { + block = substr(list, RSTART + 1) block; + list = substr(list, 1, RSTART); + } - # slice next list item from input - if ( match( block, /\n([-+*]|[0-9]+\.|#\.)[ \t]+[^\n]+/) ) { - p = substr( block, 1, RSTART); - block = substr( block, RSTART + 1); - } else { - p = block; block = ""; - } - sub( /\n +([-+*]|[0-9]+\.|#\.)/, "\n&", p ); + return _block( text ) "<" type ">\n" _list( list, mark ) "\n" _block( block ); + } else return 0; +} - # if this should be a paragraph item - # either previous item (last) or current item (p) contains blank lines - if (match(last, /\n[[:space:]]*\n/) || match(p, /\n[[:space:]]*\n/) ) { - last = p; p = _block(p); - } else { - last = p; p = _block(p); - sub( /^

/, "", p ); - sub( /<\/p>\n/, "", p ); +function _list (block, mark, p, LOCAL, len, st, text, indent, task) { + if ( match(block, "^([ \t]*\n)*$")) return; + + match(block, "^" mark "[ \t]"); indent = RLENGTH; + sub("^" mark "[ \t]", "", block); + + if (match(block, /\n[ \t]*\n/)) p = 1; + + match( block, "\n" mark "[ \t][^\n]+(\n|$)" ); + st = (RLENGTH == -1) ? length(block) + 1 : RSTART; + text = substr(block, 1, st); block = substr(block, st + 1); + + gsub("\n {0," indent "}", "\n", text); + + task = match( text, /^\[ \]/ ) ? "

  • " : \ + match( text, /^\[-\]/ ) ? "
  • " : \ + match( text, /^\[\/\]/ ) ? "
  • " : \ + match( text, /^\[\?\]/ ) ? "
  • " : \ + match( text, /^\[[xX]\]/) ? "
  • " : "
  • "; + sub(/^\[[-? \/xX]\]/, "", text); + + text = _nblock( text ); + if ( ! p && match( text, "^

    (]|\n$" )) + gsub( "(^

    |

    \n$)", "", text); + + return task text "
  • \n" _list(block, mark, p); +} + +function _dlist (block, LOCAL, len, st, text, indent, p) { + if (match( block, "^([ \t]*\n)*[^:\n \t][^\n]+\n" )) { + len = RLENGTH; text = substr(block, 1, len); + gsub( "(^\n*|\n*$)", "", text ); + return "
    " inline( text ) "
    \n" _dlist( substr(block, len + 1) ); + } else if (match( block, "^([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ + "([^:\n \t][^\n]+(\n|$)" \ + "|( ? ? ?\t| +)[^\n]+(\n|$)" \ + "|([ \t]*\n)+( ? ? ?\t| +)[^\n]+(\n|$))*" \ + )) { + len = RLENGTH; text = substr(block, 1, len); + sub( "^([ \t]*\n)*", "", text); + match(text, "^ ? ? ?:(\t| +)"); indent = RLENGTH; + sub( "^ ? ? ?:(\t| +)", "", text); + gsub( "(^|\n) {0," indent "}", "\n", text ); + + text = _nblock(text); + if (match( text, "^

    (]|\n$" )) + gsub( "(^

    |

    \n$)", "", text); + + return "
    " text "
    \n" _dlist( substr(block, len + 1) ); } - sub( /\n$/, "", p ); - - # Task List (pandoc, custom) - if ( p ~ /^\[ \].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^\[-\].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^\[\?\].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^\[\/\].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^\[[xX]\].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^

    \[ \].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else if ( p ~ /^

    \[-\].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else if ( p ~ /^

    \[\?\].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else if ( p ~ /^

    \[\/\].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else if ( p ~ /^

    \[[xX]\].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else { return "
  • " p "
  • \n" _list( block, last ); } } BEGIN { @@ -600,6 +824,21 @@ BEGIN { file = ""; rl_href[""] = ""; rl_title[""] = ""; if (ENVIRON["MD_HTML"] == "true") { AllowHTML = "true"; } HL[1] = 0; HL[2] = 0; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0; + # hls = "0 0 0 0 0 0"; + + # Universal Patterns + nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # not underline (except when escaped) + na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # not asterisk (except when escaped) + ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner (underline) + isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__" # inner (underline) + iea = "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*" # inner (asterisk) + isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*" # inner (asterisk) + + lix="\\[(\\\\[^\n]|[^]\n\\\\[])*\\]" # link text + lid="(<(\\\\[^\n]|[^\n<>\\\\])*>|(\\\\.|[^()\"'\\\\])+|([^<\n\t ()\\\\]|\\\\[^\n])(\\\\[\n]|[^\n\t \\(\\)\\\\])*)" # link dest + lit="(\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'|\\((\\\\.|[^\\(\\)\\\\])*\\))" # link text + # link text with image def + lii="\\[(\\\\[^\n]|[^]\n\\\\[])*(!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\))?(\\\\[^\n]|[^]\n\\\\[])*\\]" # Buffering of full file ist necessary, e.g. to find reference links while (getline) { file = file $0 "\n"; } @@ -623,5 +862,5 @@ BEGIN { # for (n in rl_href) { debug(n " | " rl_href[n] " | " rl_title[n] ); } # Run Block Processing -> The Actual Markdown! - printf "%s", _block( file ); + printf "%s", _nblock( file ); } diff --git a/cgilite/session.sh b/cgilite/session.sh index 1f4699e..c3a44e8 100755 --- a/cgilite/session.sh +++ b/cgilite/session.sh @@ -1,9 +1,23 @@ #!/bin/sh +# Copyright 2018 - 2022 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + [ -n "$include_session" ] && return 0 include_session="$0" -_DATE="$(date +%s)" +export _DATE="$(date +%s)" SESSION_TIMEOUT="${SESSION_TIMEOUT:-7200}" if ! which uuencode >/dev/null; then diff --git a/cgilite/storage.sh b/cgilite/storage.sh index 22e6acc..17ea0d0 100755 --- a/cgilite/storage.sh +++ b/cgilite/storage.sh @@ -1,21 +1,18 @@ #!/bin/sh -# Copyright 2018, 2019, 2021 Paul Hänsch -# -# This is a file format helper, part of CGIlite. +# Copyright 2018 - 2021 Paul Hänsch # -# CGIlite is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. # -# CGIlite is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with CGIlite. If not, see . +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. [ -n "$include_storage" ] && return 0 include_storage="$0" diff --git a/cgilite/users.sh b/cgilite/users.sh index 6a6833e..32299ff 100755 --- a/cgilite/users.sh +++ b/cgilite/users.sh @@ -1,10 +1,24 @@ #!/bin/sh +# Copyright 2021 - 2024 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + [ -n "$include_users" ] && return 0 include_users="$0" -. "${_EXEC}/cgilite/session.sh" -. "${_EXEC}/cgilite/storage.sh" +. "${_EXEC:-.}/cgilite/session.sh" +. "${_EXEC:-.}/cgilite/storage.sh" SENDMAIL=${SENDMAIL-sendmail} @@ -15,9 +29,8 @@ USER_ACCOUNTPAGE="${USER_ACCOUNTPAGE}" USER_ACCOUNTEXPIRE="${USER_ACCOUNTEXPIRE:-$((86400 * 730))}" USER_CONFIRMEXPIRE="${USER_CONFIRMEXPIRE:-86400}" -MAILFROM="${MAILDOMAIN-noreply@${HTTP_HOST%:*}}" - HTTP_HOST="$(HEADER Host)" +MAILFROM="noreply@${HTTP_HOST%:*}" [ "$HTTPS" ] && SCHEMA=https || SCHEMA=http @@ -36,6 +49,10 @@ LOCAL_USER='local \ USER_EXPIRE USER_DEVICES USER_FUTUREUSE ' +# == TRANSLATIONS == +# override all functions marked with "TRANSLATION" +# sed -n '/TRANSLATION$/,/^}/p;'