From c16fa9412ca9760759fa0357278c3bf8e065959d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Paul=20H=C3=A4nsch?= Date: Mon, 15 Apr 2024 13:05:26 +0200 Subject: [PATCH 1/1] Squashed 'cgilite/' changes from 970afda..397847d 397847d offer invitation links if email is not required 9c46b3c Bugfix: use HTML() escapes in most links instead of double escaping via URL() a48f202 bugfix: accidental regex range 6c55826 allow dashes in metadata names 58aab92 awk port of some cgilite functions d7b1281 bugfix: incorrect call of `DB2 "" new` 426dac5 fix auto detection of sender address 0591b08 Bugfix: faulty email address check e7fcaf2 json export function e1c0b4b more consistent db2/json structure and jpath selector 0a8a851 get json values via jpath b8a4246 read json data into recursive DB2 structure 6ab6900 more reliable include path 8f729b0 db23 - simple in-memory key-value DB api 51f6906 updated copyright notices 04871f7 bugfix: faulty validation in user_update made pw change impossible 3a4544b Fix broken range requests when running in dash bfef1a0 allow autocomplete in login form 0f62500 avoid overflow of long links 9e5394e typo a004238 do not require class=search for input of type=search fb015c8 allow link/image src pattern with whitespace, avoid confusion with comment 1e12e80 force ALT-text for images 41642aa link and image syntax allowing whitespace URLs, repace use of non-posix gensub() 31cfd89 change order of precedence (HTML binds more than link brackets) 880ed14 css: avoid search button overflow, always center h1 211f2ee export _DATE variable b846014 bugfix: recognize center and left aligned pipe tables 8928c6e independent macro extension 4c361b1 allow bullet symbol as list marker (for copy/paste from office documents) 4ee910d bugfix: do not accidentally start list blocks at second order 8e5ffff bugfix: do not consume multiple paragraphs in list start 3055b17 LICENSE CHANGE: CGIlite is now under ISC License! d4da2a5 bugfix: endless loop in _startlist because of lacking indention removal fc3451c discriminate between different list markers 8822843 unified list code 8e79399 allow block lists (again) cc08744 reduce use of non-posix gensub 3d2d975 simpler block nesting, fix: h2 after paragraph ca22f37 fix: block nesting in lists 1f4a5e2 fix: task list unsure fc47d5d improved lists 0d64190 style for definition lists, adjusted list padding 0c5f738 style for line-block 887a68a tidy up variable declarations, remove additional line break after headline ae55f6f introducing definition lists! 1cf0dab markdown: avoid adding newlines to blockquote, css: style for blockquote 854547d headline atttributes b4a3f6a heading text in id 33570b0 block level prefix to avoid repeating headline IDs 516bc32 nesting of
in block elements 925f042 unified headline function 9b2f590 bugfix do not escape # character in link references d45722a Omit link IDs in nested headings, to prevent ambiguous IDs e6442fb EXPERIMENTAL:
wrapping for Headlines d0b1c70 bugfix: HEX_DECODE for prefixes looking like shell patterns 8ce6dce bugfix: do not accidentally strip white spaces from code spans c4ba9cc Include backtick (`) in URL escape handling d61539c bugfix: prevent endless loop in HEX_DECODE, copy non-hex-digits unchanged 8fd595c translatable user dialogs 30435ff list padding, to prevent bullets floating into elements to the left 628929d Security: put backslash-escaped characters through HTML escaping 2ea88f7 Compatibility: Grid Tables section now compatible with posix/busybox awk 0f8f663 performance: avoid process forking d45e2c8 wiki style links: portability regex fix fa6be3f Allow headerless grid tables f9f5356 allow empty alt text in images 019a9ea Extension: Arrows 01dadd7 enable block element Macros 697a1bb extesion: introduce
-wrapped images as block elements a8a5ea5 bugfix: enable image links ffe17ca W3C Validator compliance: introduce separate function for escaping URL references, omit superfluous trailing slashes (e.g. in
) git-subtree-dir: cgilite git-subtree-split: 397847dd01a44b7a5a26b6c6d16a61a42079a542 --- cgilite.awk | 158 +++++++++++++ cgilite.sh | 34 +-- common.css | 28 ++- db23.sh | 114 +++++++++ file.sh | 72 +++--- html-sh.sed | 14 ++ json.sh | 360 +++++++++++++++++++++++++++++ markdown.awk | 641 +++++++++++++++++++++++++++++++++++---------------- session.sh | 16 +- storage.sh | 25 +- users.sh | 236 ++++++++++++------- 11 files changed, 1349 insertions(+), 349 deletions(-) create mode 100644 cgilite.awk create mode 100755 db23.sh create mode 100755 json.sh diff --git a/cgilite.awk b/cgilite.awk new file mode 100644 index 0000000..f16ed6a --- /dev/null +++ b/cgilite.awk @@ -0,0 +1,158 @@ +#!/bin/env awk -f + +function PATH( str, seg, out ) { + while ( str ) { + seg = str; + sub( /\/.*$/, "", seg); + sub( /^[^\/]*\//, "", str); + + if ( seg == ".." ) sub(/\/[^\/]*\/?$/, "", out); + else if ( seg ~ /^\.?$/) sub(/\/?$/, "/", out); + else sub(/\/?$/, "/" seg, out); + + if (seg == str) break; + } + if (!(str && out)) sub(/\/?$/,"/" out); + return out; +} + +function HEX_DECODE( pfx, inp, out, n, k ) { + k = length(pfx); + gsub(/[].*+?^${}()|\\[]/,"\\\\&",pfx); + while ( inp ) if ( n = match(inp, pfx "[0-9a-fA-F][0-9a-fA-F]") ) { + out = out substr(inp, 1, n - 1); + inp = substr(inp, n + k); + if (inp ~ /^[0-9]/) n = 16 * substr(inp, 1, 1); + else if (inp ~ /^[aA]/) n = 160; + else if (inp ~ /^[bB]/) n = 176; + else if (inp ~ /^[cC]/) n = 192; + else if (inp ~ /^[dD]/) n = 208; + else if (inp ~ /^[eE]/) n = 224; + else if (inp ~ /^[fF]/) n = 240; + if (inp ~ /^.[0-9]/) n += substr(inp, 2, 1); + else if (inp ~ /^.[aA]/) n += 10; + else if (inp ~ /^.[bB]/) n += 11; + else if (inp ~ /^.[cC]/) n += 12; + else if (inp ~ /^.[dD]/) n += 13; + else if (inp ~ /^.[eE]/) n += 14; + else if (inp ~ /^.[fF]/) n += 15; + out = out sprintf("%c", n); + inp = substr(inp, 3); + } else { + out = out inp; + break; + } + return out; +} + +function HTML( text ) { + gsub( /&/, "\\&", text ); + gsub( //, "\\>", text ); + gsub( /"/, "\\"", text ); + gsub( /'/, "\\'", text ); + gsub( /\[/, "\\[", text ); + gsub( /\]/, "\\]", text ); + gsub( /\r/, "\\ ", text ); + gsub( /\n/, "\\ ", text ); + gsub( /\\/, "\\\", text ); + return text; +} + +function URL( text ) { + gsub( /&/, "%26", text ); + gsub( /"/, "%22", text ); + gsub( /'/, "%27", text ); + gsub( /`/, "%60", text ); + gsub( /\?/, "%3F", text ); + gsub( /#/, "%23", text ); + gsub( /\[/, "%5B", text ); + gsub( /\]/, "%5D", text ); + gsub( / /, "%20", text ); + gsub( /\t/, "%09", text ); + gsub( /\r/, "%0D", text ); + gsub( /\n/, "%0A", text ); + gsub( /%/, "%25", text ); + gsub( /\\/, "%5C", text ); + return text; +} + +function _cgilite_urldecode( str, arr, spl, form, k, n, key) { + if (! spl) spl="&" + split(str, form, spl); + for ( k in form ) { + key = form[k]; sub(/=.*$/, "", key); + sub(/^[^=]*=/, "", form[k]); + if ( key in arr ) { + n = 1; while ( (key, n) in arr ) n++; + arr[key,n] = HEX_DECODE( "%", form[k]); + } else { + arr[key] = HEX_DECODE( "%", form[k]); + } + } +} + +function _cgilite_request( key, val) { + # Read request from client connection + + # Read Headers + getline; REQUEST_METHOD = $1; REQUEST_URI = $2; SERVER_PROTOCOL = $3; + while ( getline ) { + if ($0 ~ /^\r?$/) break; + else if ($0 ~ /^[a-zA-Z][0-9a-zA-Z_-]+: .*/) { + key = toupper($0); + sub(/:.*$/, "", key); + gsub(/-/, "_", key); + _HEADER[key] = $0; + sub(/^[^:]:[\t ]*/, "", _HEADER[key]); + sub(/[\t ]*\r?$/, "", _HEADER[key]); + } + } + CONTENT_LENGTH = _HEADER["CONTENT_LENGTH"]; + CONTENT_TYPE = _HEADER["CONTENT_TYPE"]; + + PATH_INFO = REQUEST_URI; gsub(/\?.*$/, "", PATH_INFO) + PATH_INFO = PATH( HEX_DECODE( "%", PATH_INFO ) ); + QUERY_STRING = REQUEST_URI; + if ( !gsub(/^[^?]+\?/, "", QUERY_STRING) ) QUERY_STRING = ""; + + # Set up _GET[]-Array + _cgilite_urldecode(QUERY_STRING, _GET); + + if ( _HEADER["CONTENT_TYPE"] == "application/x-www-form-urlencoded" \ + && _HEADER["CONTENT_LENGTH"] ) { + # Set up _POST[]-Array + + val = ""; key = "head -c " _HEADER["CONTENT_LENGTH"]; + while (key |getline) val = val $0; close(key); + _cgilite_urldecode(val, _POST); + } + + if ( _HEADER["COOKIE"] ) { + # Set up _COOKIE[]-Array + _cgilite_urldecode(_HEADER["COOKIE"], _COOKIE, "; ?"); + } + + if ( _HEADER["REFERER"] ) { + key = HEADER["REFERER"]; + if (! sub(/^[^\?]+?/, "", key)) key = "" + _cgilite_urldecode(key, _REF); + } + +} + +function _cgilite_headers() { + # Import request data from webserver environment variables +} + +BEGIN { + REQUEST_METHOD=""; REQUEST_URI=""; SERVER_PROTOCOL=""; + PATH_INFO=""; QUERY_STRING=""; CONTENT_LENGTH=""; CONTENT_TYPE=""; + split("", _GET); split("", _POST); split("", _REF); + split("", _HEADER); split("", _COOKIE); + + if ( ENVIRON["REQUEST_METHOD"] ) + _cgilite_headers(); + else + _cgilite_request(); +} diff --git a/cgilite.sh b/cgilite.sh index b51ee8e..b2467c3 100755 --- a/cgilite.sh +++ b/cgilite.sh @@ -1,22 +1,21 @@ #!/bin/sh -# Copyright 2017 - 2021 Paul Hänsch -# # This is CGIlite. # A collection of posix shell functions for writing CGI scripts. + +# Copyright 2017 - 2023 Paul Hänsch # -# CGIlite is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# CGIlite is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. # -# You should have received a copy of the GNU Affero General Public License -# along with CGIlite. If not, see . +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. [ -n "$include_cgilite" ] && return 0 # guard set after webserver part @@ -85,8 +84,14 @@ HEX_DECODE(){ # will be copied to the output literally while [ "$in" ]; do + [ "$pfx" ] || case $in in + [0-9a-fA-F][0-9a-fA-F]*):;; + ?*) out="${out}${in%%"${in#?}"}" + in="${in#?}"; continue;; + esac + case $in in - "$pfx"[0-9a-fA-F][0-9a-fA-F]*) in="${in#${pfx}}";; + "$pfx"[0-9a-fA-F][0-9a-fA-F]*) in="${in#"${pfx}"}";; \\*) in="${in#?}"; out="${out}\\\\"; continue;; %*) in="${in#?}"; out="${out}%%"; continue;; *) att="${in%%"${pfx}"*}"; att="${att%%%*}"; att="${att%%\\*}" @@ -307,6 +312,7 @@ URL(){ \&*) out="${out}%26"; str="${str#?}";; \"*) out="${out}%22"; str="${str#?}";; \'*) out="${out}%27"; str="${str#?}";; + \`*) out="${out}%60"; str="${str#?}";; \?*) out="${out}%3F"; str="${str#?}";; \#*) out="${out}%23"; str="${str#?}";; \[*) out="${out}%5B"; str="${str#?}";; diff --git a/common.css b/common.css index 65c28f3..30c3942 100644 --- a/common.css +++ b/common.css @@ -29,6 +29,7 @@ a { font-style: italic; text-decoration: underline; color: #068; + word-break: break-word; } a.button { font-style: inherit; @@ -47,8 +48,19 @@ b, strong { font-weight: bolder; } tt, code, var, samp, kbd { font-family: monospace; } kbd { font-style: italic; } -ul, ol { margin-left: 1.125em; } +blockquote { + background-color: #EEE; + margin: .5em 0; + padding: 1em 2em; + white-space: pre-line; +} + +ul, ol { padding-left: 1.5em; } dl dt { font-weight: bolder; } +dl dd { + margin: 0 2em; + background-color: #EEE; +} table th { font-weight: bold; } li p + ul, li p + ol { @@ -68,7 +80,10 @@ h4, h5, h6, form legend { margin-bottom: .25em; } -h1 { font-size: 1.5em; } +h1 { + text-align: center; + font-size: 1.5em; +} h2 { font-size: 1.125em; } select, input, button, textarea, a.button { @@ -101,6 +116,7 @@ input + label { margin-left: .375em; } +input[type="search"] + button.search, input.search + button.search { width: 2.5em; color: transparent; @@ -109,7 +125,9 @@ input.search + button.search { border-left: none; border-radius: 0 2pt 2pt 0; white-space: nowrap; + overflow: hidden; } +input[type="search"] + button.search:before, input.search + button.search:before { content: '\1f50d'; color: #000; @@ -119,8 +137,6 @@ input.search + button.search:before { @media print { @page { margin: 20mm; } - h1 { text-align: center; } - h1, h2, h3, h4, h5, h6, form legend { page-break-inside: avoid; page-break-after: avoid; @@ -168,4 +184,8 @@ input[type=radio].tab ~ *.tab { box-shadow: .125em .125em .125em #888; } +/* Markdown line-block */ +.line-block { white-space: pre-wrap; } +.line-block br { display: none; } + /* ======= End Common Styles ======= */ diff --git a/db23.sh b/db23.sh new file mode 100755 index 0000000..e8a0d64 --- /dev/null +++ b/db23.sh @@ -0,0 +1,114 @@ +#!/bin/sh + +# Copyright 2023, 2024 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +[ -n "$include_db23" ] && return 0 +include_db23="$0" + +. "${_EXEC:-.}/cgilite/storage.sh" + +DB2() { + local call data file key val seq + data="${BR}${1}${BR}" call="$2" + shift 2 + + case $call in + new|discard) + printf '' + ;; + open|load) file="$1" + cat "$file" || return 1 + ;; + check|contains) key="$(STRING "$1")" val='' + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + [ "$val" = '' ] && return 1 + ;; + count) key="$(STRING "$1")" val='' seq=0 + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + [ "$val" = '' ] || val="${val} " + while [ "$val" != '' ]; do + seq=$((seq + 1)) val="${val#* }" + done + printf "%i\n" "$seq" + [ $seq = 0 ] && return 1 + ;; + get) key="$(STRING "$1")" seq="${2:-1}" + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + [ "$val" = '' ] && return 1 || val="${val} " + while [ $seq -gt 1 ]; do + seq=$((seq - 1)) val="${val#* }" + done + [ "$val" = '' ] && return 1 + UNSTRING "${val%% *}" + ;; + iterate|raw) key="$(STRING "$1")" + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + [ "$val" = '' ] && return 1 + printf '%s\n' $val + ;; + delete|remove) key="$(STRING "$1")" + val="${data#*"${BR}${key}" *"${BR}"}" + key="${data%"${BR}${key}" *"${BR}"*}" + [ "${key}${BR}${val}" = "${data}" ] && return 1 + printf '%s' "${key#"${BR}"}${BR}${val%"${BR}"}" + ;; + set|store) key="$(STRING "$1")" val="" + shift 1 + val="$(for v in "$@"; do STRING "$v"; printf \\t; done)" + if [ "${data#*"${BR}${key}" *}" != "$data" ]; then + data="${data%"${BR}${key}" *"${BR}"*}${BR}${key} ${val% }${BR}${data#*"${BR}${key}" *"${BR}"}" + data="${data#"${BR}"}" data="${data%"${BR}"}" + else + data="${data#"${BR}"}${key} ${val% }${BR}" + data="${data#"${BR}"}" + fi + printf %s\\n "${data}" + ;; + append) key="$(STRING "$1")" val="" + val="${data##*"${BR}${key}" }" val="${val%%"${BR}"*}" + if [ "$val" = '' ]; then + printf %s\\n "${data}" + return 1 + else + shift 1 + val="${val}$(for v in "$@"; do printf \\t; STRING "$v"; done)" + data="${data%"${BR}${key}" *"${BR}"*}${BR}${key} ${val% }${BR}${data#*"${BR}${key}" *"${BR}"}" + data="${data#"${BR}"}" data="${data%"${BR}"}" + printf %s\\n "${data}" + fi + ;; + flush|save|write) file="$1" + data="${data#"${BR}"}" data="${data%"${BR}"}" + printf '%s\n' "$data" >"$file" || return 1 + ;; + esac + return 0 +} + +DB3() { + # wrapper function that allows easyer use of DB2 + # by always keeping file data in $db3_data + + case "$1" in + new|discard|open|load|delete|remove|set|store|append) + db3_data="$(DB2 "$db3_data" "$@")" + return "$?" + ;; + get|count|check|contains|iterate|raw|flush|save|write) + DB2 "$db3_data" "$@" + return "$?" + ;; + esac +} diff --git a/file.sh b/file.sh index 0d1f4ea..c66b17d 100755 --- a/file.sh +++ b/file.sh @@ -1,21 +1,18 @@ #!/bin/sh -# Copyright 2016 - 2019 Paul Hänsch -# -# This file is part of cgilite. +# Copyright 2016 - 2024 Paul Hänsch # -# cgilite is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. # -# cgilite is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with cgilite. If not, see . +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. [ -n "$include_fileserve" ] && return 0 include_fileserve="$0" @@ -48,24 +45,32 @@ file_type(){ } FILE(){ - local file file_size file_date http_date cachedate range mime - file="$1" mime="$2" + local file="$1" mime="$2" + local file_size file_date http_date cachedate range if ! [ -f "$file" ]; then printf 'Content-Length: 0\r\nStatus: 404 Not Found\r\n\r\n' - exit 0 + return 0 elif ! [ -r "$file" ]; then printf 'Content-Length: 0\r\nStatus: 403 Forbidden\r\n\r\n' - exit 0 + return 0 fi - file_size="$(stat -Lc %s "$file")" - file_date="$(stat -Lc %Y "$file")" + read file_size file_date <<-EOF + $(stat -Lc "%s %Y" "$file") + EOF http_date="$(date -ud "@$file_date" +"%a, %d %b %Y %T GMT")" + + [ ! "$HTTP_IF_MODIFIED_SINCE" -a "$cgilite_headers" ] \ + && HTTP_IF_MODIFIED_SINCE="$(HEADER If-Modified-Since)" + [ ! "$HTTP_RANGE" -a "$cgilite_headers" ] \ + && HTTP_RANGE="$(HEADER Range)" + cachedate="$( # Parse the allowable date formats from Section 3.3.1 of # https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html - HEADER If-Modified-Since \ + # HEADER If-Modified-Since \ + printf %s "$HTTP_IF_MODIFIED_SINCE" \ | sed -E 's;^[^ ]+, ([0-9]{2}) (...) ([0-9]{4}) (..:..:..) GMT$;\3-\2-\1 \4;; s;^[^ ]+, ([0-9]{2})-(...)-([789][0-9]) (..:..:..) GMT$;19\3-\2-\1 \4;; s;^[^ ]+, ([0-9]{2})-(...)-([0-6][0-9]) (..:..:..) GMT$;20\3-\2-\1 \4;; @@ -76,14 +81,25 @@ FILE(){ | xargs -r0 date +%s -ud 2>&- )" - range="$(HEADER Range |sed -nE 's;^bytes=([0-9]+-[0-9]*|-[0-9]+)$;\1;p;q;')" + range="${HTTP_RANGE#bytes=}" case "$range" in - *-) range="${range}$((file_size - 1))";; - -*) [ ${range#-} -le $file_size ] \ - && range="$((file_size - ${range#-}))-$((file_size - 1))" \ - || range="0-$((file_size - 1))";; - *-*) [ ${range#*-} -ge $file_size ] \ - && range="${range%-*}-$((file_size - 1))";; + *[!0-9]*-*|*-*[!0-9]*) + range="" + ;; + *-) + range="${range}$((file_size - 1))" + ;; + -*) + [ ${range#-} -le $file_size ] \ + && range="$((file_size - ${range#-}))-$((file_size - 1))" \ + || range="0-$((file_size - 1))" + ;; + *-*) + [ ${range#*-} -ge $file_size ] \ + && range="${range%-*}-$((file_size - 1))" + ;; + *) range="" + ;; esac if [ "$file_date" -lt "$cachedate" ] 2>&-; then diff --git a/html-sh.sed b/html-sh.sed index 8d7b61c..1a0f2b4 100755 --- a/html-sh.sed +++ b/html-sh.sed @@ -1,5 +1,19 @@ #!/bin/sed -nEf +# Copyright 2018 - 2019 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + :Escapes s,\\\\,\\,g; s,\\&,\&,g; s,\\<,\<,g; s,\\>,\>,g; diff --git a/json.sh b/json.sh new file mode 100755 index 0000000..12afdc4 --- /dev/null +++ b/json.sh @@ -0,0 +1,360 @@ +#!/bin/sh + +[ -n "$include_json" ] && return 0 +include_json="$0" + +. "${_EXEC:-.}/cgilite/db23.sh" + +# debug(){ [ $# -gt 0 ] && printf '%s\n' "$@" >&2 || tee -a /dev/stderr; } + +json_except() { + printf '%s\n' "$@" >&2; + printf 'Exc: %s\n' "$json_document" >&2 +} + +json_space() { + while true; do case "$json_document" in + [" ${BR}${CR} "]*) json_document="${json_document#?}";; + *) break ;; + esac; done +} + +json_string() { + local string json_document="$json_document" end=0 + + json_space + case $json_document in + \"*) json_document="${json_document#?}" + ;; + *) json_except "Expected string specifyer starting with (\")" + return 1 + ;; + esac + while [ "$json_document" ]; do case $json_document in + \\?*) + string="${string}${json_document%"${json_document#??}"}" + json_document="${json_document#??}" + ;; + \"*) + json_document="${json_document#?}" + end=1 + break + ;; + *) + string="${string}${json_document%"${json_document#?}"}" + json_document="${json_document#?}" + ;; + esac; done + + if [ $end -eq 0 ]; then + json_except "Document ended mid-string" + return 1 + fi + + printf "%s %s\n" "$(STRING "$string")" "$json_document" +} + +json_key() { + local key json_document="$json_document" + + json_space + case $json_document in + \"*) + key="$(json_string)" || return 1 + json_document="${key#* }" + key="${key%% *}" + ;; + *) json_except "Expected key specifyer starting with '\"'" + return 1 + ;; + esac + json_space + case $json_document in + :*) json_document="${json_document#?}" + ;; + *) json_except "Expected value separator \":\"" + return 1 + ;; + esac + + printf '%s %s\n' "$key" "$json_document" +} + +json_number() { + local number json_document="$json_document" + + json_space + number="${json_document%%[" ${BR}${CR} ,}]"]*}" + json_document="${json_document#"$number"}" + if ! number="$(printf %f "$number")"; then + json_except "Invalid number format" + return 1 + fi + + printf '%s %s\n' "${number%.000000}" "$json_document" +} + +json_array() { + local struct="$(DB2 "" new)" value json_document="$json_document" + + json_space + case $json_document in + "["*) json_document="${json_document#?}" + ;; + *) json_except "Expected array starting with \"[\"" + return 1 + ;; + esac + + json_space + case $json_document in + "]"*) + printf "%s %s\n" "" "${json_document#?}" + return 0 + ;; + esac + + while :; do + json_space + + value="$(json_value)" || return 1 + json_document="${value#* }" + value="$(UNSTRING "${value%% *}")" + + struct="$(DB2 "$struct" append "@" "$value")" \ + || struct="$(DB2 "$struct" set "@" "$value")" + + json_space + case $json_document in + ,*) json_document="${json_document#?}" + ;; + "]"*) json_document="${json_document#?}" + break + ;; + *) json_except "Unexpected character mid-array" + return 1 + ;; + esac + done + + printf "%s %s\n" "$(STRING "$struct")" "$json_document" +} + +json_object() { + local struct="$(DB2 "" new)" key value json_document="$json_document" + + json_space + case $json_document in + "{"*) json_document="${json_document#?}" + ;; + *) json_except "Expected object starting with \"{\"" + return 1 + ;; + esac + + json_space + case $json_document in + "}"*) + printf "%s %s\n" "" "${json_document#?}" + return 0 + ;; + esac + + while :; do + json_space + + key="$(json_key)" || return 1 + json_document="${key#* }" + key="$(UNSTRING "${key%% *}")" + + value="$(json_value)" || return 1 + json_document="${value#* }" + value="$(UNSTRING "${value%% *}")" + + struct="$(DB2 "$struct" set "$key" "$value")" + + json_space + case $json_document in + ,*) json_document="${json_document#?}" + ;; + "}"*) json_document="${json_document#?}" + break + ;; + *) json_except "Unexpected character mid-object" + return 1 + ;; + esac + done + + printf "%s %s\n" "$(STRING "$struct")" "$json_document" +} + +json_value() { + local value json_document="$json_document" + json_type="" + + json_space + case $json_document in + \"*) + value="$(json_string)" || return 1 + json_document="${value#* }" + value="str:${value%% *}" + json_type=string + ;; + [+-.0-9]*) + value="$(json_number)" || return 1 + json_document="${value#* }" + value="num:${value%% *}" + json_type=number + ;; + "{"*) + value="$(json_object)" || return 1 + json_document="${value#* }" + value="obj:${value%% *}" + json_type=object + ;; + "["*) + value="$(json_array)" || return 1 + json_document="${value#* }" + value="arr:${value%% *}" + json_type=array + ;; + null*) + json_document="${json_document#null}" + value="null" + json_type=null + ;; + true*) + json_document="${json_document#true}" + value="true" + json_type=boolean + ;; + false*) + json_document="${json_document#false}" + value="false" + json_type=boolean + ;; + esac + + printf "%s %s\n" "$value" "$json_document" +} + +json_load() { + local json_document="$1" json + + json_value |UNSTRING +} + +json_get() { + local json="$1" jpath="${2#.}" key idx + json_type='' + + case $json in + str:*) json_type="string";; + arr:*) json_type="array";; + obj:*) json_type="object";; + num:*) json_type="number";; + true|false) + json_type="boolean";; + null) json_type="null";; + esac + + case $jpath in + "") + printf %s\\n "${json#???:}" + return 0 + ;; + "["[0-9]*"]"*) + idx="${jpath%%"]"*}" idx="${idx#"["}" + jpath="${jpath#"["*"]"}" + ;; + "['"*"']"*) + key="${jpath%%"']"*}" key="${key#"['"}" + jpath="${jpath#"['"*"']"}" + ;; + "$"*) + jpath="${jpath#?}" + ;; + *) key="${jpath%%[".["]*}" + jpath="${jpath#"$key"}" + ;; + esac + + if [ "$key" -a "$json_type" = object ]; then + if ! json="$(DB2 "${json#obj:}" get "$key")"; then + debug "Key not found: \"$key\"" + return 1 + fi + elif [ "$idx" -a "$json_type" = array ]; then + if ! json="$(DB2 "${json#arr:}" get @ "$(( idx + 1 ))")"; then + debug "Array index not found: \"$idx\"" + return 1 + fi + elif [ "$key" ]; then + debug "Cannot select key (\"$key\") from value of type \"$json_type\"" + return 1 + elif [ "$idx" ]; then + debug "Cannot select index ($idx) from value of type \"$json_type\"" + return 1 + fi + json_get "$json" "$jpath" + return $? +} + +json_dump_string() { + local in="$1" out='' + while [ "$in" ]; do case $in in + \\*) out="${out}\\\\"; in="${in#\\}" ;; + "$BR"*) out="${out}\\n"; in="${in#${BR}}" ;; + "$CR"*) out="${out}\\r"; in="${in#${CR}}" ;; + " "*) out="${out}\\t"; in="${in# }" ;; + \"*) out="${out}\\\""; in="${in#\"}" ;; + *) out="${out}${in%%[\\${CR}${BR} \"]*}"; in="${in#"${in%%[\\${BR}${CR} \"]*}"}" ;; + esac; done + printf '"%s"' "${out}" +} + +json_dump_array() { + local json="$1" value out='' + + for value in $(DB2 "$json" iterate @); do + out="${out},$(json_dump "$(UNSTRING "$value")")" + done + printf '[%s]' "${out#,}" +} + +json_dump_object() { + local json="$1" key value out='' + + while read -r key value; do + out="${out},$(json_dump_string "$(UNSTRING "$key")"):$(json_dump "$(UNSTRING "$value")")" + done <<-EOF + ${json} + EOF + printf '{%s}' "${out#,}" +} + +json_dump() { + local json="$1" + + case $json in + str:*) + json_dump_string "${json#str:}" + ;; + arr:*) + json_dump_array "${json#arr:}" + ;; + obj:*) + json_dump_object "${json#obj:}" + ;; + num:*) + printf "${json#num:}" + ;; + true|false|null) + printf %s\\n "$json" + ;; + *) + json_dump_string "${json}" + ;; + esac +} diff --git a/markdown.awk b/markdown.awk index 44d4e0d..34879d2 100755 --- a/markdown.awk +++ b/markdown.awk @@ -5,6 +5,20 @@ # Meant to support all features of John Grubers basic Markdown # + a number of common extensions, mostly inspired by Pandoc Markdown +# Copyright 2021 - 2023 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + # Supported Features / TODO: # ========================== # [x] done [ ] todo [-] not planned ? unsure @@ -38,18 +52,24 @@ # # Extensions - Block elements: # ---------------------------- +# - [x] Automatic
-wrapping (custom) # - ? Heading identifiers (php md, pandoc) +# - [x] Heading attributes (custom) +# - [ ]
terminates section # - [x] Automatic heading identifiers (custom) # - [x] Fenced code blocks (php md, pandoc) # - [x] Fenced code attributes +# - [x] Images (as block elements,
-wrapped) (custom) +# - [x] reference style block images # - [/] Tables # - ? Simple table (pandoc) # - ? Multiline table (pandoc) # - [x] Grid table (pandoc) -# - [x] Pipe table (php md pandoc) +# - [x] Headerless +# - [x] Pipe table (php md, pandoc) # - [x] Line blocks (pandoc) # - [x] Task lists (pandoc, custom) -# - [ ] Definition lists (php md, pandoc) +# - [x] Definition lists (php md, pandoc) # - [-] Numbered example lists (pandoc) # - [-] Metadata blocks (pandoc) # - [x] Metadata blocks (custom) @@ -62,7 +82,7 @@ # - [x] ^Superscript^ ~Subscript~ (pandoc) # - [-] Bracketed spans (pandoc) # - [-] Inline attributes (pandoc) -# - [x] Image attributes (custom, pandoc inspired, inline only) +# - [x] Image attributes (custom, pandoc inspired, not for reference style) # - [x] Wiki style links [[PageName]] / [[PageName|Link Text]] # - [-] TEX-Math (pandoc) # - ? Footnotes (php md) @@ -72,7 +92,7 @@ # - ? ... three-dot ellipsis (smartypants) # - [-] en-dash (smartypants) # - [ ] Automatic em-dash / en-dash -# - [ ] Automatic -> Arrows <- +# - [x] Automatic -> Arrows <- (custom) function debug(text) { printf "\n---\n%s\n---\n", text > "/dev/stderr"; } @@ -86,24 +106,32 @@ function HTML ( text ) { return text; } -function inline( line, LOCAL, len, code, href, guard ) { - nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # not underline (except when escaped) - na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # not asterisk (except when escaped) - ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner (underline) - isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__" # inner (underline) - iea = "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*" # inner (asterisk) - isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*" # inner (asterisk) +function URL ( text, sharp ) { + gsub( /&/, "%26", text ); + gsub( /"/, "%22", text ); + gsub( /'/, "%27", text ); + gsub( /`/, "%60", text ); + gsub( /\?/, "%3F", text ); + if (sharp) gsub( /#/, "%23", text ); + gsub( /\[/, "%5B", text ); + gsub( /\]/, "%5D", text ); + gsub( / /, "%20", text ); + gsub( / /, "%09", text ); + gsub( /\\/, "%5C", text ); + return text; +} +function inline( line, LOCAL, len, text, code, href, guard ) { if ( line ~ /^$/ ) { # Recursion End return ""; - # omit processing of escaped characters - } else if ( line ~ /^\\[]\\`\*_\{\}\(\)#\+-\.![]/) { - return substr(line, 2, 1) inline( substr(line, 3) ); + # omit processing of escaped characters + } else if ( line ~ /^\\./) { + return HTML(substr(line, 2, 1)) inline( substr(line, 3) ); # hard brakes } else if ( match(line, /^ \n/) ) { - return "
\n" inline( substr(line, RLENGTH + 1) ); + return "
\n" inline( substr(line, RLENGTH + 1) ); # ``code spans`` } else if ( match( line, /^`+/) ) { @@ -113,17 +141,22 @@ function inline( line, LOCAL, len, code, href, guard ) { code = substr( line, len + 1, match( substr(line, len + 1), guard ) - 1) len = 2 * length(guard) + length(code) # strip single surrounding white spaces - code = gensub( / (.*) /, "\\1", "1" , code) + gsub( /^ | $/, "", code) # escape HTML within code span gsub( /&/, "\\&", code ); gsub( //, "\\>", code ); return "" code "" inline( substr( line, len + 1 ) ) } + # Macros + } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) { + len = RLENGTH; + return "" HTML( substr( line, 3, len - 4 ) ) "" inline(substr(line, len + 1)); + # Wiki style links - } else if ( match( line, /^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/) ) { + } else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) { len = RLENGTH; - href = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\1", 1, substr(line, 1, len) ); - text = gensub(/^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/, "\\3", 1, substr(line, 1, len) ); + href = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\1", 1, substr(line, 1, len) ); + text = gensub(/^\[\[([^]|]+)(\|([^]]+))?\]\]/, "\\3", 1, substr(line, 1, len) ); if ( ! text ) text = href; return "" HTML(text) "" inline( substr( line, len + 1) ); @@ -139,17 +172,28 @@ function inline( line, LOCAL, len, code, href, guard ) { href = HTML( substr( line, 2, len - 2) ); return "" href "" inline( substr( line, len + 1) ); + # Verbatim inline HTML + } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { + len = RLENGTH; + return substr( line, 1, len) inline(substr(line, len + 1)); + # inline links - } else if ( match(line, /^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) { + } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) { len = RLENGTH; - text = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\1", 1, substr(line, 1, len) ); - href = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\2", 1, substr(line, 1, len) ); - title = gensub(/^\[([^]]+)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, "\\4", 1, substr(line, 1, len) ); - if ( title ) { - return "" inline( text ) "" inline( substr( line, len + 1) ); - } else { - return "" inline( text ) "" inline( substr( line, len + 1) ); - } + text = href = title = substr( line, 1, len); + sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text); + sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href); + sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + return "" \ + inline( text ) "" inline( substr( line, len + 1) ); # reference style links } else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) { @@ -166,32 +210,46 @@ function inline( line, LOCAL, len, code, href, guard ) { } # inline images - } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) { - len = RLENGTH; - text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) ); - href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) ); - title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) ); - attrib = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) ); - if ( title && attrib ) { - return "\""" inline( substr( line, len + 1) ); - } else if ( title ) { - return "\""" inline( substr( line, len + 1) ); - } else if ( attrib ) { - return "\""" inline( substr( line, len + 1) ); - } else { - return "\""" inline( substr( line, len + 1) ); - } + } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) { + len = RLENGTH; text = href = title = attrib = substr( line, 1, len); + + sub("^!\\[", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text); + + sub("^!" lix "\\([\n\t ]*", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href); + + sub("^!" lix "\\([\n\t ]*" lid, "", title); + sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title); + sub("^[\n\t ]+", "", title); + + sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); + sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); + gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title); + + return "\""" inline( substr( line, len + 1) ); # reference style images - } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\]/ ) ) { + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) { len = RLENGTH; - text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); - id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); + text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\1", 1, substr(line, 1, len) ); + id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\].*/, "\\2", 1, substr(line, 1, len) ); if ( ! id ) id = text; if ( rl_href[id] && rl_title[id] ) { - return "\""" inline( substr( line, len + 1) ); + return "\""" \ + inline( substr( line, len + 1) ); } else if ( rl_href[id] ) { - return "\""" inline( substr( line, len + 1) ); + return "\""" \ + inline( substr( line, len + 1) ); } else { return "" HTML(substr(line, 1, len)) inline( substr(line, len + 1) ); } @@ -245,21 +303,19 @@ function inline( line, LOCAL, len, code, href, guard ) { len = RLENGTH; return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); - # Macros - } else if ( AllowMacros && match( line, /^<<([^>]|>[^>])+>>/) ) { - len = RLENGTH; - return macro( substr( line, 3, len - 4 ) ) inline(substr(line, len + 1)); - - # Verbatim inline HTML - } else if ( AllowHTML && match( line, /^(|<\?([^\?]|\?[^>])*\?>|]*>|])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) { - len = RLENGTH; - return substr( line, 1, len) inline(substr(line, len + 1)); - # Literal HTML entities } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) { len = RLENGTH; return substr( line, 1, len ) inline(substr(line, len + 1)); + # Arrows + } else if ( line ~ /^-->( |$)/) { # ignore multidash-arrow + return "-->" inline( substr(line, 4) ); + } else if ( line ~ /^<-( |$)/) { + return "←" inline( substr(line, 3) ); + } else if ( line ~ /^->( |$)/) { + return "→" inline( substr(line, 3) ); + # Escape lone HTML character } else if ( match( line, /^[&<>"']/) ) { return HTML(substr(line, 1, 1)) inline(substr(line, 2)); @@ -270,8 +326,43 @@ function inline( line, LOCAL, len, code, href, guard ) { } } -function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib ) { - gsub( /^\n+|\n+$/, "", block ); +function headline( hlvl, htxt, attrib, LOCAL, sec, n, HL) { + match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); + + for ( n = hlvl; n <= 6; n++ ) { sec = sec (HL[n]?"
":""); } + HL[hlvl]++; for ( n = hlvl + 1; n <= 6; n++) { HL[n] = 0;} + + hid = ""; for ( n = 2; n <= blvl; n++) { hid = hid BL[n] "/"; } + hid = hid HL[1]; for ( n = 2; n <= hlvl; n++) { hid = hid "." HL[n] ; } + hid = hid ":" URL(htxt, 1); + + sub(/([0-9]+( [0-9]+){5})$/, "", hstack); + hstack = hstack HL[1] " " HL[2] " " HL[3] " " HL[4] " " HL[5] " " HL[6]; + + return sec "
" \ + "" inline( htxt ) \ + "" \ + "\n"; +} + +# Nested Block, resets heading counters +function _nblock( block, LOCAL, sec, n ) { + hstack = hstack " 0 0 0 0 0 0"; + + # Block Level + blvl++; BL[blvl]++; + for ( n = blvl + 1; n in BL; n++) { delete BL[n]; } + + block = _block( block ); + match(hstack, /([0-9]+( [0-9]+){5})$/); split( substr(hstack, RSTART), HL); + sec = ""; for ( n = 1; n <= 6; n++ ) { sec = sec (HL[n]?"
":""); } + + sub("( +[0-9]+){6} *$", "", hstack); blvl--; + return block sec; +} + +function _block( block, LOCAL, st, len, text, title, attrib, href, guard, code, indent, list ) { + gsub( "(^\n+|\n+$)", "", block ); if ( block == "" ) { return ""; @@ -300,7 +391,7 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib # Metadata (custom, block starting with %something) # Metadata is ignored but can be interpreted externally - } else if ( match(block, /^%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { + } else if ( match(block, /^%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)(%[a-zA-Z-]+([[:space:]][^\n]*)?(\n|$)|%([[:space:]][^\n]*)?(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { len = RLENGTH; st = RSTART; return _block( substr( block, len + 1) ); @@ -308,12 +399,13 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib } else if ( match( block, /^> /) ) { match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match(block, /$/); len = RLENGTH; st = RSTART; - return "
\n" _block( gensub( /(^|\n)> /, "\n", "g", substr(block, 1, st - 1) ) ) "
\n\n" \ - _block( substr(block, st + len) ); + text = substr(block, 1, st - 1); gsub( /(^|\n)> /, "\n", text ); + text = _nblock( text ); gsub( /^\n|\n$/, "", text ) + return "
" text "
\n\n" _block( substr(block, st + len) ); # Pipe Tables (pandoc / php md / gfm ) } else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \ - "((\\|)?:?(-+:?[\\|+])+:?-+:?(\\|)?)\n" \ + "((\\|)?(:?-+:?[\\|+])+:?-+:?(\\|)?)\n" \ "((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) { len = RLENGTH; st = RSTART; #initialize empty arrays @@ -359,46 +451,65 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib return "" ttext "
\n" _block(block); # Grid Tables (pandoc) - } else if ( match(block, "^\\+(-+\\+)+\n" \ - "(\\|([^\n]+\\|)+\n)+" \ - "\\+(:?=+:?\\+)+\n" \ - "((\\|([^\n]+\\|)+\n)+" \ - "\\+(-+\\+)+(\n|$))+" \ - ) ) { + # (with, and without header) + } else if ( match( block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "(\\+(:?=+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) || \ + match( block, "^()()()" \ + "(\\+(:?-+:?\\+)+)\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) ) { len = RLENGTH; st = RSTART; #initialize empty arrays split("", talign); split("", tarray); split("", tread); cols = 0; cnt=0; ttext = ""; - # table header and alignment - block = substr(block, match(block, /(\n|$)/) + 1 ); - while ( match(block, "^\\|([^\n]+\\|)+\n") ) { - cols = split( gensub( /(^\||\|$)/, "", "g", \ - gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - )), tread, /\|/); - block = substr(block, match(block, /(\n|$)/) + 1 ); - for (cnt = 1; cnt < cols; cnt++) - tarray[cnt] = tarray[cnt] "\n" tread[cnt]; - } + # Column Count + cols = split( gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block), tread, /\+/) - 2; + # debug(" Cols: " gensub( "^(\\+(:?-+:?\\+)+)(\n.*)*$", "\\1", 1, block )); - cols = split( \ - gensub( /(^\+|\+$)/, "", "g", \ - substr(block, 1, match(block, /(\n|$)/)) \ - ), talign, /\+/); - block = substr(block, match(block, /(\n|$)/) + 1 ); + # table alignment + split( gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block ), talign, /\+/ ); + # debug("Align: " gensub( "^(.*\n)?\\+((:?=+:?\\+|(:-+|-+:|:-+:)\\+)+)(\n.*)$", "\\2", "g", block )); - for (cnt = 1; cnt < cols; cnt++) { - if (match(talign[cnt], /:=+:/)) talign[cnt]="center"; - else if (match(talign[cnt], /=+:/)) talign[cnt]="right"; - else if (match(talign[cnt], /:=+/ )) talign[cnt]="left"; + for (cnt = 1; cnt <= cols; cnt++) { + if (match(talign[cnt], /:(-+|=+):/)) talign[cnt]="center"; + else if (match(talign[cnt], /(-+|=+):/)) talign[cnt]="right"; + else if (match(talign[cnt], /:(-+|=+)/ )) talign[cnt]="left"; else talign[cnt]=""; } - ttext = "\n" - for (cnt = 1; cnt < cols; cnt++) - ttext = ttext "" _block(tarray[cnt]) "" - ttext = ttext "\n\n" + if ( match(block, "^\\+(-+\\+)+\n" \ + "(\\|([^\n]+\\|)+\n)+" \ + "\\+(:?=+:?\\+)+\n" \ + "((\\|([^\n]+\\|)+\n)+" \ + "\\+(-+\\+)+(\n|$))+" \ + ) ) { + # table header + block = substr(block, match(block, /(\n|$)/) + 1 ); + while ( match(block, "^\\|([^\n]+\\|)+\n") ) { + split( gensub( /(^\||\|$)/, "", "g", \ + gensub( /(^|[^\\])\\\|/, "\\1\\|", "g", \ + substr(block, 1, match(block, /(\n|$)/)) \ + )), tread, /\|/); + block = substr(block, match(block, /(\n|$)/) + 1 ); + for (cnt = 1; cnt <= cols; cnt++) + tarray[cnt] = tarray[cnt] "\n" tread[cnt]; + } + + ttext = "\n" + for (cnt = 1; cnt <= cols; cnt++) + ttext = ttext "" _nblock(tarray[cnt]) "" + ttext = ttext "\n" + } + + # table body + block = substr(block, match(block, /(\n|$)/) + 1 ); + ttext = ttext "\n" while ( match(block, /^((\|([^\n]+\|)+\n)+\+(-+\+)+(\n|$))+/ ) ){ split("", tarray); @@ -408,27 +519,27 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib substr(block, 1, match(block, /(\n|$)/)) \ )), tread, /\|/); block = substr(block, match(block, /(\n|$)/) + 1 ); - for (cnt = 1; cnt < cols; cnt++) + for (cnt = 1; cnt <= cols; cnt++) tarray[cnt] = tarray[cnt] "\n" tread[cnt]; } block = substr(block, match(block, /(\n|$)/) + 1 ); ttext = ttext "" - for (cnt = 1; cnt < cols; cnt++) - ttext = ttext "" _block(tarray[cnt]) "" + for (cnt = 1; cnt <= cols; cnt++) + ttext = ttext "" _nblock(tarray[cnt]) "" ttext = ttext "\n" } - return "" ttext "
\n" _block(block); + return "" ttext "
\n" _nblock(block); # Line Blocks (pandoc) } else if ( match(block, /^\| [^\n]*(\n|$)(\| [^\n]*(\n|$)|[ \t]+[^\n[:space:]][^\n]*(\n|$))*/) ) { len = RLENGTH; st = RSTART; - code = substr(block, 1, len); - gsub(/\n[[:space:]]+/, " ", code); - gsub(/\n\| /, "\n", code); - gsub(/^\| |\n$/, "", code); - return "
" gensub(/\n/, "
\n", "g", inline( code )) "
\n" \ - _block( substr( block, len + 1) ); + + text = substr(block, 1, len); gsub(/\n[[:space:]]+/, " ", text); + gsub(/\n\| /, "\n", text); gsub(/^\| |\n$/, "", text); + text = inline(text); gsub(/\n/, "
\n", text); + + return "
" text "
\n" _block( substr( block, len + 1) ); # Indented Code Block } else if ( match(block, /^( |\t)( *\t*[^ \t\n]+ *\t*)+(\n|$)(( |\t)[^\n]+(\n|$)|[ \t]*(\n|$))*/) ) { @@ -442,13 +553,13 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib # Fenced Divs (pandoc, custom) } else if ( match( block, /^(:::+)/ ) ) { guard = substr( block, 1, RLENGTH ); - code = gensub(/^[^\n]+\n/, "", 1, block); + code = block; sub(/^[^\n]+\n/, "", code); attrib = gensub(/^:::+[ \t]*\{?[ \t]*([^\}\n]*)\}?[ \t]*\n.*$/, "\\1", 1, block); gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); if ( match(code, "(^|\n)" guard "+(\n|$)" ) ) { len = RLENGTH; st = RSTART; - return "
" _block( substr(code, 1, st - 1) ) "
\n" \ + return "
" _nblock( substr(code, 1, st - 1) ) "
\n" \ _block( substr( code, st + len ) ); } else { match( block, /(^|\n)[[:space:]]*(\n|$)/ ) || match( block, /$/ ); @@ -475,60 +586,147 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib _block( substr(block, st + len) ); } - # Unordered list - } else if ( match( block, "^ ? ? ?[-+*][ \t]+[^\n]+(\n|$)" \ - "(([ \t]*\n)* ? ? ?[-+*][ \t]+[^\n]+(\n|$)" \ - "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ - "|[^\n]+(\n|$))*" ) ) { - list = substr( block, 1, RLENGTH); - block = substr( block, RLENGTH + 1); - indent = length( gensub(/[-+*][ \t]+[^\n]+.*$/, "", 1, list) ); - - gsub("(^|\n) {0," indent "}", "\n", list); - return "\n
    \n" _list( substr(list, 2) ) "
\n" _block( block ); - - # Ordered list - } else if ( match( block, "^ ? ? ?([0-9]+|#)\\.[ \t]+[^\n]+(\n|$)" \ - "(([ \t]*\n)* ? ? ?([0-9]+|#)\\.[ \t]+[^\n]+(\n|$)" \ - "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ - "|[^\n]+(\n|$))*" ) ) { - list = substr( block, 1, RLENGTH); - block = substr( block, RLENGTH + 1); - indent = length( gensub(/([0-9]+|#)\.[ \t]+[^\n]+.*$/, "", 1, list) ); - - gsub("(^|\n) {0," indent "}", "\n", list); - return "\n
    \n" _list( substr(list, 2) ) "
\n" _block( block ); - - # First Order Heading - } else if ( match( block, /^[^\n]+\n===+(\n|$)/ ) ) { - len = RLENGTH; - HL[1]++; HL[2] = 0; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0; - return "

" \ - inline( gensub( /\n.*$/, "", "g", block ) ) \ - "

\n\n" \ - _block( substr( block, len + 1 ) ); + # First Order Heading H1 + Attrib + } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n===+(\n|$)/ ) ) { + len = RLENGTH; text = attrib = block; + sub(/([ \t]*\{([^\}\n]+)\})\n===+(\n.*)?$/, "", text); + sub(/\}\n===+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); - # Second Order Heading - } else if ( match( block, /^[^\n]+\n---+(\n|$)/ ) ) { - len = RLENGTH; - HL[2]++; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0; - return "

" \ - inline( gensub( /\n.*$/, "", "g", block ) ) \ - "

\n\n" \ + return headline(1, text, attrib) _block( substr( block, len + 1 ) ); + + # First Order Heading H1 + } else if ( match( block, /^([^\n]+)\n===+(\n|$)/ ) ) { + len = RLENGTH; text = substr(block, 1, len); + sub(/\n===+(\n.*)?$/, "", text); + + return headline(1, text, 0) _block( substr( block, len + 1 ) ); + + # Second Order Heading H2 + Attrib + } else if ( match( block, /^([^\n]+)([ \t]*\{([^\}\n]+)\})\n---+(\n|$)/ ) ) { + len = RLENGTH; text = attrib = block; + sub(/([ \t]*\{([^\}\n]+)\})\n---+(\n.*)?$/, "", text); + sub(/\}\n---+(\n.*)?$/, "", attrib); sub(/^([^\n]+)[ \t]*\{/, "", attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + + return headline(2, text, attrib) _block( substr( block, len + 1) ); + + # Second Order Heading H2 + } else if ( match( block, /^([^\n]+)\n---+(\n|$)/ ) ) { + len = RLENGTH; text = substr(block, 1, len); + sub(/\n---+(\n.*)?$/, "", text); + + return headline(2, text, 0) _block( substr( block, len + 1) ); + + # Nth Order Heading H1 H2 H3 H4 H5 H6 + Attrib + } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n|$)/ ) ) { + len = RLENGTH; text = attrib = substr(block, 1, len); + match(block, /^#{1,6}/); n = RLENGTH; + + sub(/^(#{1,6})[ \t]*/, "", text); sub(/[ \t]*#*([ \t]*\{([a-zA-Z \t-]*)\})(\n.*)?$/, "", text); + sub(/^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*[ \t]*\{/, "", attrib); + sub(/\})(\n.*)?$/, "", attrib); + gsub(/[^a-zA-Z0-9_-]+/, " ", attrib); gsub(/(^ | $)/, "", attrib); + + return headline( n, text, attrib ) _block( substr( block, len + 1) ); + + # Nth Order Heading H1 H2 H3 H4 H5 H6 + } else if ( match( block, /^(#{1,6})[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[ \t]*[^ \t\n#])+)[ \t]*#*(\n|$)/ ) ) { + len = RLENGTH; text = substr(block, 1, len); + match(block, /^#{1,6}/); n = RLENGTH; + sub(/^(#{1,6})[ \t]*/, "", text); sub(/[ \t]*#*(\n.*)?$/, "", text); + + return headline( n, text, 0 ) _block( substr( block, len + 1) ); + + # block images (wrapped in
) + } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n|$)") ) { + len = RLENGTH; text = href = title = attrib = substr( block, 1, len); + + sub("^!\\[", "", text); + sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", text); + + sub("^!" lix "\\([\n\t ]*", "", href); + sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", href); + + sub("^!" lix "\\([\n\t ]*" lid, "", title); + sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", title); + sub("^[\n\t ]+", "", title); + + sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib); + sub("(\n.*)?$", "", attrib); + sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib); + + if ( match(href, /^<.*>$/) ) { sub(/^$/, "", href); } + if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); } + else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); } + else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); } + + gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href); + + return "
" \ + "\""" \ + (title?"
" inline(title) "
":"") \ + "
\n\n" \ _block( substr( block, len + 1) ); - # Nth Order Heading - } else if ( match( block, /^#{1,6}[ \t]*[^\n]+([ \t]*#*)(\n|$)/ ) ) { + # reference style images (block) + } else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) { len = RLENGTH; - hlvl = length( gensub( /^(#{1,6}).*$/, "\\1", "g", block ) ); - htxt = gensub(/^#{1,6}[ \t]*(([^ \t\n]+|[ \t]+[^ \t\n#]|[ \t]+#+[^\n#])+)([ \t]*#*)(\n.*)?$/, "\\1", 1, block); - HL[hlvl]++; for ( n = hlvl + 1; n < 7; n++) { HL[n] = 0;} - hid = HL[1]; for ( n = 2; n <= hlvl; n++) { hid = hid "." HL[n] ; } - return "" inline( htxt ) \ - "\n\n" \ - _block( substr( block, len + 1) ); + text = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\1", 1, block); + id = gensub(/^!\[([^\n]*)\] ?\[([^\n]*)\](\n.*)?$/, "\\2", 1, block); + if ( ! id ) id = text; + if ( rl_href[id] && rl_title[id] ) { + return "
" \ + "\""" \ + "
" inline(rl_title[id]) "
" \ + "
\n\n" \ + _block( substr( block, len + 1) ); + } else if ( rl_href[id] ) { + return "
" \ + "\""" \ + "
\n\n" \ + _block( substr( block, len + 1) ); + } else { + return "

" HTML(substr(block, 1, len)) "

\n" _block( substr(block, len + 1) ); + } + + # Macros (standalone <> calls handled as block, so they are not wrapped in paragraph) + } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) { + len = RLENGTH; + text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block); + return "" HTML(text) "" _block(substr(block, len + 1) ); + + # Definition list + } else if (match( block, "^(([ \t]*\n)*[^:\n \t][^\n]+\n" \ + "([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ + "(([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ + "|[^:\n \t][^\n]+(\n|$)" \ + "|( ? ? ?\t| +)[^\n]+(\n|$)" \ + "|([ \t]*\n)+( ? ? ?\t| +)[^\n]+(\n|$))*)+" \ + )) { + list = substr( block, 1, RLENGTH); block = substr( block, RLENGTH + 1); + return "\n
\n" _dlist( list ) "
\n" _block( block ); + + # Unordered list types + } else if ( text = _startlist( block, "ul", "-", "([+*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "\\+", "([-*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "\\*", "([-+•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ul", "•", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) { + return text; + + # Ordered list types + } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*•]|#\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*•]|[0-9]+\\.|#\\.|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ol", "#\\.", "([-+*•]|[0-9]+\\.|[0-9]+\\)|#\\))") ) { + return text; + } else if ( text = _startlist( block, "ol", "#\\)", "([-+*•]|[0-9]+\\.|#\\.|[0-9]+\\))") ) { + return text; # Split paragraphs } else if ( match( block, /(^|\n)[[:space:]]*(\n|$)/) ) { @@ -539,7 +737,7 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib # Horizontal rule } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) { len = RLENGTH; st = RSTART; - return _block(substr(block, 1, st - 1)) "
\n" _block(substr(block, st + len)); + return _block(substr(block, 1, st - 1)) "
\n" _block(substr(block, st + len)); # Plain paragraph } else { @@ -547,52 +745,78 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent, attrib } } -function _list( block, last, LOCAL, p) { - if ( ! length(block) ) return ""; - gsub(/^([-+*]|[0-9]+\.|#\.)( ? ? ?|\t)/, "", block) +function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, text) { + if (match( block, "(^|\n) ? ? ?" mark "[ \t][^\n]+(\n|$)" \ + "(([ \t]*\n)* ? ? ?" mark "[ \t][^\n]+(\n|$)" \ + "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \ + "|[^\n \t][^\n]+(\n|$))*" ) ) { + st = RSTART; len = RLENGTH; list = substr( block, st, len); + + sub("^\n", "", list); match(list, "^ ? ? ?"); indent = RLENGTH; + gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list); + + text = substr(block, 1, st - 1); block = substr(block, st + len); + if (match(text, /\n[[:space:]]*\n/)) return 0; + if (match(text, "(^|\n) ? ? ?" exclude "[ \t][^\n]+")) return 0; + if (match( list, "\n" exclude "[ \t]" )) { + block = substr(list, RSTART + 1) block; + list = substr(list, 1, RSTART); + } - # slice next list item from input - if ( match( block, /\n([-+*]|[0-9]+\.|#\.)[ \t]+[^\n]+/) ) { - p = substr( block, 1, RSTART); - block = substr( block, RSTART + 1); - } else { - p = block; block = ""; - } - sub( /\n +([-+*]|[0-9]+\.|#\.)/, "\n&", p ); + return _block( text ) "<" type ">\n" _list( list, mark ) "\n" _block( block ); + } else return 0; +} - # if this should be a paragraph item - # either previous item (last) or current item (p) contains blank lines - if (match(last, /\n[[:space:]]*\n/) || match(p, /\n[[:space:]]*\n/) ) { - last = p; p = _block(p); - } else { - last = p; p = _block(p); - sub( /^

/, "", p ); - sub( /<\/p>\n/, "", p ); +function _list (block, mark, p, LOCAL, len, st, text, indent, task) { + if ( match(block, "^([ \t]*\n)*$")) return; + + match(block, "^" mark "[ \t]"); indent = RLENGTH; + sub("^" mark "[ \t]", "", block); + + if (match(block, /\n[ \t]*\n/)) p = 1; + + match( block, "\n" mark "[ \t][^\n]+(\n|$)" ); + st = (RLENGTH == -1) ? length(block) + 1 : RSTART; + text = substr(block, 1, st); block = substr(block, st + 1); + + gsub("\n {0," indent "}", "\n", text); + + task = match( text, /^\[ \]/ ) ? "

  • " : \ + match( text, /^\[-\]/ ) ? "
  • " : \ + match( text, /^\[\/\]/ ) ? "
  • " : \ + match( text, /^\[\?\]/ ) ? "
  • " : \ + match( text, /^\[[xX]\]/) ? "
  • " : "
  • "; + sub(/^\[[-? \/xX]\]/, "", text); + + text = _nblock( text ); + if ( ! p && match( text, "^

    (]|\n$" )) + gsub( "(^

    |

    \n$)", "", text); + + return task text "
  • \n" _list(block, mark, p); +} + +function _dlist (block, LOCAL, len, st, text, indent, p) { + if (match( block, "^([ \t]*\n)*[^:\n \t][^\n]+\n" )) { + len = RLENGTH; text = substr(block, 1, len); + gsub( "(^\n*|\n*$)", "", text ); + return "
    " inline( text ) "
    \n" _dlist( substr(block, len + 1) ); + } else if (match( block, "^([ \t]*\n)* ? ? ?:[ \t][^\n]+(\n|$)" \ + "([^:\n \t][^\n]+(\n|$)" \ + "|( ? ? ?\t| +)[^\n]+(\n|$)" \ + "|([ \t]*\n)+( ? ? ?\t| +)[^\n]+(\n|$))*" \ + )) { + len = RLENGTH; text = substr(block, 1, len); + sub( "^([ \t]*\n)*", "", text); + match(text, "^ ? ? ?:(\t| +)"); indent = RLENGTH; + sub( "^ ? ? ?:(\t| +)", "", text); + gsub( "(^|\n) {0," indent "}", "\n", text ); + + text = _nblock(text); + if (match( text, "^

    (]|\n$" )) + gsub( "(^

    |

    \n$)", "", text); + + return "
    " text "
    \n" _dlist( substr(block, len + 1) ); } - sub( /\n$/, "", p ); - - # Task List (pandoc, custom) - if ( p ~ /^\[ \].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^\[-\].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^\[\?\].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^\[\/\].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^\[[xX]\].*/ ) { return "
  • " \ - substr(p, 4) "
  • \n" _list( block, last ); - } else if ( p ~ /^

    \[ \].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else if ( p ~ /^

    \[-\].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else if ( p ~ /^

    \[\?\].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else if ( p ~ /^

    \[\/\].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else if ( p ~ /^

    \[[xX]\].*/ ) { return "

  • " \ - substr(p, 7) "

  • \n" _list( block, last ); - } else { return "
  • " p "
  • \n" _list( block, last ); } } BEGIN { @@ -600,6 +824,21 @@ BEGIN { file = ""; rl_href[""] = ""; rl_title[""] = ""; if (ENVIRON["MD_HTML"] == "true") { AllowHTML = "true"; } HL[1] = 0; HL[2] = 0; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0; + # hls = "0 0 0 0 0 0"; + + # Universal Patterns + nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # not underline (except when escaped) + na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # not asterisk (except when escaped) + ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner (underline) + isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__" # inner (underline) + iea = "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*" # inner (asterisk) + isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*" # inner (asterisk) + + lix="\\[(\\\\[^\n]|[^]\n\\\\[])*\\]" # link text + lid="(<(\\\\[^\n]|[^\n<>\\\\])*>|(\\\\.|[^()\"'\\\\])+|([^<\n\t ()\\\\]|\\\\[^\n])(\\\\[\n]|[^\n\t \\(\\)\\\\])*)" # link dest + lit="(\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'|\\((\\\\.|[^\\(\\)\\\\])*\\))" # link text + # link text with image def + lii="\\[(\\\\[^\n]|[^]\n\\\\[])*(!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\))?(\\\\[^\n]|[^]\n\\\\[])*\\]" # Buffering of full file ist necessary, e.g. to find reference links while (getline) { file = file $0 "\n"; } @@ -623,5 +862,5 @@ BEGIN { # for (n in rl_href) { debug(n " | " rl_href[n] " | " rl_title[n] ); } # Run Block Processing -> The Actual Markdown! - printf "%s", _block( file ); + printf "%s", _nblock( file ); } diff --git a/session.sh b/session.sh index 1f4699e..c3a44e8 100755 --- a/session.sh +++ b/session.sh @@ -1,9 +1,23 @@ #!/bin/sh +# Copyright 2018 - 2022 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + [ -n "$include_session" ] && return 0 include_session="$0" -_DATE="$(date +%s)" +export _DATE="$(date +%s)" SESSION_TIMEOUT="${SESSION_TIMEOUT:-7200}" if ! which uuencode >/dev/null; then diff --git a/storage.sh b/storage.sh index 22e6acc..17ea0d0 100755 --- a/storage.sh +++ b/storage.sh @@ -1,21 +1,18 @@ #!/bin/sh -# Copyright 2018, 2019, 2021 Paul Hänsch -# -# This is a file format helper, part of CGIlite. +# Copyright 2018 - 2021 Paul Hänsch # -# CGIlite is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. # -# CGIlite is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with CGIlite. If not, see . +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. [ -n "$include_storage" ] && return 0 include_storage="$0" diff --git a/users.sh b/users.sh index 6a6833e..32299ff 100755 --- a/users.sh +++ b/users.sh @@ -1,10 +1,24 @@ #!/bin/sh +# Copyright 2021 - 2024 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + [ -n "$include_users" ] && return 0 include_users="$0" -. "${_EXEC}/cgilite/session.sh" -. "${_EXEC}/cgilite/storage.sh" +. "${_EXEC:-.}/cgilite/session.sh" +. "${_EXEC:-.}/cgilite/storage.sh" SENDMAIL=${SENDMAIL-sendmail} @@ -15,9 +29,8 @@ USER_ACCOUNTPAGE="${USER_ACCOUNTPAGE}" USER_ACCOUNTEXPIRE="${USER_ACCOUNTEXPIRE:-$((86400 * 730))}" USER_CONFIRMEXPIRE="${USER_CONFIRMEXPIRE:-86400}" -MAILFROM="${MAILDOMAIN-noreply@${HTTP_HOST%:*}}" - HTTP_HOST="$(HEADER Host)" +MAILFROM="noreply@${HTTP_HOST%:*}" [ "$HTTPS" ] && SCHEMA=https || SCHEMA=http @@ -36,6 +49,10 @@ LOCAL_USER='local \ USER_EXPIRE USER_DEVICES USER_FUTUREUSE ' +# == TRANSLATIONS == +# override all functions marked with "TRANSLATION" +# sed -n '/TRANSLATION$/,/^}/p;'