X-Git-Url: https://git.plutz.net/?a=blobdiff_plain;f=handlers%2F40_search.sh;h=57117e3834a1bbe0c816f92e785bc38c188bc186;hb=6d62de4f9aa16fb129513fa38c60158bfde36f16;hp=a4d91a82a8fa5e0f3861bf48f93ce6ac6fb1060b;hpb=81b5d834ef1762196d80ff0a8b1af1ef3f89889c;p=shellwiki diff --git a/handlers/40_search.sh b/handlers/40_search.sh index a4d91a8..57117e3 100644 --- a/handlers/40_search.sh +++ b/handlers/40_search.sh @@ -1,6 +1,6 @@ #!/bin/sh -# Copyright 2023 Paul Hänsch +# Copyright 2023, 2024 Paul Hänsch # # Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -18,18 +18,52 @@ [ "${PATH_INFO%\[search\]}" = "$PATH_INFO" ] && return 1 . "$_EXEC/cgilite/storage.sh" -. "$_EXEC/db23.sh" +. "$_EXEC/cgilite/db23.sh" I="$_DATA/index" +tags="$( GET q | awk ' + BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F + if ( length("¡") == 1 ) # Utf-8 aware AWK + FS = "([] \\t\\n\\r!\"'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+"; + else # UTF-8 Hack + FS = "([] \\t\\n\\r!\"'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+"; + fi + } + { for (n = 1; n <= NF; n++) if ($n ~ /#[[:alnum:]_]+/) { + sub(/^#/,"",$n) + printf "%s ", toupper($n); + } + } +')" + +ntags="$( GET q | awk ' + BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F + if ( length("¡") == 1 ) # Utf-8 aware AWK + FS = "([] \\t\\n\\r\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+"; + else # UTF-8 Hack + FS = "([] \\t\\n\\r\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+"; + fi + } + { for (n = 1; n <= NF; n++) if ($n ~ /![[:alnum:]_]+/) { + sub(/^!/,"",$n) + printf "%s ", toupper($n); + } + } +')" + words="$( GET q | awk ' BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F if ( length("¡") == 1 ) # Utf-8 aware AWK - FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+"; + FS = "([] \\t\\n\\r\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+"; else # UTF-8 Hack - FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+"; + FS = "([] \\t\\n\\r\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+"; fi } - { for (n = 1; n <= NF; n++) printf "%s ", tolower($n); } + { for (n = 1; n <= NF; n++) if ($n !~ /![[:alnum:]_]+/) { + sub(/!/," ",$n) + printf "%s ", tolower($n); + } + } ')" searchteaser() { @@ -69,7 +103,7 @@ for w in ${words}; do while read date doc freq num total; do P="$_DATA/pages$(UNSTRING "$doc")" d="$(stat -c %Y -- "$P/#index.flag" 2>&-)" - [ "$d" -le "$date" ] 2>&- || continue + [ "$d" -le "$date" -a -f "$P/#page.md" ] 2>&- || continue printf '%s %f\n' "$doc" "$freq" done <"$I/$w" @@ -89,6 +123,8 @@ done \ [ "${page%/:*/}" = "${page%/:${LANGUAGE}/}" ] || continue fi acl_read "$page" || continue + has_tags "$page" $tags || continue + has_tag "$page" $ntags && continue printf '%s %s\n' "$doc" "$(searchteaser "$(mdfile "$page")" $words)" done \ | theme_search "${words% }"