X-Git-Url: https://git.plutz.net/?a=blobdiff_plain;f=handlers%2F40_search.sh;h=038545711b5f2143bfb04e82f3635b2b93b97996;hb=8c1b0d1c11d754250c87df3a54cfd5343b743ac9;hp=3db8987164a867273aab6de87d504395aa838166;hpb=b45089e38287dd6da2cab48069cae40805f98da3;p=shellwiki diff --git a/handlers/40_search.sh b/handlers/40_search.sh index 3db8987..0385457 100644 --- a/handlers/40_search.sh +++ b/handlers/40_search.sh @@ -1,25 +1,81 @@ #!/bin/sh +# Copyright 2023 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +[ "$SEARCH_INDEX" != true ] && return 1 [ "${PATH_INFO%\[search\]}" = "$PATH_INFO" ] && return 1 . "$_EXEC/cgilite/storage.sh" +. "$_EXEC/db23.sh" I="$_DATA/index" -words="$(GET q |tr '] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-' ' ')" +words="$( GET q | awk ' + BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F + if ( length("¡") == 1 ) # Utf-8 aware AWK + FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+"; + else # UTF-8 Hack + FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+"; + fi + } + { for (n = 1; n <= NF; n++) printf "%s ", tolower($n); } +')" + +searchteaser() { + local file="$1" words db3_data + local w l nc nl hits mhits cont mcont + shift 1; words="$*" + + for w in ${words}; do + grep -hiwnF "$w" "$file" + done \ + | sort -t: -k1 -n \ + | { nc=-1 hits=0 mhits=0 + while read -r l; do + nl="$nc" nc="${l%%:*}" + if [ $nc -eq $nl ]; then + hits=$((hits + 1)) + elif [ $nc -eq $((nl + 1 )) ]; then + hits=$((hits + 1)) + cont="${cont}${BR}${l#*:}" + elif [ $hits -gt $mhits ]; then + mhits="$hits" mcont="$cont" + hits=1 cont="${l#*:}" + else + hits=1 cont="${l#*:}" + fi + done + + [ $hits -gt $mhits ] \ + && STRING "$cont" \ + || STRING "$mcont" + } +} for w in ${words}; do [ ! -f "$I/$w" ] && continue while read date doc freq num total; do P="$_DATA/pages$(UNSTRING "$doc")" - d="$(stat -c %Y -- "$P/#page.md")" - [ "$d" -gt "$date" ] && continue + d="$(stat -c %Y -- "$P/#index.flag" 2>&-)" + [ "$d" -le "$date" -a -f "$P/#page.md" ] 2>&- || continue printf '%s %f\n' "$doc" "$freq" done <"$I/$w" done \ | awk ' - { cnt[$1]++; weight[$1] = weight[$1] ? weight[$1] * $2 : $2; } + { cnt[$1]++; weight[$1] = weight[$1] ? weight[$1] + $2 : $2; } END { m = 0; for (d in cnt) m = ( m < cnt[d] ) ? cnt[d] : m; for (d in cnt) if ( cnt[d] == m ) printf "%f %s\n", weight[d], d; } @@ -27,14 +83,12 @@ done \ | sort -nr \ | while read freq doc; do page="$(UNSTRING "$doc")" + [ "${page%*/\[*\]/*}" != "$page" ] && continue + if [ "$LANGUAGE_DEFAULT" ]; then + [ -d "${_DATA}/pages/${page}/:${LANGUAGE}/" ] && continue + [ "${page%/:*/}" = "${page%/:${LANGUAGE}/}" ] || continue + fi acl_read "$page" || continue - printf '
  • %s
  • ' "$(URL "$page")" "$(HTML "$page")" + printf '%s %s\n' "$doc" "$(searchteaser "$(mdfile "$page")" $words)" done \ -| theme_page - <<-EOF -
    -

    $(_ "Search results:")

    -
      - $(cat) -
    -
    - EOF +| theme_search "${words% }"