X-Git-Url: https://git.plutz.net/?a=blobdiff_plain;f=parsers%2F40_indexer.sh;h=56a8990bd89e2bbd3a9d2e6f8a5f65aa6de23c87;hb=391ca15c76ee74991dc8462ae76782497751a133;hp=e3065c14aa11b8c5c82b357188f008ce48223e3a;hpb=e16cbbded1805c1cd2256b7679bd299dc4746579;p=shellwiki diff --git a/parsers/40_indexer.sh b/parsers/40_indexer.sh index e3065c1..56a8990 100755 --- a/parsers/40_indexer.sh +++ b/parsers/40_indexer.sh @@ -1,6 +1,6 @@ #!/bin/sh -# Copyright 2023 Paul Hänsch +# Copyright 2023 - 2024 Paul Hänsch # # Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -14,9 +14,11 @@ # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR # IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -DOC="${PATH_INFO%/}/" P="$_DATA/pages${DOC}" I="$_DATA/index/" +P="$PWD" I="$_DATA/index/" _DATE=$((_DATE + 1)) -if [ -f "$P/#index.flag" -a ! "$P/#page.md" -nt "$P/#index.flag" ] || [ ! -d "$P" ]; then +if [ "$SEARCH_INDEX" != true ] || [ ! -d "$P" ] || \ + [ -f "$P/#index.flag" -a ! "$P/#index.flag" -ot "$P/#page.md" ] +then cat exit 0 fi @@ -27,6 +29,7 @@ exec 3>&1 touch -d "@$_DATE" "$P/#index.flag" mkdir -p "$I" +DOC="$(STRING "${P#"$_DATA/pages"}")" { cat; printf \\n; } \ | while IFS='' read -r line; do @@ -36,9 +39,9 @@ done \ | awk ' BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F if ( length("¡") == 1 ) # Utf-8 aware AWK - FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+"; + FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+"; else # UTF-8 Hack - FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+"; + FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+"; fi } { for (n = 1; n <= NF; n++) { @@ -47,22 +50,9 @@ done \ } } } END { for (w in words) printf "%i %i %f %s\n", words[w], total, words[w] / total, w; } ' \ -| while read num total freq word; do +| while read -r num total freq word; do [ "$word" ] || continue - F="$I/$word" - L="$(STRING "$DOC")" - - if LOCK "$F"; then - touch "$F" - { while read d l f n t; do - [ "$l" = "$L" ] \ - || printf "%i %s %f %i %i\n" \ - "$d" "$l" "$f" "$n" "$t" - done <"$F" - printf "%i %s %f %i %i\n" \ - "$_DATE" "$L" "$freq" "$num" "$total" - } >"$F.$$" - mv -- "$F.$$" "$F" - RELEASE "$F" - fi + printf "%i %s %f %i %i\n" \ + "$_DATE" "$DOC" "$freq" "$num" "$total" \ + >>"$I/$word" done