X-Git-Url: https://git.plutz.net/?a=blobdiff_plain;f=searchindex.sh;h=28bd76a82e979386dfaa76aaf26cf507bec614c0;hb=HEAD;hp=029f0f4630b108939355c6d5b37434569904fd94;hpb=51fe8ca126d4d0ded47465e8068555b7d0203724;p=shellwiki diff --git a/searchindex.sh b/searchindex.sh index 029f0f4..28bd76a 100755 --- a/searchindex.sh +++ b/searchindex.sh @@ -1,6 +1,6 @@ #!/bin/sh -# Copyright 2023 Paul Hänsch +# Copyright 2023, 2024 Paul Hänsch # # Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -65,18 +65,25 @@ help() { script is called, if it can be determined. --data SITE_DIR - Point to the location of your site installation. I.e. the directory - containing your "pages/" and "index/" dir. Defaults to the - environment variable "\$_DATA" or the working directory. + Point to the location of your site installation. I.e. the + directory containing your "pages/" and "index/" dir. Defaults to + the environment variable "\$_DATA" or the working directory. --force Add pages to index even if they seem to be indexed already. - --loction /PAGE + --location /PAGE Index only the given page and its children. The path is given relative to the web root, i.e. without the DATA and "page/" directory. + --purge + During pruning, remove empty database files. This will look + tidyer in the index/ directory, but the operation does not + guarantee atomicity and can disrupt index operations happening at + the same time. Use it carefully and maybe don't use it in + automatic runs via cron. + -v Be more verbose. EOF @@ -90,6 +97,7 @@ while [ $# -gt 0 ]; do case $1 in --location) location="${2}"; shift 2;; --verbose|-v) verb=true; shift 1;; --force) force=true; shift 1;; + --purge) purge=true; shift 1;; --help) help 0 2>&1;; prune|index) [ ! "$cmd" ] && cmd="$1" || help 1 @@ -112,6 +120,8 @@ fi . "$_EXEC/cgilite/storage.sh" prune() { + local v l d date location freq num total + for word in "$_DATA/index"/*; do [ "$word" = "$_DATA/index/*" ] && continue @@ -120,7 +130,7 @@ prune() { mv -- "$word" "${word}.$$" while read -r date location freq num total; do - l="$_DATA/pages$(UNSTRING "$location")#index.flag" + l="$_DATA/pages$(UNSTRING "$location")/#index.flag" d="$(stat -c %Y "$l")" 2>&- if [ "$date" -ge "$d" ] 2>&-; then @@ -131,11 +141,16 @@ prune() { fi done <"${word}.$$" >>"${word}" rm -- "${word}.$$" + if [ "${purge}" -a -f "${word}" -a ! -s "${word}" ]; then + rm -- "${word}" + [ "$verb" ] && printf '\rRemoving empty search key "%s"\n' "${word}" >&2 + fi done } index() { - export PATH_INFO="" _DATE="$(date +%s)" + local PATH_INFO _DATE SEARCH_INDEX location v + export PATH_INFO="" _DATE="$(date +%s)" SEARCH_INDEX=true if [ "$location" ]; then location="${location#/}" location="${location%/}" @@ -152,6 +167,7 @@ index() { then [ "$verb" ] && printf "%${v}s\r%s\r" "" "$PATH_INFO" >&2 v="${#PATH_INFO}" + cd -- "$_DATA/pages/$PATH_INFO" "$_EXEC/parsers/40_indexer.sh" <"$_DATA/pages/$PATH_INFO/#page.md" >/dev/null fi done