X-Git-Url: https://git.plutz.net/?a=blobdiff_plain;f=searchindex.sh;h=28bd76a82e979386dfaa76aaf26cf507bec614c0;hb=767d28b6e7737c1112131899b90bc5dac95e1a75;hp=5e80014bc434a709dffe82033df928c08859a733;hpb=59aafbc74070956a8651977ed81057ee6982c712;p=shellwiki diff --git a/searchindex.sh b/searchindex.sh index 5e80014..28bd76a 100755 --- a/searchindex.sh +++ b/searchindex.sh @@ -1,6 +1,20 @@ #!/bin/sh -export _EXEC="${0%/*}/" _DATA="." +# Copyright 2023, 2024 Paul Hänsch +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +export _EXEC="${_EXEC:-${0%/*}/}" _DATA="${_DATA:-.}" verb="" v=0 cmd="" force="" location="" help() { @@ -45,23 +59,31 @@ help() { Options: --exec INSTALL_DIR - Point to the location of your shellwiki installation. This will - default to the path at which the script is called, if it can be - determined. + Point to the location of your shellwiki installation. Without + this optin, the location will be read from the environments + variable "\$_EXEC", or will default to the path at which the + script is called, if it can be determined. --data SITE_DIR - Point to the location of your site installation. I.e. the directory - containing your "pages/" and "index/" dir. Defaults to working - directory. + Point to the location of your site installation. I.e. the + directory containing your "pages/" and "index/" dir. Defaults to + the environment variable "\$_DATA" or the working directory. --force Add pages to index even if they seem to be indexed already. - --loction /PAGE + --location /PAGE Index only the given page and its children. The path is given relative to the web root, i.e. without the DATA and "page/" directory. + --purge + During pruning, remove empty database files. This will look + tidyer in the index/ directory, but the operation does not + guarantee atomicity and can disrupt index operations happening at + the same time. Use it carefully and maybe don't use it in + automatic runs via cron. + -v Be more verbose. EOF @@ -72,8 +94,10 @@ help() { while [ $# -gt 0 ]; do case $1 in --exec|-e) _EXEC="${2%/}"; shift 2;; --data|-d) _DATA="${2%/}"; shift 2;; + --location) location="${2}"; shift 2;; --verbose|-v) verb=true; shift 1;; --force) force=true; shift 1;; + --purge) purge=true; shift 1;; --help) help 0 2>&1;; prune|index) [ ! "$cmd" ] && cmd="$1" || help 1 @@ -96,6 +120,8 @@ fi . "$_EXEC/cgilite/storage.sh" prune() { + local v l d date location freq num total + for word in "$_DATA/index"/*; do [ "$word" = "$_DATA/index/*" ] && continue @@ -104,22 +130,27 @@ prune() { mv -- "$word" "${word}.$$" while read -r date location freq num total; do - l="$_DATA/pages$(UNSTRING "$location")#index.flag" + l="$_DATA/pages$(UNSTRING "$location")/#index.flag" d="$(stat -c %Y "$l")" 2>&- if [ "$date" -ge "$d" ] 2>&-; then printf '%i %s %f %i %i\n' \ "$date" "$location" "$freq" "$num" "$total" elif [ "$verb" ]; then - printf "%${v}s\rRemoving \"%s\" from \"%s\"\n" "" "$location" "$word" >&2 + printf "\rRemoving \"%s\" from \"%s\"\n" "$location" "${word##*/}" >&2 fi done <"${word}.$$" >>"${word}" rm -- "${word}.$$" + if [ "${purge}" -a -f "${word}" -a ! -s "${word}" ]; then + rm -- "${word}" + [ "$verb" ] && printf '\rRemoving empty search key "%s"\n' "${word}" >&2 + fi done } index() { - export PATH_INFO="" _DATE="$(date +%s)" + local PATH_INFO _DATE SEARCH_INDEX location v + export PATH_INFO="" _DATE="$(date +%s)" SEARCH_INDEX=true if [ "$location" ]; then location="${location#/}" location="${location%/}" @@ -136,6 +167,7 @@ index() { then [ "$verb" ] && printf "%${v}s\r%s\r" "" "$PATH_INFO" >&2 v="${#PATH_INFO}" + cd -- "$_DATA/pages/$PATH_INFO" "$_EXEC/parsers/40_indexer.sh" <"$_DATA/pages/$PATH_INFO/#page.md" >/dev/null fi done