]> git.plutz.net Git - shellwiki/commitdiff
search index maintenance script
author Paul Hänsch <paul@plutz.net>
Fri, 22 Sep 2023 21:22:19 +0000 (23:22 +0200)
committer Paul Hänsch <paul@plutz.net>
Fri, 22 Sep 2023 21:22:19 +0000 (23:22 +0200)
maintenance.sh [new file with mode: 0755]

diff --git a/maintenance.sh b/maintenance.sh
new file mode 100755 (executable)
index 0000000..d94d6e3
--- /dev/null
@@ -0,0 +1,75 @@
+#!/bin/sh
+
+export _EXEC="${0%/*}/" _DATA=""  # _DATE="$(date +%s)"
+verb=""
+
+help() {
+  ex="$1"
+
+  cat >&2 <<-EOF
+       This script should be run regularly via cron to remove outdated
+       records from search the index.
+
+       USAGE:
+
+       INSTALL_DIR/maintenance.sh SITE_DIR
+
+       maintenance.sh --exec "INSTALL_DIR"  --data "SITE_DIR"
+
+       Options:
+
+       --exec INSTALL_DIR
+           Point to the location of your shellwiki installation. This will
+           default to the path at which the script is called, if it can be
+           determined.
+
+       --data SITE_DIR
+           Point to the location of your site installation. I.e. the directory
+           containing your "pages/" and "index/" dir.
+
+       EOF
+
+  exit "${ex:-0}"
+}
+
+while [ $# -gt 0 ]; do case $1 in
+  --exec|-e) _EXEC="${2%/}"; shift 2;;
+  --data|-d) _DATA="${2%/}"; shift 2;;
+  --verbose|-v) verb=true; shift 1;;
+  --help) help 0;;
+  *) [ ! "$_DATA" ] \
+     && _DATA="${1%/}" \
+     || help 1
+     ;;
+esac; done
+
+if ! [ -d "$_DATA/pages/" -a -d "$_DATA/index/" ]; then
+  printf 'ERROR: %s\n\n' "\"${_DATA}\" does not seem to be valid site directory" >&2
+  help 1
+fi
+if ! [ -x "$_EXEC/parsers/40_indexer.sh" -a -x "$_EXEC/cgilite/storage.sh" ]; then
+  printf 'ERROR: %s\n\n' "could not determine shellwiki installation path (tried \"$_EXEC\")" >&2
+  help 1
+fi
+
+. "$_EXEC/cgilite/storage.sh"
+
+for word in "$_DATA/index"/*; do
+  [ "$word" = "$_DATA/index/*" ] && continue
+
+  [ "$verb" ] && printf '\r                                                  \r%s\r' "${word##*/}" >&2
+  mv -- "$word" "${word}.$$"
+
+  while read -r date location freq num total; do
+    l="$_DATA/pages$(UNSTRING "$location")#index.flag"
+    d="$(stat -c %Y "$l")" 2>&-
+
+    if [ "$date" -ge "$d" ] 2>&-; then
+      printf '%i       %s      %f      %i      %i\n' \
+             "$date" "$location" "$freq" "$num" "$total"
+    elif [ "$verb" ]; then
+      printf 'Removing "%s" from "%s"\n' "$location" "$word" >&2
+    fi
+  done <"${word}.$$" >>"${word}"
+  rm -- "${word}.$$"
+done