3 # Copyright 2023, 2024 Paul Hänsch
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
12 # SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
15 # IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# Installation directory (_EXEC) and site directory (_DATA) are taken from the
# environment when already set. Otherwise _EXEC falls back to $0 with its last
# path component stripped (plus a trailing slash) and _DATA to ".".
17 export _EXEC="${_EXEC:-${0%/*}/}" _DATA="${_DATA:-.}"
# Option state used by the argument parser and the routines further down.
# `v` is used as a printf field width for the progress output.
# NOTE(review): `purge` is set by --purge and tested during pruning, but is not
# reset in the lines visible here - confirm it cannot leak in from the environment.
18 verb="" v=0 cmd="" force="" location=""
26 ${0##*/} prune [--exec "INSTALL_DIR"] [--data "SITE_DIR"] [-v]
28 ${0##*/} index [--exec "INSTALL_DIR"] [--data "SITE_DIR"] \\
29 [--location "/PAGE"] [--force] [-v]
34 Remove outdated records from the database. This is usually
35 more time consuming than index creation. It is generally
36 safe to run pruning while the wiki is online, even when
37 pages are being updated. Although in rare cases a search
38 operation may return incomplete results while running on
39 a database being pruned.
41 Pruning mode should be called regularly via cron.
44 Add pages to the search index. Pages with a current index
45 will be skipped unless the --force option is provided.
46 Optionally a --location can be provided to add only a
47 part of the document tree.
49 When running indexing and pruning together, indexing should be run
50 first and pruning afterwards.
52 Pruning becomes necessary with page updates, not during mere read
53 operation. On a medium traffic installation pruning should be run
55 Pruning the index more often than daily will rarely be necessary
56 and with low traffic sites monthly maintenance may be completely
62 Point to the location of your shellwiki installation. Without
63 this option, the location will be read from the environment
64 variable "\$_EXEC", or will default to the path at which the
65 script is called, if it can be determined.
68 Point to the location of your site installation. I.e. the
69 directory containing your "pages/" and "index/" dir. Defaults to
70 the environment variable "\$_DATA" or the working directory.
73 Add pages to index even if they seem to be indexed already.
76 Index only the given page and its children. The path is given
77 relative to the web root, i.e. without the DATA and "pages/"
81 During pruning, remove empty database files. This will look
82 tidier in the index/ directory, but the operation does not
83 guarantee atomicity and can disrupt index operations happening at
84 the same time. Use it carefully and maybe don't use it in
85 automatic runs via cron.
# Parse the command line until no arguments are left. Options that take a
# value (--exec, --data, --location) consume two arguments, flags consume one.
94 while [ $# -gt 0 ]; do case $1 in
# A single trailing slash is stripped from the --exec and --data paths.
95 --exec|-e) _EXEC="${2%/}"; shift 2;;
96 --data|-d) _DATA="${2%/}"; shift 2;;
97 --location) location="${2}"; shift 2;;
98 --verbose|-v) verb=true; shift 1;;
99 --force) force=true; shift 1;;
100 --purge) purge=true; shift 1;;
# `help` is defined outside the visible lines; presumably it prints the usage
# text and exits with the given status, with stderr merged into stdout here
# because help was requested explicitly - confirm.
101 --help) help 0 2>&1;;
# The first argument reaching this line becomes the command; if a command is
# already set, `help 1` is called instead.
103 [ ! "$cmd" ] && cmd="$1" || help 1
# The site directory must contain both a pages/ and an index/ directory.
108 if ! [ -d "$_DATA/pages/" -a -d "$_DATA/index/" ]; then
109 printf 'ERROR: %s\nTry --help\n' "\"${_DATA}\" does not seem to be a valid site directory" >&2
# The installation directory must provide the indexer and the storage library
# as executable files.
112 if ! [ -x "$_EXEC/parsers/40_indexer.sh" -a -x "$_EXEC/cgilite/storage.sh" ]; then
113 printf 'ERROR: %s\nTry --help\n' "could not determine shellwiki installation path (tried \"$_EXEC\")" >&2
# Check for the case that no command word was given on the command line.
116 if [ ! "$cmd" ]; then
# Source the storage library into this shell.
# NOTE(review): UNSTRING, used during pruning, presumably comes from here - confirm.
120 . "$_EXEC/cgilite/storage.sh"
# Pruning pass over all files in "$_DATA/index". Each file is rewritten so
# that only records at least as new as the page's "#index.flag" survive.
123 local v l d date location freq num total
125 for word in "$_DATA/index"/*; do
# An unmatched glob stays literal; skip it (empty index directory).
126 [ "$word" = "$_DATA/index/*" ] && continue
# Progress display on stderr: blank out $v columns, return to the start of the
# line and print the name of the current index file.
128 [ "$verb" ] && printf "%${v}s\r%s\r" "" "${word##*/}" >&2
# Move the index file aside under a name suffixed with this shell's PID; the
# loop below re-creates the original name from the records that are kept.
130 mv -- "$word" "${word}.$$"
# One record per line: date, location, frequency and two counters.
132 while read -r date location freq num total; do
# Flag file of the page this record refers to, and its modification time in
# seconds since the epoch (empty if stat fails).
133 l="$_DATA/pages$(UNSTRING "$location")/#index.flag"
134 d="$(stat -c %Y "$l")" 2>&-
# Keep the record only if it is at least as new as the flag file. stderr is
# closed for the test, so a failing comparison (for instance an empty $d)
# silently counts as "not kept".
136 if [ "$date" -ge "$d" ] 2>&-; then
137 printf '%i %s %f %i %i\n' \
138 "$date" "$location" "$freq" "$num" "$total"
139 elif [ "$verb" ]; then
140 printf "\rRemoving \"%s\" from \"%s\"\n" "$location" "${word##*/}" >&2
# Input comes from the moved-aside copy; kept records are appended to the
# original file name.
142 done <"${word}.$$" >>"${word}"
# With --purge: handle index files that exist as regular files but are empty.
144 if [ "${purge}" -a -f "${word}" -a ! -s "${word}" ]; then
146 [ "$verb" ] && printf '\rRemoving empty search key "%s"\n' "${word}" >&2
152 local PATH_INFO _DATE SEARCH_INDEX location v
153 export PATH_INFO="" _DATE="$(date +%s)" SEARCH_INDEX=true
155 if [ "$location" ]; then
156 location="${location#/}" location="${location%/}"
157 printf %s\\n "/${location}/"
158 find "$_DATA/pages/" -type d -path "$_DATA/pages/${location}/*" -not -name "#*" -printf "/%P/\n"
160 find "$_DATA/pages/" -type d -not -name "#*" -printf "/%P/\n"
162 | while read PATH_INFO; do
163 [ "$force" ] && rm -f -- "$_DATA/pages/$PATH_INFO/#index.flag"
164 if [ "$_DATA/pages/$PATH_INFO/#page.md" -nt "$_DATA/pages/$PATH_INFO/#index.flag" \
165 -o -f "$_DATA/pages/$PATH_INFO/#page.md" \
166 -a ! -f "$_DATA/pages/$PATH_INFO/#index.flag" ] 2>&-
168 [ "$verb" ] && printf "%${v}s\r%s\r" "" "$PATH_INFO" >&2
170 cd -- "$_DATA/pages/$PATH_INFO"
171 "$_EXEC/parsers/40_indexer.sh" <"$_DATA/pages/$PATH_INFO/#page.md" >/dev/null