git.plutz.net Git - shellwiki/blobdiff - handlers/40_search.sh
treat %XX URL sequences as field stop in indexing, do not index words > 128 characters
[shellwiki] / handlers / 40_search.sh
index 3db8987164a867273aab6de87d504395aa838166..c558f5394b9cd01b8a86063d69a291de094b7fbc 100644 (file)
@@ -5,14 +5,17 @@
 . "$_EXEC/cgilite/storage.sh"
 
 I="$_DATA/index"
-words="$(GET q |tr '] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-' ' ')"
+words="$( GET q | sed -E '
+  :X $bY; N; bX; :Y
+  s;([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[1-9A-Fa-f]{2})+;     ;g
+')"
 
 for w in ${words}; do
   [ ! -f "$I/$w" ] && continue
 
   while read date doc freq num total; do
     P="$_DATA/pages$(UNSTRING "$doc")"
-    d="$(stat -c %Y -- "$P/#page.md")"
+    d="$(stat -c %Y -- "$P/#index.flag")"
     [ "$d" -gt "$date" ] && continue
 
     printf '%s %f\n' "$doc" "$freq"