bugfix: backslash escaping

author Paul Hänsch <paul@plutz.net>

Fri, 22 Sep 2023 17:21:15 +0000 (19:21 +0200)

committer Paul Hänsch <paul@plutz.net>

Fri, 22 Sep 2023 17:21:15 +0000 (19:21 +0200)
author Paul Hänsch <paul@plutz.net>
Fri, 22 Sep 2023 17:21:15 +0000 (19:21 +0200)
committer Paul Hänsch <paul@plutz.net>
Fri, 22 Sep 2023 17:21:15 +0000 (19:21 +0200)
diff --git a/handlers/40_search.sh b/handlers/40_search.sh

index d198f220756319f457783e95af36d43d9114351e..6ac6dce6720066e71a2adb419f29dc633674f417 100644 (file)
--- a/handlers/40_search.sh
+++ b/handlers/40_search.sh
@@ -8,9 +8,9 @@ I="$_DATA/index"
  words="$( GET q | awk '
    BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F
            if ( length("¡") == 1 )  # Utf-8 aware AWK
-          FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
+          FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
            else                     # UTF-8 Hack
-          FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
+          FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
            fi
          }
         { for (n = 1; n <= NF; n++) printf "%s  ", tolower($n); }
diff --git a/parsers/40_indexer.sh b/parsers/40_indexer.sh

index e3065c14aa11b8c5c82b357188f008ce48223e3a..08060f1637ba6aa4b83a1d686fab601f09751ffd 100755 (executable)
--- a/parsers/40_indexer.sh
+++ b/parsers/40_indexer.sh
@@ -36,9 +36,9 @@ done \
  | awk '
    BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F
            if ( length("¡") == 1 )  # Utf-8 aware AWK
-          FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
+          FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
            else                     # UTF-8 Hack
-          FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
+          FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
            fi
          }
          { for (n = 1; n <= NF; n++) {
@@ -47,7 +47,7 @@ done \
          } } }
      END { for (w in words) printf "%i %i %f %s\n", words[w], total, words[w] / total, w; }
  ' \
-| while read num total freq word; do
+| while read -r num total freq word; do
    [ "$word" ] || continue
    F="$I/$word"
    L="$(STRING "$DOC")"
author	Paul Hänsch <paul@plutz.net>
	Fri, 22 Sep 2023 17:21:15 +0000 (19:21 +0200)
committer	Paul Hänsch <paul@plutz.net>
	Fri, 22 Sep 2023 17:21:15 +0000 (19:21 +0200)
handlers/40_search.sh		patch | blob | history
parsers/40_indexer.sh		patch | blob | history