. "$_EXEC/cgilite/storage.sh"
I="$_DATA/index"
-words="$( GET q | sed -E '
- :X $bY; N; bX; :Y
- s;([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[1-9A-Fa-f]{2})+; ;g
+# Split the output of GET q into a space separated word list.
+# Runs of whitespace, ASCII punctuation, %XX escape sequences and characters
+# from the Unicode block U+2000 - U+206F are treated as word separators.
+words="$( GET q | awk '
+  BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F
+    if ( length("¡") == 1 ) # UTF-8 aware AWK: the block can be written as one character range
+      FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
+    else # Byte oriented AWK: match the UTF-8 byte sequences E2 80 80-BF and E2 81 80-AF instead
+      FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\200-\257]')"')+";
+  }
+  # Print every field followed by one space; empty fields only add extra spaces
+  { for (n = 1; n <= NF; n++) printf "%s ", $n; }
')"
for w in ${words}; do
printf '%s\n' "$line"
done \
| awk '
- BEGIN { FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2})+" }
+  BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F
+    if ( length("¡") == 1 ) # UTF-8 aware AWK: the block can be written as one character range
+      FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
+    else # Byte oriented AWK: match the UTF-8 byte sequences E2 80 80-BF and E2 81 80-AF instead
+      FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\200-\257]')"')+";
+  }
{ for (n = 1; n <= NF; n++) {
if ( $n != "" && length($n) <= 128 ) {
words[tolower($n)]++; total++;