words="$( GET q | awk '
BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F
if ( length("¡") == 1 ) # Utf-8 aware AWK
- FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
+ FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
else # UTF-8 Hack
- FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
+ FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
fi
}
{ for (n = 1; n <= NF; n++) printf "%s ", tolower($n); }
| awk '
BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F
if ( length("¡") == 1 ) # Utf-8 aware AWK
- FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
+ FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
else # UTF-8 Hack
- FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
+ FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
fi
}
{ for (n = 1; n <= NF; n++) {
} } }
END { for (w in words) printf "%i %i %f %s\n", words[w], total, words[w] / total, w; }
' \
-| while read num total freq word; do
+| while read -r num total freq word; do
[ "$word" ] || continue
F="$I/$word"
L="$(STRING "$DOC")"