$(printf %s\\n "${query}" | awk '
# Splits the query text into fields and prints three lines in this order:
# tags, negated tags, words (each line is a space-separated list).
# NOTE(review): this span is a diff hunk; the lines marked with a leading
# minus or plus are the old and new variants of the same statement. The new
# FS variants drop the underscore from the separator class, so underscores
# remain inside a field.
BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F
# The length test yields 1 only when this awk counts characters rather than
# bytes (assuming the script is stored as UTF-8); it selects between a
# character range and a byte-wise alternation for the Unicode block.
if ( length("¡") == 1 ) # Utf-8 aware AWK
- FS = "([] \\t\\n\\r\"'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
+ FS = "([] \\t\\n\\r\"'\''()*+,./:;<=>?\\\\^`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
else # UTF-8 Hack
- FS = "([] \\t\\n\\r\"'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
+ FS = "([] \\t\\n\\r\"'\''()*+,./:;<=>?\\\\^`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
# NOTE(review): the byte-wise pattern ends with octal 342 201 [201-257],
# which leaves out octal 342 201 200 (U+2040) although the character range
# in the other branch covers it - confirm whether that gap is intended.
# NOTE(review): awk has no fi keyword; the next line presumably parses as a
# bare reference to an unset variable and does nothing - confirm and remove.
fi
}
# Main rule, run for every input record. A field containing # followed by
# alphanumerics or underscores (matched anywhere in the field) is stored
# uppercased in tags.
# NOTE(review): tags and words are not cleared between records, so entries
# from an earlier record can survive if the input has several lines - confirm.
{ t=0; for (n = 1; n <= NF; n++) if ($n ~ /#[[:alnum:]_]+/) tags[t++] = toupper($n);
# Every field that does NOT contain ! followed by alphanumerics/underscores
# is stored lowercased in words; fields that passed the # test above also
# pass this test, so they are stored in words as well.
t=0; for (n = 1; n <= NF; n++) if ($n !~ /![[:alnum:]_]+/) words[t++] = tolower($n);
# Output line 1: tags with one leading # removed. awk does not define the
# iteration order of for-in, so element order may differ between awks.
for (t in tags) { sub(/^#/, "", tags[t]); printf "%s ", tags[t]; } print "";
# Output line 2: negated tags with one leading ! removed.
# NOTE(review): nothing visible in this program assigns ntags, so this line
# is always empty as written - confirm whether a negated-tag pass is missing.
for (t in ntags) { sub(/^!/, "", ntags[t]); printf "%s ", ntags[t]; } print "";
# Output line 3: words. The old variant strips a single leading ! or #; the
# new variant replaces every run of !, #, underscore or space anywhere in
# the word with one space.
- for (t in words) { sub(/^[!#]/, "", words[t]); printf "%s ", words[t]; } print "";
+ for (t in words) { gsub(/[!#_ ]+/, " ", words[t]); printf "%s ", words[t]; } print "";
}
')
EOF