From 9b45daaa31a5ca1007a705c40678cad7389ab9c9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Paul=20H=C3=A4nsch?= Date: Tue, 15 Oct 2024 00:21:24 +0200 Subject: [PATCH] bugfix: search engine wordsplitting in busybox awk --- parsers/40_indexer.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parsers/40_indexer.sh b/parsers/40_indexer.sh index 56a8990..b16449b 100755 --- a/parsers/40_indexer.sh +++ b/parsers/40_indexer.sh @@ -39,9 +39,9 @@ done \ | awk ' BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F if ( length("¡") == 1 ) # Utf-8 aware AWK - FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+"; + FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+"; else # UTF-8 Hack - FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+"; + FS = "([] \t\n\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+"; fi } { for (n = 1; n <= NF; n++) { -- 2.39.2