# Snapshot the request start time: whole seconds (_DATE) and the sub-second
# remainder (_date_n), read from a here-document (its body and EOF delimiter
# are not visible in this fragment — presumably `date +'%s %N'` output).
3 read _DATE _date_n <<-EOF
# Pull in the cgilite helper library from the script's own directory
# (overridable via $_EXEC): CGI/HTTP helpers, key-value storage (DBM/DB2,
# STRING/UNSTRING), and the JSON parser (json_load/json_get/json_dump).
7 . "${_EXEC:-${0%/*}}/cgilite/cgilite.sh"
8 . "${_EXEC:-${0%/*}}/cgilite/storage.sh"
9 . "${_EXEC:-${0%/*}}/cgilite/json.sh"
# Trace every incoming request with its timestamp.
11 debug "$REQUEST_METHOD $REQUEST_URI $SERVER_PROTOCOL $_DATE"
14 local J="$1" ztmp="${TMP:-/tmp}/zipfile_$$.zip"
17 # json_get "$J" parts.comments
19 case $(json_get "$J" title) in
21 printf %s "$content" |base64 -d
24 printf %s "$content" |base64 -d \
28 printf %s "$content" |base64 -d \
32 printf %s "$content" |base64 -d \
36 printf %s "$content" |base64 -d \
40 printf %s "$content" |base64 -d \
44 printf %s "$content" |base64 -d >"$ztmp"
45 unzip -qc "$ztmp" word/document.xml \
46 | head -c 128M | sed 's;<[^>]*>;;g'
50 printf %s "$content" |base64 -d >"$ztmp"
51 unzip -qc "$ztmp" xl/sharedStrings.xml \
52 | head -c 128M | sed 's;<[^>]*>; ;g'
56 printf %s "$content" |base64 -d >"$ztmp"
57 unzip -qc "$ztmp" content.xml \
58 | head -c 128M | sed 's;<[^>]*>;;g'
62 printf %s "$content" |base64 -d >"$ztmp"
63 unzip -qc "$ztmp" content.xml \
64 | head -c 128M | sed 's;<[^>]*>; ;g'
72 local index="$1" words w num total freq doc date J
# Tokenize the query: lower-case the words, splitting on ASCII punctuation,
# percent-escapes (%XX) and Unicode general punctuation (U+2000-U+206F).
# NOTE(review): the literal "ยก" in the UTF-8 probe below looks mis-encoded;
# the original was presumably a single two-byte UTF-8 character (e.g. "¡")
# whose length is 1 only in a UTF-8-aware awk. As shown it is two characters,
# so the probe could never succeed — verify against the upstream source.
75 words="$(printf %s\\n "$words" | awk '
76 BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F
77 if ( length("ยก") == 1 ) # Utf-8 aware AWK
78 FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
80 FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
83 { for (n = 1; n <= NF; n++) printf "%s ", tolower($n); }
87 [ ! -f "${index}/$w" ] && continue
# Each per-word index row: word-count, total, weight, doc key, index date.
# Emit "dockey-date weight" pairs for the scoring pass below.
89 while read num total freq doc date; do
90 printf '%s-%i %f\n' "${doc}" "${date}" "$freq"
94 { cnt[$1]++; weight[$1] = weight[$1] ? weight[$1] + $2 : $2; }
95 END { m = 0; for (d in cnt) m = ( m < cnt[d] ) ? cnt[d] : m;
96 for (d in cnt) if ( cnt[d] == m ) printf "%f %s\n", weight[d], d;
100 | while read freq doc; do
# Split the "dockey-date" pair again; the doc key is STRING-escaped storage.
101 date="${doc##*-}" doc="$(UNSTRING "${doc%-*}")"
# Only report hits whose stored record still carries the same _indexdate —
# entries indexed under an older version of the document are dropped.
103 if J="$(DBM "$_records" get "$doc")"; then
104 [ "$date" -eq "$(json_get obj:"$J" _indexdate)" ] \
105 && printf '%f %s %s\n' \
106 "$freq" "$(STRING "$doc")" "$(STRING "$J")"
# Derive the index name from the first path segment of the request URI and
# locate its document record database.
111 _INDEX="${PATH_INFO#/}" _INDEX="${_INDEX%%/*}"
112 _records="${_DATA}/${_INDEX}/_0_DOCS"
# NOTE(review): this tests $INDEX, but the variable assigned above is $_INDEX.
# If $INDEX is never set elsewhere, this 404 branch can never fire — likely a
# typo; confirm against the full file. (Also: [ expr -a expr ] is deprecated
# and ambiguous; two [ ] tests joined with && would be safer.)
114 if [ "${INDEX}" -a ! -d "${_DATA}/${_INDEX}" ]; then
115 printf '%s\r\n' "Status: 404 Not Found" ""
# Per-index HTTP Basic auth: auth.db maps index name -> list of accepted
# base64 credentials. No entry means the index is open.
117 elif authlist="$(DBM "${_DATA}/auth.db" get "${_INDEX}" )"; then
118 auth="$(HEADER Authorization)" auth="${auth#Basic }"
# The sentinel "deny" terminates the list; reaching it means no credential
# in the list matched the client's Authorization header.
119 for a in $authlist deny; do
120 [ "$auth" = "$a" ] && break
122 if [ "$a" = "deny" -o ! "$auth" ]; then
123 printf '%s\r\n' "Status: 401 Unauthorized" \
124 "WWW-Authenticate: Basic realm=\"Rigid Find\"" "" \
128 unset a auth authlist
# --- PUT /<index>/_doc/<id>: create or update an indexed document --------
131 if [ "$REQUEST_METHOD" = "PUT" ]; then
132 _doc="${PATH_INFO#"/${_INDEX}/_doc"}"
# Slurp the raw request body (Content-Length bytes), unparsed.
134 J="$(head -c "${CONTENT_LENGTH:-0}")"
135 # Don't use json parser to get content field
136 # Content can be very large and the json parser is slow
# Split the "content" field out with sed instead: it is expected to hold
# base64 data (no embedded quotes or escapes), so the naive regex match on
# "content":"[^"]*" used below is sufficient.
137 content="$(printf %s\\n "$J" |sed -E '
139 s;^.*,[ \t\r\n]*"content"[ \t\r\n]*:[ \t\r\n]*";;
143 J="$(printf %s\\n "$J" |sed -E '
145 s;,[ \t\r\n]*"content"[ \t\r\n]*:[ \t\r\n]*"[^"]*";;
147 J="$(json_load "${J}")"
# Trace sizes: the (large) raw content and the (small) parsed metadata.
149 debug "Content: ${#content} bytes"
150 debug "$(json_dump "$J")"
# Non-empty content: extract plain text and feed the word statistics into
# the per-index concordance, keyed by escaped doc id + index timestamp.
152 if [ "${#content}" -gt 0 ]; then
153 ingest "$J" "$content"\
154 | "${_EXEC}/concordance.sh" \
155 "$_DATA/$_INDEX/" "$(STRING "$_doc") $_DATE"
# Stamp the record with its index time (used later to drop stale hits).
159 J="$(DB2 "$J" set _indexdate num:"$_DATE")"
161 if [ "${#content}" -eq 0 ]; then
162 printf '%s: %s\r\n' "Status" "200 OK"
# insert fails when the key already exists; fall through to update then.
# Note: printf reuses its format string, emitting both the Status and the
# Location header from one call.
164 elif DBM "$_records" insert "$_doc" "$J"; then
165 printf '%s: %s\r\n' "Status" "201 Created" "Location" "/${_INDEX}/_doc/$(URL "$_doc")" \
167 elif DBM "$_records" update "$_doc" "$J"; then
168 printf '%s: %s\r\n' "Status" "200 OK"
# Neither insert nor update succeeded: storage failure.
171 printf '%s\r\n' "Status: 500 Internal Server Error" ""
176 X-elastic-product: Elasticsearch
\r
177 content-type: application/vnd.elasticsearch+json;compatible-with=8
\r
179 { "_index": $(json_dump str:"${_INDEX}"),
180 "_id": $(json_dump str:"$_doc"),
# --- DELETE /<index>/_doc/<id>: remove a document record ------------------
187 elif [ "$REQUEST_METHOD" = "DELETE" ]; then
188 _doc="${PATH_INFO#"/${_INDEX}/_doc"}"
# Existence check first so a missing id yields 404 rather than 500.
# NOTE(review): DBM get prints the stored value on success (see its use in
# search) — confirm its stdout is redirected in the elided lines, or the
# record body leaks into the HTTP response here.
190 if DBM "$_records" get "$_doc"; then
191 if DBM "$_records" delete "$_doc"; then
192 printf '%s: %s\r\n' "Status" "200 OK"
# Record exists but could not be deleted: storage failure.
195 printf '%s\r\n' "Status: 500 Internal Server Error" ""
199 printf '%s: %s\r\n' "Status" "404 Not Found"
204 X-elastic-product: Elasticsearch
\r
205 content-type: application/vnd.elasticsearch+json;compatible-with=8
\r
207 { "_index": $(json_dump str:"${_INDEX}"),
208 "_id": $(json_dump str:"$_doc"),
# --- POST (_search): run a query, build an Elasticsearch-style hit list ---
215 elif [ "$REQUEST_METHOD" = "POST" ]; then
216 J="$(json_load "$(head -c "${CONTENT_LENGTH:-0}")")"
# Only the query.bool.must.bool.should clause is honoured; each element's
# match_phrase_prefix.content contributes words to the search, joined by
# spaces (clauses of other shapes are silently ignored via 2>/dev/null).
217 J="$(json_get "$J" query.bool.must.bool.should)"
220 for j in $(DB2 "$J" iterate @); do
221 json_get "$(UNSTRING "$j")" match_phrase_prefix.content
222 done 2>/dev/null |tr \\n ' '
224 debug "Search words: $words"
# $words is intentionally unquoted: each word becomes its own argument.
227 search "${_DATA}/${_INDEX}" $words \
228 | while read -r score id source; do
229 debug "Hit: $id $score"
# Assemble one hit object per result: _index, _id, _score, _source.
230 S="$(DB2 "" set _index str:"${_INDEX}")"
231 S="$(DB2 "$S" set _id str:"$(UNSTRING "${id#/}")")"
232 S="$(DB2 "$S" set _score num:"$score")"
233 S="$(DB2 "$S" set _source obj:"$(UNSTRING "$source")")"
234 printf 'obj:%s\t' "$(STRING "$S")"
# Strip the trailing separator left by the (elided) collection step.
237 results="${results% }"
# Elapsed request time in nanoseconds.
# NOTE(review): this computes start-minus-now, which is NEGATIVE — unless
# the elided lines 240-242 negate $t, the "took" value emitted below will be
# negative (and ns/1000 is microseconds, not the milliseconds Elasticsearch
# reports). Verify against the full file.
239 t="$(( ${_DATE}${_date_n} - $(date +%s%N) ))"
243 X-elastic-product: Elasticsearch
\r
244 Content-Type: application/vnd.elasticsearch+json;compatible-with=8
\r
246 { "took":$((t / 1000)),
249 "total":{"value": $(DB2 "$results" count @) ,"relation":"eq"},
250 "max_score": $(json_get "arr:$results" '[0]._score' 2>/dev/null || printf 0),
251 "hits": $(json_dump "arr:$results")
# --- HEAD: client handshake; advertise Elasticsearch compatibility --------
256 elif [ "$REQUEST_METHOD" = "HEAD" ]; then
257 accept="$(HEADER Accept)"
# Echo the client's Accept value as content-type when it names
# vnd.elasticsearch+json; otherwise fall back to plain application/json.
# (|| on an assignment is safe here: the assignment itself cannot fail.)
258 [ ! "${accept#*"vnd.elasticsearch+json"*}" ] \
259 && ctype="${accept}" || ctype="application/json"
263 X-elastic-product: Elasticsearch
\r
264 content-type: ${ctype}
\r
270 # elif [ "$REQUEST_METHOD" = "GET" ]; then
272 Status: 501 Not Implemented
\r
273 X-elastic-product: Elasticsearch
\r
274 content-type: text/plain
\r
276 Use the Nextcloud Elastic Search Plugin to use this service.