3 . "${_EXEC:-${0%/*}}/cgilite/cgilite.sh"
4 . "${_EXEC:-${0%/*}}/cgilite/storage.sh"
5 . "${_EXEC:-${0%/*}}/cgilite/json.sh"
# Use the current epoch seconds as request timestamp unless _DATE is already
# set, then hand the request line plus that timestamp to debug.
7 [ "$_DATE" ] || _DATE="$(date +%s)"
9 debug "$REQUEST_METHOD $REQUEST_URI $SERVER_PROTOCOL $_DATE"
# Turn the base64 payload in $content into text.  The decoder is selected by
# matching the title field of the JSON document in $J.  Every branch starts by
# base64-decoding $content.  The archive branches unpack a single member with
# unzip (word/document.xml, xl/sharedStrings.xml or content.xml), cap the
# stream at 128M with head and strip XML tags with sed; depending on the
# branch a tag is replaced by nothing or by a blank.
# NOTE(review): unzip is given /dev/stdin fed from a pipe - confirm that the
# installed unzip can read a non-seekable archive.
15 # json_get "$J" parts.comments
17 case $(json_get "$J" title) in
19 printf %s "$content" |base64 -d
22 printf %s "$content" |base64 -d \
26 printf %s "$content" |base64 -d \
30 printf %s "$content" |base64 -d \
34 printf %s "$content" |base64 -d \
38 printf %s "$content" |base64 -d \
42 printf %s "$content" |base64 -d \
43 | unzip -qc /dev/stdin word/document.xml \
44 | head -c 128M | sed 's;<[^>]*>;;g'
47 printf %s "$content" |base64 -d \
48 | unzip -qc /dev/stdin xl/sharedStrings.xml \
49 | head -c 128M | sed 's;<[^>]*>; ;g'
52 printf %s "$content" |base64 -d \
53 | unzip -qc /dev/stdin content.xml \
54 | head -c 128M | sed 's;<[^>]*>;;g'
57 printf %s "$content" |base64 -d \
58 | unzip -qc /dev/stdin content.xml \
59 | head -c 128M | sed 's;<[^>]*>; ;g'
# Search routine body.  $1 is the index directory; a query term only takes
# part when a file named after it exists in that directory.
# The query is split by awk on whitespace, punctuation, %XX escapes and the
# Unicode block U+2000 - U+206F, and the terms are lower-cased.
# Per document the weights are summed and only the documents with the highest
# number of matching entries are kept.
# Output: one line per surviving hit with the summed weight, the encoded
# document id and the encoded stored record.
66 local index="$1" words w num total freq doc date J
69 words="$(printf %s\\n "$words" | awk '
70 BEGIN { # Field separator FS should include punctuation, including Unicode Block U+2000 - U+206F
71 if ( length("ยก") == 1 ) # Utf-8 aware AWK
72 FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '[\342\200\200-\342\201\257]')"')+";
74 FS = "([] \\t\\n\\r!\"#'\''()*+,./:;<=>?\\\\^_`{|}~[-]|%[0-9A-Fa-f]{2}|'"$(printf '\342\200[\200-\277]|\342\201[\201-\257]')"')+";
77 { for (n = 1; n <= NF; n++) printf "%s ", tolower($n); }
81 [ ! -f "${index}/$w" ] && continue
# read -r: the document key is stored in encoded form and may contain
# backslash escapes; without -r the shell would consume them here.
83 while read -r num total freq doc date; do
84 printf '%s-%i %f\n' "${doc}" "${date}" "$freq"
88 { cnt[$1]++; weight[$1] = weight[$1] ? weight[$1] + $2 : $2; }
89 END { m = 0; for (d in cnt) m = ( m < cnt[d] ) ? cnt[d] : m;
90 for (d in cnt) if ( cnt[d] == m ) printf "%f %s\n", weight[d], d;
94 | while read -r freq doc; do
95 date="${doc##*-}" doc="$(UNSTRING "${doc%-*}")"
# Report a hit only when its record still exists and the date taken from the
# index entry equals the _indexdate stored in that record.
97 if J="$(DBM "$_records" get "$doc")"; then
98 [ "$date" -eq "$(json_get obj:"$J" _indexdate)" ] \
99 && printf '%f %s %s\n' \
100 "$freq" "$(STRING "$doc")" "$(STRING "$J")"
# The first path segment of PATH_INFO names the index; the records of that
# index are kept in _0_DOCS below the index directory under $_DATA.
105 _INDEX="${PATH_INFO#/}" _INDEX="${_INDEX%%/*}"
106 _records="${_DATA}/${_INDEX}/_0_DOCS"
# PUT: store or refresh one document.  The document id is whatever follows
# /<index>/_doc in PATH_INFO.
108 if [ "$REQUEST_METHOD" = "PUT" ]; then
109 _doc="${PATH_INFO#"/${_INDEX}/_doc"}"
# Read at most CONTENT_LENGTH bytes of request body (none when it is unset).
111 J="$(head -c "${CONTENT_LENGTH:-0}")"
112 # Don't use json parser to get content field
113 # Content can be very large and the json parser is slow
114 content="$(printf %s\\n "$J" |sed -E '
116 s;^.*,[ \t\r\n]*"content"[ \t\r\n]*:[ \t\r\n]*";;
# Drop the content member from the JSON text so that only the remaining
# fields are handed to json_load.
120 J="$(printf %s\\n "$J" |sed -E '
122 s;,[ \t\r\n]*"content"[ \t\r\n]*:[ \t\r\n]*"[^"]*";;
124 J="$(json_load "${J}")"
126 debug "Content: ${#content} bytes"
127 debug "$(json_dump "$J")"
# Non-empty content is converted by ingest and piped into concordance.sh,
# which receives the index directory and the encoded document id followed by
# the request timestamp.
129 if [ "${#content}" -gt 0 ]; then
130 ingest "$J" "$content"\
131 | "${_EXEC}/concordance.sh" \
132 "$_DATA/$_INDEX/" "$(STRING "$_doc") $_DATE"
# Remember the indexing time inside the record as _indexdate.
136 J="$(DB2 "$J" set _indexdate num:"$_DATE")"
# Status selection: 200 without content, 201 plus Location when the record
# could be inserted, 200 when an existing record was updated; the remaining
# line reports 500.
138 if [ "${#content}" -eq 0 ]; then
139 printf '%s: %s\r\n' "Status" "200 OK"
141 elif DBM "$_records" insert "$_doc" "$J"; then
142 printf '%s: %s\r\n' "Status" "201 Created" "Location" "/${_INDEX}/_doc/$(URL "$_doc")" \
144 elif DBM "$_records" update "$_doc" "$J"; then
145 printf '%s: %s\r\n' "Status" "200 OK"
148 printf '%s\r\n' "Status: 500 Internal Server Error" ""
153 X-elastic-product: Elasticsearch
\r
154 content-type: application/vnd.elasticsearch+json;compatible-with=8
\r
156 { "_index": $(json_dump str:"${_INDEX}"),
157 "_id": $(json_dump str:"$_doc"),
# DELETE: remove one stored record.  The document id is whatever follows
# /<index>/_doc in PATH_INFO.  Status is 200 after a successful delete, 500
# when the delete fails and 404 when no such record exists.
164 elif [ "$REQUEST_METHOD" = "DELETE" ]; then
165 _doc="${PATH_INFO#"/${_INDEX}/_doc"}"
# Existence probe only: the record that get prints is discarded so that it
# cannot reach the response ahead of the Status header.
167 if DBM "$_records" get "$_doc" >/dev/null; then
168 if DBM "$_records" delete "$_doc"; then
169 printf '%s: %s\r\n' "Status" "200 OK"
172 printf '%s\r\n' "Status: 500 Internal Server Error" ""
176 printf '%s: %s\r\n' "Status" "404 Not Found"
181 X-elastic-product: Elasticsearch
\r
182 content-type: application/vnd.elasticsearch+json;compatible-with=8
\r
184 { "_index": $(json_dump str:"${_INDEX}"),
185 "_id": $(json_dump str:"$_doc"),
# POST: run a search.  The request body is parsed as JSON and narrowed to
# query.bool.must.bool.should.
192 elif [ "$REQUEST_METHOD" = "POST" ]; then
193 J="$(json_load "$(head -c "${CONTENT_LENGTH:-0}")")"
194 J="$(json_get "$J" query.bool.must.bool.should)"
# Each entry of that list contributes its match_phrase_prefix.content value.
197 for j in $(DB2 "$J" iterate @); do
198 json_get "$(UNSTRING "$j")" match_phrase_prefix.content
# NOTE(review): $words is expanded unquoted, so besides being split into
# separate arguments it is also subject to pathname expansion; a query holding
# glob characters may expand to file names - confirm this is acceptable.
203 search "${_DATA}/${_INDEX}" $words \
204 | while read -r score id source; do
# Assemble one hit with _index, _id (leading slash removed), _score and
# _source, and emit it as an encoded obj: entry terminated by a tab.
205 S="$(DB2 "" set _index str:"${_INDEX}")"
206 S="$(DB2 "$S" set _id str:"$(UNSTRING "${id#/}")")"
207 S="$(DB2 "$S" set _score num:"$score")"
208 S="$(DB2 "$S" set _source obj:"$(UNSTRING "$source")")"
209 printf 'obj:%s\t' "$(STRING "$S")"
# NOTE(review): the entries above end in a tab, while the character stripped
# here appears to be a blank - confirm that the pattern really holds a tab.
212 results="${results% }"
216 X-elastic-product: Elasticsearch
\r
217 Content-Type: application/vnd.elasticsearch+json;compatible-with=8
\r
221 "_shards":{"total":1,"successful":1,"skipped":0,"failed":0},
223 "total":{"value": $(DB2 "$results" count @) ,"relation":"eq"},
224 "max_score": $(json_get "arr:$results" '[0]._score' 2>&- || printf 0),
225 "hits": $(json_dump "arr:$results")
# HEAD: choose the response content type.  The Accept header is echoed back
# when it mentions vnd.elasticsearch+json, otherwise application/json is used.
230 elif [ "$REQUEST_METHOD" = "HEAD" ]; then
231 accept="$(HEADER Accept)"
# The double hash strips the longest match, so the expansion is empty whenever
# the token occurs anywhere in the header, also when parameters follow it.
# The extra non-empty test keeps a missing Accept header from producing an
# empty content type.
232 [ "$accept" ] && [ ! "${accept##*"vnd.elasticsearch+json"*}" ] \
233 && ctype="${accept}" || ctype="application/json"
237 X-elastic-product: Elasticsearch
\r
238 content-type: ${ctype}
\r
# GET: choose the response content type.  The Accept header is echoed back
# when it mentions vnd.elasticsearch+json, otherwise application/json is used.
243 elif [ "$REQUEST_METHOD" = "GET" ]; then
244 accept="$(HEADER Accept)"
# The double hash strips the longest match, so the expansion is empty whenever
# the token occurs anywhere in the header, also when parameters follow it.
# The extra non-empty test keeps a missing Accept header from producing an
# empty content type.
245 [ "$accept" ] && [ ! "${accept##*"vnd.elasticsearch+json"*}" ] \
246 && ctype="${accept}" || ctype="application/json"
250 X-elastic-product: Elasticsearch
\r
251 content-type: ${ctype}
\r
255 if [ "$PATH_INFO" = "/${_INDEX}/" ]; then
257 { $(json_dump str:"${_INDEX}"): {
261 "content": {"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
262 "hash":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
263 "metatags":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
264 "owner":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
265 "parts":{"properties":{"comments":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}},
266 "provider":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
267 "share_names":{"properties":{"paul":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}},
268 "source":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
269 "title":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}
274 "routing":{"allocation":{"include":{"_tier_preference":"data_content"}}},
275 "number_of_shards":"1",
276 "provided_name": $(json_dump str:"${_INDEX}"),
277 "creation_date": "$(stat -c %W "${_DATA}/${_INDEX}")",
278 "number_of_replicas":"1",
279 "uuid":"0000000000000000000000",
280 "version":{"created":"8500010"}
289 "cluster_name" : "elasticsearch",
292 "lucene_version" : "9.9.2",
293 "minimum_wire_compatibility_version" : "7.17.0",
294 "minimum_index_compatibility_version" : "7.0.0"
296 "tagline" : "You Know, for Search"
303 printf '%s\r\n' "Status: 500 Internal Server Error" ""