#!/bin/sh
# Minimal Elasticsearch-flavoured CGI endpoint built on cgilite.
#
# Routing is by REQUEST_METHOD:
#   PUT     index the document found in the request body
#   DELETE  remove a document record
#   POST    accepted, no action
#   HEAD    headers only
#   GET     index description for "/<index>/", cluster banner otherwise
#
# Environment read: _EXEC, _DATA, _DATE, PATH_INFO, REQUEST_METHOD,
# CONTENT_LENGTH.  The helpers json_load, json_get, json_dump, DBM, DB2,
# STRING, URL and HEADER come from the sourced cgilite files.

. "${_EXEC:-${0%/*}}/cgilite/cgilite.sh"
. "${_EXEC:-${0%/*}}/cgilite/storage.sh"
. "${_EXEC:-${0%/*}}/cgilite/json.sh"

[ "$_DATE" ] || _DATE="$(date +%s)"

# First path component of PATH_INFO is the index name.
_INDEX="${PATH_INFO#/}"
_INDEX="${_INDEX%%/*}"
_records="${_DATA}/${_INDEX}/_0_DOCS"

# Write the base64-decoded "content" field of JSON document $1 to stdout.
ingest_payload() {
  json_get "$1" content | base64 -d
}

# Treat the decoded content of document $1 as a zip archive, print member $2
# (capped at 128M) and replace every markup tag with the string $3.
ingest_zip_xml() {
  ingest_payload "$1" \
  | unzip -qc /dev/stdin "$2" \
  | head -c 128M \
  | sed "s;<[^>]*>;$3;g"
}

# Print the plain text of JSON document $1 on stdout.
# The converter is selected by the extension of the "title" field; titles
# with an unrecognised extension produce no output.
ingest() {
  local J="$1"

  case $(json_get "$J" title) in
    *.md|*.txt|*.csv)
      ingest_payload "$J"
      ;;
    *.pdf)
      # The second "-" sends the text to stdout; with stdin as input and no
      # output name pdftotext has nowhere to write.
      ingest_payload "$J" | pdftotext - -
      ;;
    *.doc)
      ingest_payload "$J" | catdoc /dev/stdin
      ;;
    *.xls)
      ingest_payload "$J" | xls2csv /dev/stdin
      ;;
    *.ppt)
      ingest_payload "$J" | catppt /dev/stdin
      ;;
    *.html|*.xml|*.svg)
      ingest_payload "$J" | sed 's;<[^>]*>;;g'
      ;;
    *.docx)
      ingest_zip_xml "$J" word/document.xml ""
      ;;
    *.xlsx)
      ingest_zip_xml "$J" xl/sharedStrings.xml " "
      ;;
    *.odt)
      ingest_zip_xml "$J" content.xml ""
      ;;
    *.ods|*.odp)
      ingest_zip_xml "$J" content.xml " "
      ;;
    *)
      :
      ;;
  esac
}

# Emit the remaining headers and the JSON acknowledgement shared by PUT and
# DELETE.  Reads the globals _INDEX, _doc, result and _DATE; the caller has
# already printed the Status header.
es_ack() {
  sed 's;$;\r;' <<EOF
X-elastic-product: Elasticsearch
content-type: application/vnd.elasticsearch+json;compatible-with=8

{ "_index": $(json_dump str:"${_INDEX}"),
  "_id": $(json_dump str:"$_doc"),
  "result": "$result",
  "_indexdate": $_DATE
}
EOF
}

# Emit the 200 response head used by HEAD and GET.
# The Accept header is echoed back as content type when it mentions
# vnd.elasticsearch+json anywhere; in every other case, including a missing
# Accept header, plain application/json is announced.
# NOTE(review): this writes a literal "HTTP/1.1" status line while PUT and
# DELETE use the CGI "Status:" header - confirm which one the server expects.
es_ok_headers() {
  local accept ctype

  accept="$(HEADER Accept)"
  case "$accept" in
    *vnd.elasticsearch+json*) ctype="$accept" ;;
    *) ctype="application/json" ;;
  esac

  sed 's;$;\r;' <<EOF
HTTP/1.1 200 OK
X-elastic-product: Elasticsearch
content-type: ${ctype}

EOF
}

if [ "$REQUEST_METHOD" = "PUT" ]; then
  _doc="${PATH_INFO#"/${_INDEX}/_doc"}"

  # Request body, limited to the announced length.
  J="$(json_load "$(head -c "${CONTENT_LENGTH:-0}")")"

  # Feed the extracted text to the concordance builder.
  ingest "$J" \
  | "${_EXEC}/concordance.sh" \
    "$_DATA/$_INDEX/" "$(STRING "$_doc") $_DATE"

  # Store the record without its bulky content, stamped with the index date.
  J="${J#obj:}"
  J="$(DB2 "$J" delete content)"
  J="$(DB2 "$J" set _indexdate num:"$_DATE")"

  # Try insert first and fall back to update when insert is refused
  # (presumably because the id already exists - confirm in storage.sh).
  if DBM "$_records" insert "$_doc" "$J"; then
    printf '%s: %s\r\n' "Status" "201 Created" \
      "Location" "/${_INDEX}/_doc/$(URL "$_doc")"
    result="created"
  elif DBM "$_records" update "$_doc" "$J"; then
    printf '%s: %s\r\n' "Status" "200 OK"
    result="updated"
  else
    printf '%s\r\n' "Status: 500 Internal Server Error" ""
    exit 0
  fi

  es_ack
  exit 0

elif [ "$REQUEST_METHOD" = "DELETE" ]; then
  _doc="${PATH_INFO#"/${_INDEX}/_doc"}"

  # Only the exit status of "get" matters here.  Its stdout is discarded so
  # that nothing can end up in front of the response headers (presumably
  # "get" prints the stored record - confirm in storage.sh).
  if DBM "$_records" get "$_doc" >/dev/null; then
    if DBM "$_records" delete "$_doc"; then
      printf '%s: %s\r\n' "Status" "200 OK"
      result="deleted"
    else
      printf '%s\r\n' "Status: 500 Internal Server Error" ""
      exit 0
    fi
  else
    printf '%s: %s\r\n' "Status" "404 Not Found"
    result="not_found"
  fi

  es_ack
  exit 0

elif [ "$REQUEST_METHOD" = "POST" ]; then
  # Accepted without action or response.
  :

elif [ "$REQUEST_METHOD" = "HEAD" ]; then
  es_ok_headers
  exit 0

elif [ "$REQUEST_METHOD" = "GET" ]; then
  es_ok_headers

  if [ "$PATH_INFO" = "/${_INDEX}/" ]; then
    # Static description of the index; only the name and the creation date
    # (birth time of the index directory) are dynamic.
    sed 's;$;\r;' <<EOF
{ $(json_dump str:"${_INDEX}"): {
  "aliases":{},
  "mappings": {
    "properties": {
      "content": {"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
      "hash":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
      "metatags":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
      "owner":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
      "parts":{"properties":{"comments":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}},
      "provider":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
      "share_names":{"properties":{"paul":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}},
      "source":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
      "title":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}
    }
  },
  "settings": {
    "index": {
      "routing":{"allocation":{"include":{"_tier_preference":"data_content"}}},
      "number_of_shards":"1",
      "provided_name": $(json_dump str:"${_INDEX}"),
      "creation_date": "$(stat -c %W "${_DATA}/${_INDEX}")",
      "number_of_replicas":"1",
      "uuid":"0000000000000000000000",
      "version":{"created":"8500010"}
    }
  }
} }
EOF
  else
    # Cluster banner for every other GET path.
    sed 's;$;\r;' <<EOF
{
  "name" : "head",
  "cluster_name" : "elasticsearch",
  "version" : {
    "number" : "8.12.1",
    "lucene_version" : "9.9.2",
    "minimum_wire_compatibility_version" : "7.17.0",
    "minimum_index_compatibility_version" : "7.0.0"
  },
  "tagline" : "You Know, for Search"
}
EOF
  fi
  exit 0

else
  # Any other request method is answered with a server error.
  printf '%s\r\n' "Status: 500 Internal Server Error" ""
  exit 0
fi