]> git.plutz.net Git - rigidfind/blobdiff - index.cgi
index ingest emulating Elasticsearch
[rigidfind] / index.cgi
diff --git a/index.cgi b/index.cgi
new file mode 100755 (executable)
index 0000000..cb325ea
--- /dev/null
+++ b/index.cgi
@@ -0,0 +1,206 @@
+#!/bin/sh
+
+. "${_EXEC:-${0%/*}}/cgilite/cgilite.sh"
+. "${_EXEC:-${0%/*}}/cgilite/storage.sh"
+. "${_EXEC:-${0%/*}}/cgilite/json.sh"
+
+# Request timestamp in epoch seconds; a non-empty _DATE that is already
+# set is kept as is.
+: "${_DATE:=$(date +%s)}"
+
+# Index name: PATH_INFO without its leading slash, cut at the next slash.
+_INDEX="${PATH_INFO#/}"
+_INDEX="${_INDEX%%/*}"
+
+# Name of the per-index document store that is handed to DBM below.
+_records="${_DATA}/${_INDEX}/_0_DOCS"
+
+ingest() {
+  local J="$1"
+
+  # json_get "$J" title
+  # json_get "$J" parts.comments
+
+  case $(json_get "$J" title) in
+    *.md|*.txt|*.csv)
+      json_get "$J" content |base64 -d
+      ;;
+    *.pdf)
+      json_get "$J" content |base64 -d \
+      | pdftotext -
+      ;;
+    *.doc)
+      json_get "$J" content |base64 -d \
+      | catdoc /dev/stdin
+      ;;
+    *.xls)
+      json_get "$J" content |base64 -d \
+      | xls2csv /dev/stdin
+      ;;
+    *.ppt)
+      json_get "$J" content |base64 -d \
+      | catppt /dev/stdin
+      ;;
+    *.html|*.xml|*.svg)
+      json_get "$J" content |base64 -d \
+      | sed 's;<[^>]*>;;g'
+      ;;
+    *.docx)
+      json_get "$J" content |base64 -d \
+      | unzip -qc /dev/stdin word/document.xml \
+      | head -c 128M | sed 's;<[^>]*>;;g'
+      ;;
+    *.xlsx)
+      json_get "$J" content |base64 -d \
+      | unzip -qc /dev/stdin xl/sharedStrings.xml \
+      | head -c 128M | sed 's;<[^>]*>; ;g'
+      ;;
+    *.odt)
+      json_get "$J" content |base64 -d \
+      | unzip -qc /dev/stdin content.xml \
+      | head -c 128M | sed 's;<[^>]*>;;g'
+      ;;
+    *.ods|*.odp)
+      json_get "$J" content |base64 -d \
+      | unzip -qc /dev/stdin content.xml \
+      | head -c 128M | sed 's;<[^>]*>; ;g'
+      ;;
+    *):;;
+  esac
+}
+
+if   [ "$REQUEST_METHOD" = "PUT" ]; then
+  _doc="${PATH_INFO#"/${_INDEX}/_doc"}"
+
+  J="$(json_load "$(head -c "${CONTENT_LENGTH:-0}")")"
+  
+  ingest "$J" \
+  | "${_EXEC}/concordance.sh" \
+    "$_DATA/$_INDEX/" "$(STRING "$_doc")       $_DATE"
+
+  J="${J#obj:}"
+  J="$(DB2 "$J" delete content)"
+  J="$(DB2 "$J" set _indexdate num:"$_DATE")"
+
+  if   DBM "$_records" insert "$_doc" "$J"; then
+    printf '%s: %s\r\n' "Status" "201 Created" "Location" "/${_INDEX}/_doc/$(URL "$_doc")" \
+    result="created"
+  elif DBM "$_records" update "$_doc" "$J"; then
+    printf '%s: %s\r\n' "Status" "200 OK"
+    result="updated"
+  else
+    printf '%s\r\n' "Status: 500 Internal Server Error" ""
+    exit 0
+  fi
+
+  sed 's;$;\r;' <<-EOF
+       X-elastic-product: Elasticsearch
+       content-type: application/vnd.elasticsearch+json;compatible-with=8
+
+       { "_index": $(json_dump str:"${_INDEX}"),
+         "_id": $(json_dump str:"$_doc"),
+         "result": "$result",
+         "_indexdate": $_DATE
+       }
+       EOF
+  exit 0
+
+elif [ "$REQUEST_METHOD" = "DELETE" ]; then
+  _doc="${PATH_INFO#"/${_INDEX}/_doc"}"
+
+  if   DBM "$_records" get "$_doc"; then
+    if   DBM "$_records" delete "$_doc"; then
+      printf '%s: %s\r\n' "Status" "200 OK"
+      result="deleted"
+    else
+      printf '%s\r\n' "Status: 500 Internal Server Error" ""
+      exit 0
+    fi
+  else
+    printf '%s: %s\r\n' "Status" "404 Not Found"
+    result="not_found"
+  fi
+
+  sed 's;$;\r;' <<-EOF
+       X-elastic-product: Elasticsearch
+       content-type: application/vnd.elasticsearch+json;compatible-with=8
+
+       { "_index": $(json_dump str:"${_INDEX}"),
+         "_id": $(json_dump str:"$_doc"),
+         "result": "$result",
+         "_indexdate": $_DATE
+       }
+       EOF
+  exit 0
+
+elif [ "$REQUEST_METHOD" = "POST" ]; then
+  :
+elif [ "$REQUEST_METHOD" = "HEAD" ]; then
+  accept="$(HEADER Accept)"
+  [ ! "${accept#*"vnd.elasticsearch+json"*}" ] \
+  && ctype="${accept}" || ctype="application/json"
+
+  sed 's;$;\r;' <<-EOF
+       HTTP/1.1 200 OK
+       X-elastic-product: Elasticsearch
+       content-type: ${ctype}
+       EOF
+  exit 0
+
+elif [ "$REQUEST_METHOD" = "GET" ]; then
+  accept="$(HEADER Accept)"
+  [ ! "${accept#*"vnd.elasticsearch+json"*}" ] \
+  && ctype="${accept}" || ctype="application/json"
+
+  sed 's;$;\r;' <<-EOF
+       HTTP/1.1 200 OK
+       X-elastic-product: Elasticsearch
+       content-type: ${ctype}
+
+       EOF
+       
+  if [ "$PATH_INFO" = "/${_INDEX}/" ]; then
+  sed 's;$;\r;' <<-EOF
+       { $(json_dump str:"${_INDEX}"): {
+           "aliases":{},
+           "mappings": {
+             "properties": {
+               "content": {"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
+               "hash":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
+               "metatags":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
+               "owner":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
+               "parts":{"properties":{"comments":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}},
+               "provider":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
+               "share_names":{"properties":{"paul":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}},
+               "source":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
+               "title":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}
+             }
+           },
+           "settings": {
+             "index": {
+               "routing":{"allocation":{"include":{"_tier_preference":"data_content"}}},
+               "number_of_shards":"1",
+               "provided_name": $(json_dump str:"${_INDEX}"),
+               "creation_date": "$(stat -c %W "${_DATA}/${_INDEX}")",
+               "number_of_replicas":"1",
+               "uuid":"0000000000000000000000",
+               "version":{"created":"8500010"}
+             }
+           }
+         }
+       }
+       EOF
+  else
+    sed 's;$;\r;' <<-EOF
+       { "name" : "head",
+         "cluster_name" : "elasticsearch",
+         "version" : {
+           "number" : "8.12.1",
+           "lucene_version" : "9.9.2",
+           "minimum_wire_compatibility_version" : "7.17.0",
+           "minimum_index_compatibility_version" : "7.0.0"
+         },
+         "tagline" : "You Know, for Search"
+       }
+       EOF
+  fi
+  exit 0
+
+else
+  printf '%s\r\n' "Status: 500 Internal Server Error" ""
+  exit 0
+fi