git.plutz.net Git - rigidfind/blob - index.cgi
index ingest emulating Elasticsearch
[rigidfind] / index.cgi
1 #!/bin/sh
2
3 . "${_EXEC:-${0%/*}}/cgilite/cgilite.sh"
4 . "${_EXEC:-${0%/*}}/cgilite/storage.sh"
5 . "${_EXEC:-${0%/*}}/cgilite/json.sh"
6
# Request timestamp in seconds since the epoch; an already set _DATE is kept.
if [ -z "$_DATE" ]; then
  _DATE="$(date +%s)"
fi

# The index name is the first path segment of PATH_INFO:
# drop one leading slash, then everything from the next slash onwards.
_INDEX="${PATH_INFO#/}"
_INDEX="${_INDEX%%/*}"

# Location handed to DBM for the per-index document records.
_records="${_DATA}/${_INDEX}/_0_DOCS"
11
# Print the base64-decoded "content" member of a JSON document.
# Arguments: $1 - JSON document in the form json_get accepts
_ingest_payload() {
  json_get "$1" content | base64 -d
}

# Read a zip container on stdin and print one of its XML members with all
# markup tags replaced; at most 128M of the member is processed.
# Arguments: $1 - path of the member inside the archive
#            $2 - text every tag is replaced with (empty or a single space)
# NOTE(review): the archive is read through /dev/stdin from a pipe, which
# needs an unzip that copes with non-seekable input - confirm on the target.
_ingest_zip_xml() {
  unzip -qc /dev/stdin "$1" \
  | head -c 128M | sed "s;<[^>]*>;$2;g"
}

# Convert an uploaded document into plain text for indexing.
# The converter is chosen by the file extension of the "title" member; the
# data is taken from the base64 encoded "content" member.
# Arguments: $1 - JSON document in the form json_get accepts
# Outputs:   extracted text on stdout; nothing for unknown extensions
ingest() {
  local J="$1" title

  title="$(json_get "$J" title)"

  case "$title" in
    *.md|*.txt|*.csv)
      # Already plain text, pass through unchanged
      _ingest_payload "$J"
      ;;
    *.pdf)
      # pdftotext must be given an output file when the PDF comes from
      # stdin; the second "-" sends the text to stdout.
      _ingest_payload "$J" | pdftotext - -
      ;;
    *.doc)
      _ingest_payload "$J" | catdoc /dev/stdin
      ;;
    *.xls)
      _ingest_payload "$J" | xls2csv /dev/stdin
      ;;
    *.ppt)
      _ingest_payload "$J" | catppt /dev/stdin
      ;;
    *.html|*.xml|*.svg)
      # Drop markup tags (only tags that open and close on the same line)
      _ingest_payload "$J" | sed 's;<[^>]*>;;g'
      ;;
    *.docx)
      _ingest_payload "$J" | _ingest_zip_xml word/document.xml ''
      ;;
    *.xlsx)
      # Tags become spaces so neighbouring strings stay separated
      _ingest_payload "$J" | _ingest_zip_xml xl/sharedStrings.xml ' '
      ;;
    *.odt)
      _ingest_payload "$J" | _ingest_zip_xml content.xml ''
      ;;
    *.ods|*.odp)
      _ingest_payload "$J" | _ingest_zip_xml content.xml ' '
      ;;
    *)
      # Unknown type: contributes no text
      :
      ;;
  esac
}
65
# Request dispatch: answers a small subset of the Elasticsearch HTTP API.
#   PUT    <index>/_doc<id>  index a document and store its metadata record
#   DELETE <index>/_doc<id>  remove the stored metadata record
#   HEAD                     product check, headers only
#   GET    /<index>/         static index description; any other path: node info
#   POST                     accepted but not implemented
#
# Here-documents below are deliberately written as plain <<EOF with flush-left
# bodies: the <<- form only strips leading tabs and silently breaks as soon as
# the indentation is converted to spaces.

# Choose the content type for HEAD/GET responses.
# Arguments: $1 - value of the request's Accept header (may be empty)
# Outputs:   the Accept value itself when it mentions vnd.elasticsearch+json
#            anywhere (parameters such as compatible-with included),
#            "application/json" otherwise
_es_ctype() {
  case "$1" in
    *vnd.elasticsearch+json*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "application/json" ;;
  esac
}

# Write the remaining headers and the JSON body shared by the PUT and DELETE
# responses. The caller has to emit the Status header first.
# Globals:   _INDEX, _doc, _DATE (read)
# Arguments: $1 - value of the "result" member
_es_doc_response() {
  sed 's;$;\r;' <<EOF
X-elastic-product: Elasticsearch
content-type: application/vnd.elasticsearch+json;compatible-with=8

{ "_index": $(json_dump str:"${_INDEX}"),
  "_id": $(json_dump str:"$_doc"),
  "result": "$1",
  "_indexdate": $_DATE
}
EOF
}

if [ "$REQUEST_METHOD" = "PUT" ]; then
  # Document id: whatever follows "/<index>/_doc" in the request path
  _doc="${PATH_INFO#"/${_INDEX}/_doc"}"

  # Read exactly CONTENT_LENGTH bytes of request body and parse them
  J="$(json_load "$(head -c "${CONTENT_LENGTH:-0}")")"

  # Feed the extracted text to the concordance builder.
  # NOTE(review): the separator between document id and date is reproduced as
  # it appears in this copy (eight spaces); tabs seem to have been expanded
  # here, so confirm whether concordance.sh expects a tab.
  ingest "$J" \
  | "${_EXEC}/concordance.sh" \
    "$_DATA/$_INDEX/" "$(STRING "$_doc")        $_DATE"

  # Build the stored record: strip the "obj:" prefix, drop the content member
  # and add the indexing timestamp.
  J="${J#obj:}"
  J="$(DB2 "$J" delete content)"
  J="$(DB2 "$J" set _indexdate num:"$_DATE")"

  if DBM "$_records" insert "$_doc" "$J"; then
    # No line continuation after this printf: the assignment below must be a
    # statement of its own, otherwise it is printed as a bogus header and
    # "result" stays empty in the response body.
    printf '%s: %s\r\n' "Status" "201 Created" "Location" "/${_INDEX}/_doc/$(URL "$_doc")"
    result="created"
  elif DBM "$_records" update "$_doc" "$J"; then
    printf '%s: %s\r\n' "Status" "200 OK"
    result="updated"
  else
    printf '%s\r\n' "Status: 500 Internal Server Error" ""
    exit 0
  fi

  _es_doc_response "$result"
  exit 0

elif [ "$REQUEST_METHOD" = "DELETE" ]; then
  _doc="${PATH_INFO#"/${_INDEX}/_doc"}"

  # Only the exit status of "get" matters here; anything it might print is
  # discarded so it cannot end up in front of the Status header.
  if DBM "$_records" get "$_doc" >/dev/null; then
    if DBM "$_records" delete "$_doc"; then
      printf '%s: %s\r\n' "Status" "200 OK"
      result="deleted"
    else
      printf '%s\r\n' "Status: 500 Internal Server Error" ""
      exit 0
    fi
  else
    printf '%s: %s\r\n' "Status" "404 Not Found"
    result="not_found"
  fi

  _es_doc_response "$result"
  exit 0

elif [ "$REQUEST_METHOD" = "POST" ]; then
  # Not implemented: nothing is written in this branch
  :

elif [ "$REQUEST_METHOD" = "HEAD" ]; then
  accept="$(HEADER Accept)"
  ctype="$(_es_ctype "$accept")"

  # NOTE(review): HEAD/GET start with an HTTP status line while PUT/DELETE use
  # a CGI "Status:" header - confirm which form the deployment expects.
  # The empty line ends the header block; it is required even without a body.
  sed 's;$;\r;' <<EOF
HTTP/1.1 200 OK
X-elastic-product: Elasticsearch
content-type: ${ctype}

EOF
  exit 0

elif [ "$REQUEST_METHOD" = "GET" ]; then
  accept="$(HEADER Accept)"
  ctype="$(_es_ctype "$accept")"

  sed 's;$;\r;' <<EOF
HTTP/1.1 200 OK
X-elastic-product: Elasticsearch
content-type: ${ctype}

EOF

  if [ "$PATH_INFO" = "/${_INDEX}/" ]; then
    # Static index description; only the index name and the creation date
    # (birth time of the index directory as reported by stat) are dynamic.
    sed 's;$;\r;' <<EOF
{ $(json_dump str:"${_INDEX}"): {
    "aliases":{},
    "mappings": {
      "properties": {
        "content": {"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
        "hash":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
        "metatags":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
        "owner":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
        "parts":{"properties":{"comments":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}},
        "provider":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
        "share_names":{"properties":{"paul":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}},
        "source":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
        "title":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}
      }
    },
    "settings": {
      "index": {
        "routing":{"allocation":{"include":{"_tier_preference":"data_content"}}},
        "number_of_shards":"1",
        "provided_name": $(json_dump str:"${_INDEX}"),
        "creation_date": "$(stat -c %W "${_DATA}/${_INDEX}")",
        "number_of_replicas":"1",
        "uuid":"0000000000000000000000",
        "version":{"created":"8500010"}
      }
    }
  }
}
EOF
  else
    # Any other path: fixed node/version information
    sed 's;$;\r;' <<EOF
{ "name" : "head",
  "cluster_name" : "elasticsearch",
  "version" : {
    "number" : "8.12.1",
    "lucene_version" : "9.9.2",
    "minimum_wire_compatibility_version" : "7.17.0",
    "minimum_index_compatibility_version" : "7.0.0"
  },
  "tagline" : "You Know, for Search"
}
EOF
  fi
  exit 0

else
  # Any other request method is answered with a server error
  printf '%s\r\n' "Status: 500 Internal Server Error" ""
  exit 0
fi