portable sed usage
[confetti] / pdiread.sh
index cbbc4e5..08fbaec 100755 (executable)
@@ -26,56 +26,91 @@ include_pdi="$0"
 BR='
 '
 
+unescape() {
+  local unescape='s;(^(\\\\)*|[^\\](\\\\)*)\\n;\1\n;g; s;\\(.);\1;g'
+  if [ $# -eq 0 ]; then
+    sed -E "$unescape"
+  else
+    printf %s "$*" \
+    | sed -E "$unescape"
+  fi
+}
+
 pdi_load() {
-  sed -r ':X;N;$!bX; s;\r\n[ \t];;g; s;\r\n;\n;g;' "$1" \
-  | sed -r '
+  # Normalise a PDI file (e.g. a vCard) for processing with the pdi_* functions.
+  # The other functions in this library only work on data normalised this way.
+  # Usage example:
+  #   data="$(pdi_load file.vcf)"
+
+  sed -srn '
+    # === Read entire file into buffer ===
+    :X $bY; N; bX; :Y s;^.*$;\n&\n;;
+
+    # === Join continuing lines, strip trailing CRs ===
+    s;\r*\n[ \t];;g;
+    s;\r*\n;\n;g;
+
     # === turn property names to upper case, strip group names ===
-    h; s;^([^;:]+);;;
-    x; s;^([^;:\.]+\.)?([^;:]+).*$;\2;;
-    y;abcdefghijklmnopqrstuvwxyz;ABCDEFGHIJKLMNOPQRSTUVWXYZ;
-    G; s;\n;;;
+    s;\n([^;:\.\n]+\.)([^;:\n]+);\n\2;g;
+    :upcase
+    s;(\n[^;:]*)a;\1A;g; s;(\n[^;:]*)b;\1B;g; s;(\n[^;:]*)c;\1C;g; s;(\n[^;:]*)d;\1D;g; s;(\n[^;:]*)e;\1E;g;
+    s;(\n[^;:]*)f;\1F;g; s;(\n[^;:]*)g;\1G;g; s;(\n[^;:]*)h;\1H;g; s;(\n[^;:]*)i;\1I;g; s;(\n[^;:]*)j;\1J;g;
+    s;(\n[^;:]*)k;\1K;g; s;(\n[^;:]*)l;\1L;g; s;(\n[^;:]*)m;\1M;g; s;(\n[^;:]*)n;\1N;g; s;(\n[^;:]*)o;\1O;g;
+    s;(\n[^;:]*)p;\1P;g; s;(\n[^;:]*)q;\1Q;g; s;(\n[^;:]*)r;\1R;g; s;(\n[^;:]*)s;\1S;g; s;(\n[^;:]*)t;\1T;g;
+    s;(\n[^;:]*)u;\1U;g; s;(\n[^;:]*)v;\1V;g; s;(\n[^;:]*)w;\1W;g; s;(\n[^;:]*)x;\1X;g; s;(\n[^;:]*)y;\1Y;g;
+    s;(\n[^;:]*)z;\1Z;g;
+    t upcase;
+
+    # === Insert empty attribute fields where no attributes are present ===
+    s;\n([^;:]+):;\n\1\;:;g;
+
+    # === Split comma-aggregated values into one property line per value ===
+    :disag
+    s;\n([^:\n]+:)(([^\n]*[^\])?(\\\\)*),;\n\1\2\n\1;;
+    t disag;
 
-    # === strip trailing CR (but keep CRs in property value) ===
-    # s;\r$;;;  # already done in in previous filter
+    # === Insert FN when only N is present ===
+    /\nFN[;:]/!{
+      s,\nN(;[^:]*)?:([^;\n]*);([^;\n]*);([^;\n]*);([^;\n]*);([^;\n]*);?\n,&FN;:\5 \3 \4 \2 \6\n,;
+      :despace
+      s,(\nFN;:[^\n]*)  ([^\n]*\n),\1 \2,;
+      s,(\nFN;:) ([^\n]*\n),\1\2,;
+      s,(\nFN;:[^\n]*) (\n),\1\2,;
+      t despace;
+    }
+    /\nFN[;:]/!{ s,\n(N[;:][^\n]*)\n,&F\1\n,; }  # Fallback
 
     # === Normalise various known vendor properties ===
-                s;^X-MS-CARDPICTURE(\;|:);PHOTO\1;;
-                        s;^X-GENDER(\;|:);GENDER\1;;
-                   s;^X-ANNIVERSARY(\;|:);ANNIVERSARY\1;;
-         s;^X-EVOLUTION-ANNIVERSARY(\;|:);ANNIVERSARY\1;;
-    s;^X-KADDRESSBOOK-X-ANNIVERSARY(\;|:);ANNIVERSARY\1;;
-            s;^X-EVOLUTION-BLOG-URL(\;|:);URL\1;;
-                           s;^AGENT(\;|:);RELATED\;VALUE=text\;TYPE=agent\1;;
-                     s;^X-ASSISTANT(\;|:);RELATED\;VALUE=text\;TYPE=assistant\1;;
-           s;^X-EVOLUTION-ASSISTANT(\;|:);RELATED\;VALUE=text\;TYPE=assistant\1;;
- s;^X-KADDRESSBOOK-X-ASSISTANTSNAME(\;|:);RELATED\;VALUE=text\;TYPE=assistant\1;;
-                       s;^X-MANAGER(\;|:);RELATED\;VALUE=text\;TYPE=manager\1;;
-             s;^X-EVOLUTION-MANAGER(\;|:);RELATED\;VALUE=text\;TYPE=manager\1;;
-   s;^X-KADDRESSBOOK-X-MANAGERSNAME(\;|:);RELATED\;VALUE=text\;TYPE=manager\1;;
-                        s;^X-SPOUSE(\;|:);RELATED\;VALUE=text\;TYPE=spouse\1;;
-              s;^X-EVOLUTION-SPOUSE(\;|:);RELATED\;VALUE=text\;TYPE=spouse\1;;
-     s;^X-KADDRESSBOOK-X-SPOUSENAME(\;|:);RELATED\;VALUE=text\;TYPE=spouse\1;;
+                s;\nX-MS-CARDPICTURE(\;|:);\nPHOTO\1;g;
+                        s;\nX-GENDER(\;|:);\nGENDER\1;g;
+                   s;\nX-ANNIVERSARY(\;|:);\nANNIVERSARY\1;g;
+         s;\nX-EVOLUTION-ANNIVERSARY(\;|:);\nANNIVERSARY\1;g;
+    s;\nX-KADDRESSBOOK-X-ANNIVERSARY(\;|:);\nANNIVERSARY\1;g;
+            s;\nX-EVOLUTION-BLOG-URL(\;|:);\nURL\1;g;
+                           s;\nAGENT(\;|:);\nRELATED\;VALUE=text\;TYPE=agent\1;g;
+                     s;\nX-ASSISTANT(\;|:);\nRELATED\;VALUE=text\;TYPE=assistant\1;g;
+           s;\nX-EVOLUTION-ASSISTANT(\;|:);\nRELATED\;VALUE=text\;TYPE=assistant\1;g;
+ s;\nX-KADDRESSBOOK-X-ASSISTANTSNAME(\;|:);\nRELATED\;VALUE=text\;TYPE=assistant\1;g;
+                       s;\nX-MANAGER(\;|:);\nRELATED\;VALUE=text\;TYPE=manager\1;g;
+             s;\nX-EVOLUTION-MANAGER(\;|:);\nRELATED\;VALUE=text\;TYPE=manager\1;g;
+   s;\nX-KADDRESSBOOK-X-MANAGERSNAME(\;|:);\nRELATED\;VALUE=text\;TYPE=manager\1;g;
+                        s;\nX-SPOUSE(\;|:);\nRELATED\;VALUE=text\;TYPE=spouse\1;g;
+              s;\nX-EVOLUTION-SPOUSE(\;|:);\nRELATED\;VALUE=text\;TYPE=spouse\1;g;
+     s;\nX-KADDRESSBOOK-X-SPOUSENAME(\;|:);\nRELATED\;VALUE=text\;TYPE=spouse\1;g;
 
     # === Normalise obsolete vendor IM properties ===
-            s;^X-AIM((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;IMPP\1:aim:;;
-            s;^X-ICQ((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;IMPP\1:aim:;;
-    s;^X-GOOGLE-TALK((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;IMPP\1:xmpp:;;
-         s;^X-JABBER((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;IMPP\1:xmpp:;;
-            s;^X-MSN((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;IMPP\1:msn:;;
-          s;^X-YAHOO((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;IMPP\1:ymsgr:;;
-            s;^X-SIP((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):(sip:)?;IMPP\1:sip:;;
+            s;\nX-AIM((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;\nIMPP\1:aim:;g;
+            s;\nX-ICQ((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;\nIMPP\1:aim:;g;
+    s;\nX-GOOGLE-TALK((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;\nIMPP\1:xmpp:;g;
+         s;\nX-JABBER((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;\nIMPP\1:xmpp:;g;
+            s;\nX-MSN((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;\nIMPP\1:msn:;g;
+          s;\nX-YAHOO((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):;\nIMPP\1:ymsgr:;g;
+            s;\nX-SIP((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):(sip:)?;\nIMPP\1:sip:;g;
 
     # === Update obsolete LABEL property ===
-    s;^LABEL((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):(.*)$;ADR\1\;LABEL="\5":;;
-
-    # === Insert empty attribute fields where no attributes are present ===
-    s;^([^;:]+):;\1\;:;;
+    s;\nLABEL((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):(.*)\n;\nADR\1\;LABEL="\5":\n;g;
 
-    # unscramble aggregated fields
-    :disag
-    s;^([^:]+:)((.*[^\])?(\\\\)*),;\1\2\n\1;;
-    t disag;
-    '
+    p;' "$@"
 }
 
 pdi_count(){
@@ -89,21 +124,73 @@ pdi_count(){
 }
 
 pdi_attrib(){
-  local card=":$1" name="$2" cnt="${3:-1}"
+  local card=":$1" name="$2" cnt="${3:-1}" attr="$4"
   while [ $cnt -gt 0 ]; do
     [ "${card#*${BR}${name};}" = "$card" ] && return 1
     card="${card#*${BR}${name};}"
     cnt=$((cnt - 1))
   done
-  printf %s\\n "${card%%:*}"
+  card="${card%%:*}"
+  if [ "$attr" ]; then
+    case $card in
+      *\;"$attr"=*) card="${card#*;${attr}=}";;
+      "$attr"=*) card="${card#${attr}=}";;
+      "$attr"|*\;"$attr"|"$attr"\;*|*\;"$attr"\;*) return 0;;
+      *) return 1;;
+    esac
+    case $card in
+      \"*\"\;*|\'*\'\;*)
+        card="${card#[\"\']}"; card="${card%%[\"\'];*}"
+        ;;
+      \"*\"|\'*\')
+        card="${card#[\"\']}"; card="${card%%[\"\']}"
+        ;;
+      *\;*) card="${card%%;*}";;
+    esac
+  fi
+  printf %s\\n "${card}"
 }
 
 pdi_value(){
   local card="${BR}$1" name="$2" cnt="${3:-1}"
-  while [ $cnt -gt 0 ]; do
+  while [ "$cnt" -gt 0 ]; do
     [ "${card#*${BR}${name};*:}" = "$card" ] && return 1
     card="${card#*${BR}${name};*:}"
     cnt=$((cnt - 1))
   done
   printf %s\\n "${card%%${BR}*}"
 }
+
+pdi_update_value(){
+  local card="${BR}$1" name="$2" cnt="$3" val="$4"
+  while [ "$cnt" -gt 0 ]; do
+    if [ "${card#*${BR}${name};*:}" = "${card}" ]; then
+       printf '%s\n%s;:' "${card%${BR}END;:VCARD*}" "${name}"
+       card="${BR}END;:VCARD"
+       break;
+    else
+       printf '%s\n%s;' "${card%%${BR}${name};*}" "${name}"
+       card="${card#*${BR}${name};}"
+       printf '%s:' "${card%%:*}"
+       card="${card#*:}"
+    fi
+    cnt=$((cnt - 1))
+  done
+  printf '%s\n%s\n' "$val" "${card#*${BR}}"
+}
+
+pdi_update_attrib(){
+  local card="${BR}$1" name="$2" cnt="$3" val="$4"
+  while [ "$cnt" -gt 0 ]; do
+    if [ "${card#*${BR}${name};*:}" = "${card}" ]; then
+       printf '%s\n%s;' "${card%${BR}END;:VCARD*}" "${name}"
+       card=":${BR}END;:VCARD"
+       break;
+    else
+       printf '%s\n%s;' "${card%%${BR}${name};*}" "${name}"
+       card="${card#*${BR}${name};}"
+    fi
+    cnt=$((cnt - 1))
+  done
+  printf '%s:%s\n' "$val" "${card#*:}"
+}