X-Git-Url: https://git.plutz.net/?p=confetti;a=blobdiff_plain;f=pdiread.sh;h=08fbaec3fb373c2fbb6cdde7974caaa520b849e3;hp=d774ada3caedba752d1271aa1bc8ceacc662b5d5;hb=6217c300314dd5791995c9bb7e148c3752d2d3d1;hpb=a76ef45bd763687be2e517955bc50089adaa64ed diff --git a/pdiread.sh b/pdiread.sh index d774ada..08fbaec 100755 --- a/pdiread.sh +++ b/pdiread.sh @@ -26,13 +26,23 @@ include_pdi="$0" BR=' ' +unescape() { + local unescape='s;(^(\\\\)*|[^\\](\\\\)*)\\n;\1\n;g; s;\\(.);\1;g' + if [ $# -eq 0 ]; then + sed -E "$unescape" + else + printf %s "$*" \ + | sed -E "$unescape" + fi +} + pdi_load() { # normalise PDI file for processing with pdi_* functions # functions in this library can only be applied to normalised data # Usage example: # data="$(pdi_load file.vcf)" - sed -En ' + sed -srn ' # === Read entire file into buffer === :X $bY; N; bX; :Y s;^.*$;\n&\n;; @@ -56,7 +66,7 @@ pdi_load() { # === Unscramble aggregated fields === :disag - s;\n([^:]+:)((.*[^\])?(\\\\)*),;\n\1\2\n\1;; + s;\n([^:\n]+:)(([^\n]*[^\])?(\\\\)*),;\n\1\2\n\1;; t disag; # === Insert FN when only N is present === @@ -100,7 +110,7 @@ pdi_load() { # === Update obsolete LABEL property === s;\nLABEL((\;[A-Za-z0-9-]+|\;[A-Za-z0-9-]+=([^;,:"]+|"[^"]+")(,[^;,:"]+|,"[^"]+")*)*):(.*)\n;\nADR\1\;LABEL="\5":\n;g; - p;' "$1" + p;' "$@" } pdi_count(){ @@ -114,13 +124,31 @@ pdi_count(){ } pdi_attrib(){ - local card=":$1" name="$2" cnt="${3:-1}" + local card=":$1" name="$2" cnt="${3:-1}" attr="$4" while [ $cnt -gt 0 ]; do [ "${card#*${BR}${name};}" = "$card" ] && return 1 card="${card#*${BR}${name};}" cnt=$((cnt - 1)) done - printf %s\\n "${card%%:*}" + card="${card%%:*}" + if [ "$attr" ]; then + case $card in + *\;"$attr"=*) card="${card#*;${attr}=}";; + "$attr"=*) card="${card#${attr}=}";; + "$attr"|*\;"$attr"|"$attr"\;*|*\;"$attr"\;*) return 0;; + *) return 1;; + esac + case $card in + \"*\"\;*|\'*\'\;*) + card="${card#[\"\']}"; card="${card%%[\"\'];*}" + ;; + \"*\"|\'*\') + card="${card#[\"\']}"; card="${card%%[\"\']}" + ;; + *\;*) card="${card%%;*}";; + esac + fi + printf %s\\n "${card}" } pdi_value(){