}
function inline( line, LOCAL, len, text, code, href, guard ) {
- nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # not underline (except when escaped)
- na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # not asterisk (except when escaped)
- ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner <em> (underline)
- isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__" # inner <strong> (underline)
- iea = "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*" # inner <em> (asterisk)
- isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*" # inner <strong> (asterisk)
-
if ( line ~ /^$/ ) { # Recursion End
return "";
return "<code>" code "</code>" inline( substr( line, len + 1 ) )
}
+ # Macros
+ } else if ( match( line, /^<<([^>]|>[^>])+>>/ ) ) {
+ len = RLENGTH;
+ return "<code class=\"macro\">" HTML( substr( line, 3, len - 4 ) ) "</code>" inline(substr(line, len + 1));
+
# Wiki style links
} else if ( match( line, /^\[\[([^]|]+)(\|[^]]+)?\]\]/) ) {
len = RLENGTH;
href = URL( substr( line, 2, len - 2) );
return "<a href=\"mailto:" href "\">" href "</a>" inline( substr( line, len + 1) );
+ # Verbatim inline HTML
+ } else if ( AllowHTML && match( line, /^(<!--([^-]|-[^-]|--[^>])*-->|<\?([^\?]|\?[^>])*\?>|<![A-Z][^>]*>|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) {
+ len = RLENGTH;
+ return substr( line, 1, len) inline(substr(line, len + 1));
+
# inline links
- # ,_______________________Image____________________________,
- } else if ( match(line, /^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/) ) {
+ } else if ( match(line, "^" lii "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)") ) {
len = RLENGTH;
- text = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \
- "\\1", 1, substr(line, 1, len) );
- href = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \
- "\\4", 1, substr(line, 1, len) );
- title = gensub(/^\[([^]]+|!\[[^]]*\]\([^"\)]+([ \t]+"[^"]+")?\)(\{[a-zA-Z \t-]*\})?)\]\(([^"\)]+)([[:space:]]+"([^"]+)")?\)/, \
- "\\6", 1, substr(line, 1, len) );
- if ( title ) {
- return "<a href=\"" URL(href) "\" title=\"" HTML(title) "\">" inline( text ) "</a>" inline( substr( line, len + 1) );
- } else {
- return "<a href=\"" URL(href) "\">" inline( text ) "</a>" inline( substr( line, len + 1) );
- }
+ text = href = title = substr( line, 1, len);
+ sub("^\\[", "", text); sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)$", "", text);
+ sub("^" lii "\\([\n\t ]*", "", href); sub("([\n\t ]+" lit ")?[\n\t ]*\\)$", "", href);
+ sub("^" lii "\\([\n\t ]*" lid, "", title); sub("[\n\t ]*\\)$", "", title); sub("^[\n\t ]+", "", title);
+
+ if ( match(href, /^<.*>$/) ) { sub(/^</, "", href); sub(/>$/, "", href); }
+ if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); }
+ else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); }
+ else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); }
+
+ gsub(/\\/, "", href); gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title);
+
+ return "<a href=\"" URL(href) "\"" (title?" title=\"" HTML(title) "\"":"") ">" \
+ inline( text ) "</a>" inline( substr( line, len + 1) );
# reference style links
} else if ( match(line, /^\[([^]]+)\] ?\[([^]]*)\]/ ) ) {
}
# inline images
- } else if ( match(line, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/) ) {
- len = RLENGTH;
- text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\1", "g", substr(line, 1, len) );
- href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\2", "g", substr(line, 1, len) );
- title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\4", "g", substr(line, 1, len) );
- attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?/, "\\6", "g", substr(line, 1, len) );
- if ( title && attrib ) {
- return "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text) "\" title=\"" HTML(title) "\" class=\"" HTML(attrib) "\">" \
- inline( substr( line, len + 1) );
- } else if ( title ) {
- return "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text) "\" title=\"" HTML(title) "\">" \
- inline( substr( line, len + 1) );
- } else if ( attrib ) {
- return "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text) "\" class=\"" HTML(attrib) "\">" \
- inline( substr( line, len + 1) );
- } else {
- return "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text) "\">" \
- inline( substr( line, len + 1) );
- }
+ } else if ( match(line, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?") ) {
+ len = RLENGTH; text = href = title = attrib = substr( line, 1, len);
+
+ sub("^!\\[", "", text);
+ sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", text);
+
+ sub("^!" lix "\\([\n\t ]*", "", href);
+ sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", href);
+
+ sub("^!" lix "\\([\n\t ]*" lid, "", title);
+ sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?$", "", title);
+ sub("^[\n\t ]+", "", title);
+
+ sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib);
+ sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib);
+
+ if ( match(href, /^<.*>$/) ) { sub(/^</, "", href); sub(/>$/, "", href); }
+ if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); }
+ else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); }
+ else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); }
+
+ gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href);
+ gsub(/\\/, "", title); gsub(/[\n\t]+/, " ", title);
+
+ return "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text?text:title?title:href) "\"" \
+ (title?" title=\"" HTML(title) "\"":"") (attrib?" class=\"" HTML(attrib) "\"":"") \
+ ">" inline( substr( line, len + 1) );
# reference style images
} else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\]/ ) ) {
len = RLENGTH;
return "<em>" inline( substr( line, 2, len - 2 ) ) "</em>" inline( substr( line, len + 1 ) );
- # Macros
- } else if ( AllowMacros && match( line, /^<<([^>]|>[^>])+>>/) ) {
- len = RLENGTH;
- return macro( substr( line, 3, len - 4 ) ) inline(substr(line, len + 1));
-
- # Verbatim inline HTML
- } else if ( AllowHTML && match( line, /^(<!--([^-]|-[^-]|--[^>])*-->|<\?([^\?]|\?[^>])*\?>|<![A-Z][^>]*>|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) {
- len = RLENGTH;
- return substr( line, 1, len) inline(substr(line, len + 1));
-
# Literal HTML entities
} else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) {
len = RLENGTH;
# Pipe Tables (pandoc / php md / gfm )
} else if ( match(block, "^((\\|)?([^\n]+\\|)+[^\n]+(\\|)?)\n" \
- "((\\|)?:?(-+:?[\\|+])+:?-+:?(\\|)?)\n" \
+ "((\\|)?(:?-+:?[\\|+])+:?-+:?(\\|)?)\n" \
"((\\|)?([^\n]+\\|)+[^\n]+(\\|)?(\n|$))+" ) ) {
len = RLENGTH; st = RSTART;
#initialize empty arrays
return headline( n, text, 0 ) _block( substr( block, len + 1) );
# block images (wrapped in <figure>)
- } else if ( match(block, /^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n|$)/) ) {
- len = RLENGTH;
- text = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\1", "g", block);
- href = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\2", "g", block);
- title = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\4", "g", block);
- attrib = gensub(/^!\[([^]]*)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)(\{([a-zA-Z \t-]*)\})?(\n.*)?$/, "\\6", "g", block);
- if ( title && attrib ) {
- return "<figure data-src=\"" URL(href, 1) "\" class=\"" HTML(attrib) "\">" \
- "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text) "\" class=\"" HTML(attrib) "\">" \
- "<figcaption>" inline(title) "</figcaption>" \
- "</figure>\n\n" \
- _block( substr( block, len + 1) );
- } else if ( title ) {
- return "<figure data-src=\"" URL(href, 1) "\">" \
- "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text) "\">" \
- "<figcaption>" inline(title) "</figcaption>" \
- "</figure>\n\n" \
- _block( substr( block, len + 1) );
- } else if ( attrib ) {
- return "<figure data-src=\"" URL(href, 1) "\" class=\"" HTML(attrib) "\">" \
- "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text) "\" class=\"" HTML(attrib) "\">" \
- "</figure>\n\n" \
- _block( substr( block, len + 1) );
- } else {
- return "<figure data-src=\"" URL(href, 1) "\">" \
- "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text) "\">" \
- "</figure>\n\n" \
- _block( substr( block, len + 1) );
- }
+ } else if ( match(block, "^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n|$)") ) {
+ len = RLENGTH; text = href = title = attrib = substr( block, 1, len);
+
+ sub("^!\\[", "", text);
+ sub("\\]\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", text);
+
+ sub("^!" lix "\\([\n\t ]*", "", href);
+ sub("([\n\t ]+" lit ")?[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", href);
+
+ sub("^!" lix "\\([\n\t ]*" lid, "", title);
+ sub("[\n\t ]*\\)(\\{[a-zA-Z \t-]*\\})?(\n.*)?$", "", title);
+ sub("^[\n\t ]+", "", title);
+
+ sub("^!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\)", "", attrib);
+ sub("(\n.*)?$", "", attrib);
+ sub(/^\{[ \t]*/, "", attrib); sub(/[ \t]*\}$/, "", attrib); gsub(/[ \t]+/, " ", attrib);
+
+ if ( match(href, /^<.*>$/) ) { sub(/^</, "", href); sub(/>$/, "", href); }
+ if ( match(title, /^".*"$/) ) { sub(/^"/, "", title); sub(/"$/, "", title); }
+ else if ( match(title, /^'.*'$/) ) { sub(/^'/, "", title); sub(/'$/, "", title); }
+ else if ( match(title, /^\(.*\)$/) ) { sub(/^\(/, "", title); sub(/\)$/, "", title); }
+
+ gsub(/^[\t ]+$/, "", text); gsub(/\\/, "", href);
+
+ return "<figure data-src=\"" URL(href, 1) "\"" (attrib?" class=\"" HTML(attrib) "\"":"") ">" \
+ "<img src=\"" URL(href, 1) "\" alt=\"" HTML(text?text:title?title:href) "\"" \
+ (attrib?" class=\"" HTML(attrib) "\"":"") ">" \
+ (title?"<figcaption>" inline(title) "</figcaption>":"") \
+ "</figure>\n\n" \
+ _block( substr( block, len + 1) );
# reference style images (block)
} else if ( match(line, /^!\[([^]]*)\] ?\[([^]]*)\](\n|$)/ ) ) {
}
# Macros (standalone <<macro>> calls handled as block, so they are not wrapped in paragraph)
- } else if ( AllowMacros && match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/) ) {
+ } else if ( match( block, /^<<(([^>]|>[^>])+)>>(\n|$)/ ) ) {
len = RLENGTH;
text = gensub(/^<<(([^>]|>[^>])+)>>(\n.*)?$/, "\\1", 1, block);
- return macro(text) _block(substr(block, len + 1) );
+ return "<code class=\"macro\">" HTML(text) "</code>" _block(substr(block, len + 1) );
# Definition list
} else if (match( block, "^(([ \t]*\n)*[^:\n \t][^\n]+\n" \
return "\n<dl>\n" _dlist( list ) "</dl>\n" _block( block );
# Unordered list types
- } else if ( text = _startlist( block, "ul", "-", "([+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
+ } else if ( text = _startlist( block, "ul", "-", "([+*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
+ return text;
+ } else if ( text = _startlist( block, "ul", "\\+", "([-*•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
return text;
- } else if ( text = _startlist( block, "ul", "\\+", "([-*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
+ } else if ( text = _startlist( block, "ul", "\\*", "([-+•]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
return text;
- } else if ( text = _startlist( block, "ul", "\\*", "([-+]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
+ } else if ( text = _startlist( block, "ul", "•", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\)|#\\))") ) {
return text;
# Ordered list types
- } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*]|#\\.|[0-9]+\\)|#\\))") ) {
+ } else if ( text = _startlist( block, "ol", "[0-9]+\\.", "([-+*•]|#\\.|[0-9]+\\)|#\\))") ) {
return text;
- } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*]|[0-9]+\\.|#\\.|#\\))") ) {
+ } else if ( text = _startlist( block, "ol", "[0-9]+\\)", "([-+*•]|[0-9]+\\.|#\\.|#\\))") ) {
return text;
- } else if ( text = _startlist( block, "ol", "#\\.", "([-+*]|[0-9]+\\.|[0-9]+\\)|#\\))") ) {
+ } else if ( text = _startlist( block, "ol", "#\\.", "([-+*•]|[0-9]+\\.|[0-9]+\\)|#\\))") ) {
return text;
- } else if ( text = _startlist( block, "ol", "#\\)", "([-+*]|[0-9]+\\.|#\\.|[0-9]+\\))") ) {
+ } else if ( text = _startlist( block, "ol", "#\\)", "([-+*•]|[0-9]+\\.|#\\.|[0-9]+\\))") ) {
return text;
# Split paragraphs
function _startlist(block, type, mark, exclude, LOCAL, st, len, list, indent, text) {
if (match( block, "(^|\n) ? ? ?" mark "[ \t][^\n]+(\n|$)" \
- "(([ \t]*\n)* ? ? ?" mark "[ \t][^\n]+(\n|$)" \
- "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \
- "|[^\n \t][^\n]+(\n|$))*" ) ) {
- st = RSTART; len = RLENGTH; list = substr( block, RSTART, RLENGTH);
+ "(([ \t]*\n)* ? ? ?" mark "[ \t][^\n]+(\n|$)" \
+ "|([ \t]*\n)*( ? ? ?\t| +)[^\n]+(\n|$)" \
+ "|[^\n \t][^\n]+(\n|$))*" ) ) {
+ st = RSTART; len = RLENGTH; list = substr( block, st, len);
sub("^\n", "", list); match(list, "^ ? ? ?"); indent = RLENGTH;
gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list);
text = substr(block, 1, st - 1); block = substr(block, st + len);
+ if (match(text, /\n[[:space:]]*\n/)) return 0;
+ if (match(text, "(^|\n) ? ? ?" exclude "[ \t][^\n]+")) return 0;
if (match( list, "\n" exclude "[ \t]" )) {
block = substr(list, RSTART + 1) block;
list = substr(list, 1, RSTART);
HL[1] = 0; HL[2] = 0; HL[3] = 0; HL[4] = 0; HL[5] = 0; HL[6] = 0;
# hls = "0 0 0 0 0 0";
+ # Universal Patterns
+ # Regex fragments kept as strings so they can be concatenated into larger
+ # dynamic regexes (hence the doubled backslash escaping).
+ nu = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\_]|_[[:alnum:]])*" # run without underscore (escaped chars and _ followed by an alphanumeric are allowed)
+ na = "(\\\\\\\\|\\\\[^\\\\]|[^\\\\\\*])*" # run without asterisk (escaped chars are allowed)
+ ieu = "_([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])_" # inner <em> (underscore); content may not start or end with whitespace
+ isu = "__([^_[:space:]]|[^_[:space:]]" nu "[^_[:space:]])__" # inner <strong> (underscore); content may not start or end with whitespace
+ iea = "\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*" # inner <em> (asterisk); content may not start or end with whitespace
+ isa = "\\*\\*([^\\*[:space:]]|[^\\*[:space:]]" na "[^\\*[:space:]])\\*\\*" # inner <strong> (asterisk); content may not start or end with whitespace
+
+ lix="\\[(\\\\[^\n]|[^]\n\\\\[])*\\]" # link text: [...] on one line, no unescaped square brackets inside
+ lid="(<(\\\\[^\n]|[^\n<>\\\\])*>|(\\\\.|[^()\"'\\\\])+|([^<\n\t ()\\\\]|\\\\[^\n])(\\\\[\n]|[^\n\t \\(\\)\\\\])*)" # link destination: <angle-bracketed> or bare form
+ lit="(\"(\\\\.|[^\"\\\\])*\"|'(\\\\.|[^'\\\\])*'|\\((\\\\.|[^\\(\\)\\\\])*\\))" # link title: double-quoted, single-quoted or parenthesized
+ # link text that may contain one embedded inline image: [ ... ![alt](dest title) ... ]
+ lii="\\[(\\\\[^\n]|[^]\n\\\\[])*(!" lix "\\([\n\t ]*" lid "([\n\t ]+" lit ")?[\n\t ]*\\))?(\\\\[^\n]|[^]\n\\\\[])*\\]"
+
+
# Buffering of the full file is necessary, e.g. to find reference links
while (getline) { file = file $0 "\n"; }
# Clean up MS-DOS line breaks