# - [x] Lists (ordered, unordered)
# - [x] Code blocks (using indentation)
# - [x] Horizontal rules
-# - [x] Verbatim HTML block (gfm)
+# - [x] Verbatim HTML block (disabled by default)
#
# Basic Markdown - Inline elements:
# ---------------------------------
# - [x] Images / reference style images
# - [x] <automatic links>
# - [x] backslash escapes
-# - [x] Verbatim HTML inline
-# - [ ] HTML escaping
+# - [x] Verbatim HTML inline (disabled by default)
+# - [x] HTML escaping
+#
+# NOTE: Set the environment variable MD_HTML=true to enable verbatim HTML
#
# Extensions - Block elements:
# ----------------------------
} else if ( match(line, /^ \n/) ) {
return "<br />\n" inline( substr(line, RLENGTH + 1) );
- # Verbatim inline HTML
- } else if ( match( line, /^(<!--([^-]|-[^-]|--[^>])*-->|<\?([^\?]|\?[^>])*\?>|<![A-Z][^>]*>|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) {
- len = RLENGTH;
- return substr( line, 1, len) inline(substr(line, len + 1));
-
# ``code spans``
} else if ( match( line, /^`+/) ) {
len = RLENGTH
len = RLENGTH;
return "<em>" inline( substr( line, 2, len - 2 ) ) "</em>" inline( substr( line, len + 1 ) );
+ # Verbatim inline HTML
+ } else if ( AllowHTML && match( line, /^(<!--([^-]|-[^-]|--[^>])*-->|<\?([^\?]|\?[^>])*\?>|<![A-Z][^>]*>|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) {
+ len = RLENGTH;
+ return substr( line, 1, len) inline(substr(line, len + 1));
+
+ # Literal HTML entities
+ } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) {
+ len = RLENGTH;
+ return substr( line, 1, len ) inline(substr(line, len + 1));
+
+ # Escape lone HTML character
+ } else if ( match( line, /^[&<>"']/) ) {
+ return HTML(substr(line, 1, 1)) inline(substr(line, 2));
+
# continue walk over string
} else {
return substr(line, 1, 1) inline( substr(line, 2) );
return "";
# HTML #2 #3 #4 #5
- } else if ( match( block, /(^|\n) ? ? ?(<!--([^-]|-[^-]|--[^>])*(-->|$)|<\?([^\?]|\?[^>])*(\?>|$)|<![A-Z][^>]*(>|$)|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*(\]\]>|$))/) ) {
+ } else if ( AllowHTML && match( block, /(^|\n) ? ? ?(<!--([^-]|-[^-]|--[^>])*(-->|$)|<\?([^\?]|\?[^>])*(\?>|$)|<![A-Z][^>]*(>|$)|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*(\]\]>|$))/) ) {
len = RLENGTH; st = RSTART;
return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
# HTML #6
- } else if ( match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
+ } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
len = RLENGTH; st = RSTART;
return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
# HTML #1
- } else if ( match( tolower(block), /(^|\n) ? ? ?<(script|pre|style)([[:space:]\n>]).*(<\/script>|<\/pre>|<\/style>|$)/) ) {
+ } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<(script|pre|style)([[:space:]\n>]).*(<\/script>|<\/pre>|<\/style>|$)/) ) {
len = RLENGTH; st = RSTART;
match( tolower(substr(block, st, len)), /(<\/script>|<\/pre>|<\/style>)/);
len = RSTART + RLENGTH;
return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
# HTML #7
- } else if ( match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
+ } else if ( AllowHTML && match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
len = RLENGTH; st = RSTART;
return substr(block, st, len) _block(substr(block, st + len));
BEGIN {
# Global Vars
file = ""; rl_href[""] = ""; rl_title[""] = "";
+ if (ENVIRON["MD_HTML"] == "true") { AllowHTML = "true"; }
# Buffering of full file is necessary, e.g. to find reference links
while (getline) { file = file $0 "\n"; }