# - [x] Lists (ordered, unordered)
# - [x] Code blocks (using indentation)
# - [x] Horizontal rules
-# - [x] Verbatim HTML block (gfm)
+# - [x] Verbatim HTML block (disabled by default)
#
# Basic Markdown - Inline elements:
# ---------------------------------
# - [x] Images / reference style images
# - [x] <automatic links>
# - [x] backslash escapes
-# - [ ] Verbatim HTML inline
+# - [x] Verbatim HTML inline (disabled by default)
+# - [x] HTML escaping
+#
+# NOTE: Set the environment variable MD_HTML=true to enable verbatim HTML
#
# Extensions - Block elements:
# ----------------------------
len = RLENGTH;
return "<em>" inline( substr( line, 2, len - 2 ) ) "</em>" inline( substr( line, len + 1 ) );
+ # Verbatim inline HTML
+ } else if ( AllowHTML && match( line, /^(<!--([^-]|-[^-]|--[^>])*-->|<\?([^\?]|\?[^>])*\?>|<![A-Z][^>]*>|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*\]\]>|<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)/) ) {
+ len = RLENGTH;
+ return substr( line, 1, len) inline(substr(line, len + 1));
+
+ # Literal HTML entities
+ } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) {
+ len = RLENGTH;
+ return substr( line, 1, len ) inline(substr(line, len + 1));
+
+ # Escape lone HTML character
+ } else if ( match( line, /^[&<>"']/) ) {
+ return HTML(substr(line, 1, 1)) inline(substr(line, 2));
+
# continue walk over string
} else {
return substr(line, 1, 1) inline( substr(line, 2) );
if ( block == "" ) {
return "";
- # HTML #2
- } else if ( match( block, /(^|\n) ? ? ?<!--([^-]|-[^-]|--[^>])*(-->|$)/) ) {
- len = RLENGTH; st = RSTART;
- return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
-
- # HTML #3
- } else if ( match( block, /(^|\n) ? ? ?<\?([^\?]|\?[^>])*(\?>|$)/) ) {
- len = RLENGTH; st = RSTART;
- return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
-
- # HTML #4
- } else if ( match( block, /(^|\n) ? ? ?<![A-Z][^>]*(>|$)/) ) {
- len = RLENGTH; st = RSTART;
- return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
-
- # HTML #5
- } else if ( match( block, /(^|\n) ? ? ?<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*(\]\]>|$)/) ) {
+ # HTML #2 #3 #4 #5
+ } else if ( AllowHTML && match( block, /(^|\n) ? ? ?(<!--([^-]|-[^-]|--[^>])*(-->|$)|<\?([^\?]|\?[^>])*(\?>|$)|<![A-Z][^>]*(>|$)|<!\[CDATA\[([^\]]|\][^\]]|\]\][^>])*(\]\]>|$))/) ) {
len = RLENGTH; st = RSTART;
return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
# HTML #6
- } else if ( match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
+ } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
len = RLENGTH; st = RSTART;
return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
# HTML #1
- } else if ( match( tolower(block), /(^|\n) ? ? ?<(script|pre|style)([[:space:]\n>]).*(<\/script>|<\/pre>|<\/style>|$)/) ) {
+ } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<(script|pre|style)([[:space:]\n>]).*(<\/script>|<\/pre>|<\/style>|$)/) ) {
len = RLENGTH; st = RSTART;
match( tolower(substr(block, st, len)), /(<\/script>|<\/pre>|<\/style>)/);
len = RSTART + RLENGTH;
return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len));
# HTML #7
- } else if ( match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]* \t]*>|<[A-Za-z][A-Za-z0-9-]*([ \t]+[A-Za-z_:][A-Za-z0-9_\.:-]*[ \t]*=[ \t]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))*[ \t]*\/?>)([[:space:]]*\n)([^\n]|\n[[:space:]]*[^\n])*(\n[ \t]*\n|$)/) ) {
+ } else if ( AllowHTML && match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]*[[:space:]]*>|<[A-Za-z][A-Za-z0-9-]*([[:space:]]+[A-Za-z_:][A-Za-z0-9_\.:-]*([[:space:]]*=[[:space:]]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))?)*[[:space:]]*\/?>)([[:space:]]*\n)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
len = RLENGTH; st = RSTART;
return substr(block, st, len) _block(substr(block, st + len));
BEGIN {
# Global Vars
file = ""; rl_href[""] = ""; rl_title[""] = "";
+ if (ENVIRON["MD_HTML"] == "true") { AllowHTML = "true"; }
# Buffering of the full file is necessary, e.g. to find reference links
while (getline) { file = file $0 "\n"; }