X-Git-Url: https://git.plutz.net/?a=blobdiff_plain;f=markdown.awk;h=d1de2c322ca2e1854d4530c61855f04638963617;hb=fcdebd0c42dcdff43f998a655463658ea2161505;hp=a5c2eb1f587d79f203df3858819d33da565236bf;hpb=b2cf4a3c3d0022d1abbeb69909500ea3147cbd10;p=cgilite diff --git a/markdown.awk b/markdown.awk index a5c2eb1..d1de2c3 100755 --- a/markdown.awk +++ b/markdown.awk @@ -23,7 +23,8 @@ # - [x] Blockquotes # - [x] Lists (ordered, unordered) # - [x] Code blocks (using indention) -# - [ ] Horizontal rules +# - [x] Horizontal rules +# - [x] Verbatim HTML block (gfm) # # Basic Markdown - Inline elements: # --------------------------------- @@ -31,9 +32,10 @@ # - [x] Reference style links # - [x] Emphasis *em*/**strong** (*Asterisk*, _Underscore_) # - [x] `code`, also ``code containing `backticks` `` -# - [ ] Images / reference style images +# - [x] Images / reference style images # - [x] # - [x] backslash escapes +# - [ ] Verbatim HTML inline # # Extensions - Block elements: # ---------------------------- @@ -146,6 +148,32 @@ function inline( line, LOCAL, len, code, href, guard ) { return "" substr(line, 1, len) inline( substr(line, len + 1) ); } + # inline images + } else if ( match(line, /^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/) ) { + len = RLENGTH; + text = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\1", "g", line); + href = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\2", "g", line); + title = gensub(/^!\[([^]]+)\]\(([^"\)]+)([ \t]+"([^"]+)")?\)/, "\\4", "g", line); + if ( title ) { + return "\""" inline( substr( line, len + 1) ); + } else { + return "\""" inline( substr( line, len + 1) ); + } + + # reference style images + } else if ( match(line, /^!\[([^]]+)\] ?\[([^]]*)\]/ ) ) { + len = RLENGTH; + text = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\1", 1, line); + id = gensub(/^!\[([^\n]+)\] ?\[([^\n]*)\].*/, "\\2", 1, line); + if ( ! id ) id = text; + if ( rl_href[id] && rl_title[id] ) { + return "\""" inline( substr( line, len + 1) ); + } else if ( rl_href[id] ) { + return "\""" inline( substr( line, len + 1) ); + } else { + return "" substr(line, 1, len) inline( substr(line, len + 1) ); + } + # ~~strikeout~~ (pandoc) } else if ( match(line, /^~~([[:graph:]]|[[:graph:]]([^~]|~[^~])*[[:graph:]])~~/) ) { len = RLENGTH; @@ -171,7 +199,7 @@ function inline( line, LOCAL, len, code, href, guard ) { return "" inline( substr( line, 3, len - 4 ) ) "" inline( substr( line, len + 1 ) ); # __strong__ - } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__[[:space:]]") ) { + } else if ( match(line, "^__(([^_[:space:]]|" ieu ")|([^_[:space:]]|" ieu ")(" nu "|" ieu ")*([^_[:space:]]|" ieu "))__[[:space:][:punct:]]") ) { len = RLENGTH; return "" inline( substr( line, 3, len - 5 ) ) "" inline( substr( line, len) ); @@ -186,7 +214,7 @@ function inline( line, LOCAL, len, code, href, guard ) { return "" inline( substr( line, 2, len - 2 ) ) "" inline( substr( line, len + 1 ) ); # _em_ - } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_[[:space:]]") ) { + } else if ( match(line, "^_(([^_[:space:]]|" isu ")|([^_[:space:]]|" isu ")(" nu "|" isu ")*([^_[:space:]]|" isu "))_[[:space:][:punct:]]") ) { len = RLENGTH; return "" inline( substr( line, 2, len - 3 ) ) "" inline( substr( line, len ) ); @@ -206,6 +234,48 @@ function _block( block, LOCAL, st, len, hlvl, htxt, guard, code, indent ) { if ( block == "" ) { return ""; + + # HTML #2 + } else if ( match( block, /(^|\n) ? ? ?|$)/) ) { + len = RLENGTH; st = RSTART; + return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len)); + + # HTML #3 + } else if ( match( block, /(^|\n) ? ? ?<\?([^\?]|\?[^>])*(\?>|$)/) ) { + len = RLENGTH; st = RSTART; + return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len)); + + # HTML #4 + } else if ( match( block, /(^|\n) ? ? ?]*(>|$)/) ) { + len = RLENGTH; st = RSTART; + return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len)); + + # HTML #5 + } else if ( match( block, /(^|\n) ? ? ?])*(\]\]>|$)/) ) { + len = RLENGTH; st = RSTART; + return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len)); + + # HTML #6 + } else if ( match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) { + len = RLENGTH; st = RSTART; + return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len)); + + # HTML #1 + } else if ( match( tolower(block), /(^|\n) ? ? ?<(script|pre|style)([[:space:]\n>]).*(<\/script>|<\/pre>|<\/style>|$)/) ) { + len = RLENGTH; st = RSTART; + match( tolower(substr(block, st, len)), /(<\/script>|<\/pre>|<\/style>)/); + len = RSTART + RLENGTH; + return _block(substr(block, 1, st - 1)) substr(block, st, len) _block(substr(block, st + len)); + + # HTML #7 + } else if ( match( block, /^ ? ? ?(<\/[A-Za-z][A-Za-z0-9-]* \t]*>|<[A-Za-z][A-Za-z0-9-]*([ \t]+[A-Za-z_:][A-Za-z0-9_\.:-]*[ \t]*=[ \t]*([[:space:]"'=<>`]+|"[^"]*"|'[^']*'))*[ \t]*\/?>)([[:space:]]*\n)([^\n]|\n[[:space:]]*[^\n])*(\n[ \t]*\n|$)/) ) { + len = RLENGTH; st = RSTART; + return substr(block, st, len) _block(substr(block, st + len)); + + # Horizontal rule + } else if ( match( block, /(^|\n) ? ? ?((\* *){3,}|(- *){3,}|(_ *){3,})($|\n)/) ) { + len = RLENGTH; st = RSTART; + return _block(substr(block, 1, st - 1)) "
\n" _block(substr(block, st + len)); # Blockquote (leading >) } else if ( match( block, /^> /) ) {