# Literal HTML entities
# } else if ( match( line, /^&([a-zA-Z]{2,32}|#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6});/) ) {
# mawk does not support repitition ranges
- } else if ( match( line, /^&([a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?|#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) {
+ } else if ( match( line, /^&[a-zA-Z][a-zA-Z][a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?;/) ) {
+ len = RLENGTH;
+ ret = ret substr( line, 1, len ); line = substr(line, len + 1);
+ continue;
+ } else if ( match( line, /^&(#[0-9][0-9]?[0-9]?[0-9]?[0-9]?[0-9]?[0-9]?|#[xX][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?);/) ) {
len = RLENGTH;
ret = ret substr( line, 1, len ); line = substr(line, len + 1);
continue;
ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len);
continue;
- # HTML #6
- } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
+ # HTML #6 (part1)
+ } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
+ len = RLENGTH; st = RSTART;
+ ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len);
+ continue;
+
+ # HTML #6 (part2)
+ } else if ( AllowHTML && match( tolower(block), /(^|\n) ? ? ?<\/?(h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)([[:space:]\n>]|\/>)([^\n]|\n[ \t]*[^\n])*(\n[[:space:]]*\n|$)/) ) {
len = RLENGTH; st = RSTART;
ret = ret _block(substr(block, 1, st - 1)) substr(block, st, len); block = substr(block, st + len);
continue;
"((\\|([^\n]+\\|)+\n)+" \
"\\+(-+\\+)+(\n|$))+" \
) || \
- match( block, "^()()()" \
- "(\\+(:?-+:?\\+)+)\n" \
+ match( block, "^(\\+(:?-+:?\\+)+)\n" \
"((\\|([^\n]+\\|)+\n)+" \
"\\+(-+\\+)+(\n|$))+" \
) ) {
"|[^\n \t][^\n]+(\n|$))*" ) ) {
st = RSTART; len = RLENGTH; list = substr( block, st, len);
- sub("^\n", "", list); match(list, "^( | | |)"); indent = RLENGTH;
+ sub("^\n", "", list); match(list, "^( | | )?"); indent = RLENGTH;
# gsub( "(^|\n) {0," indent "}", "\n", list); sub("^\n", "", list);
# emulate greedy range matcher for mawk
it = "("; while ( indent > 0 ) { for (k = indent; k > 0; k--) { it = it " "; } it = it "|"; indent--; }