Changeset 6191
- Timestamp: 11/27/07 23:47:01 (1 year ago)
- Files:
- plugins/sfMarkdownPlugin/README (modified) (1 diff)
- plugins/sfMarkdownPlugin/lib/markdown.php (modified) (62 diffs)
- plugins/sfMarkdownPlugin/package.xml (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
plugins/sfMarkdownPlugin/README
r4712 r6191
156 156 == Changelog ==
157 157
158 2007-11-27: 0.1.1 alpha
159
160 * Updated: PHP Markdown 1.1.7
161
158 162 2007-07-22: 0.1.0 alpha
159 163
plugins/sfMarkdownPlugin/lib/markdown.php
r4712 r6191 13 13 14 14 15 define( 'MARKDOWN_VERSION', "1.0.1g" ); # Tue 3 Jul200716 define( 'MARKDOWNEXTRA_VERSION', "1.1. 3" ); # Tue 3 Jul200715 define( 'MARKDOWN_VERSION', "1.0.1k" ); # Wed 26 Sep 2007 16 define( 'MARKDOWNEXTRA_VERSION', "1.1.7" ); # Wed 26 Sep 2007 17 17 18 18 … … 22 22 23 23 # Change to ">" for HTML output 24 define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");24 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />"); 25 25 26 26 # Define the width of a tab for code blocks. 27 define( 'MARKDOWN_TAB_WIDTH', 4 );27 @define( 'MARKDOWN_TAB_WIDTH', 4 ); 28 28 29 29 # Optional title attribute for footnote links and backlinks. 30 define( 'MARKDOWN_FN_LINK_TITLE', "" );31 define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );30 @define( 'MARKDOWN_FN_LINK_TITLE', "" ); 31 @define( 'MARKDOWN_FN_BACKLINK_TITLE', "" ); 32 32 33 33 # Optional class attribute for footnote links and backlinks. 34 define( 'MARKDOWN_FN_LINK_CLASS', "" );35 define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );34 @define( 'MARKDOWN_FN_LINK_CLASS', "" ); 35 @define( 'MARKDOWN_FN_BACKLINK_CLASS', "" ); 36 36 37 37 … … 41 41 42 42 # Change to false to remove Markdown from posts and/or comments. 43 define( 'MARKDOWN_WP_POSTS', true );44 define( 'MARKDOWN_WP_COMMENTS', true );43 @define( 'MARKDOWN_WP_POSTS', true ); 44 @define( 'MARKDOWN_WP_COMMENTS', true ); 45 45 46 46 … … 48 48 ### Standard Function Interface ### 49 49 50 define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );50 @define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' ); 51 51 52 52 function Markdown($text) { … … 72 72 Plugin URI: http://www.michelf.com/projects/php-markdown/ 73 73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a> 74 Version: 1.1. 
374 Version: 1.1.7 75 75 Author: Michel Fortin 76 76 Author URI: http://www.michelf.com/ … … 117 117 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7); 118 118 119 global $ wp_markdown_hidden;120 $ wp_markdown_hidden[1] =121 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>' ;122 $ wp_markdown_hidden[2]= explode(' ', str_rot13(119 global $mdwp_hidden_tags, $mdwp_placeholders; 120 $mdwp_hidden_tags = explode(' ', 121 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>'); 122 $mdwp_placeholders = explode(' ', str_rot13( 123 123 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '. 124 124 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli')); … … 136 136 137 137 function mdwp_hide_tags($text) { 138 global $wp_markdown_hidden; 139 return str_replace(explode($wp_markdown_hidden), 140 explode($wp_markdown_hidden), $text); 138 global $mdwp_hidden_tags, $mdwp_placeholders; 139 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text); 141 140 } 142 141 function mdwp_show_tags($text) { 143 global $markdown_hidden_tags; 144 return str_replace(array_values($markdown_hidden_tags), 145 array_keys($markdown_hidden_tags), $text); 142 global $mdwp_hidden_tags, $mdwp_placeholders; 143 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text); 146 144 } 147 145 } … … 212 210 # Table of hash values for escaped characters: 213 211 var $escape_chars = '\`*_{}[]()>#+-.!'; 214 var $escape_table = array();215 var $backslash_escape_table = array();216 212 217 213 # Change to ">" for HTML output. 218 214 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; 219 215 var $tab_width = MARKDOWN_TAB_WIDTH; 216 217 # Change to `true` to disallow markup or entities. 218 var $no_markup = false; 219 var $no_entities = false; 220 220 221 221 … … 233 233 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 
234 234 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 235 236 # Create an identical table but for escaped characters.237 foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) {238 $entity = "&#". ord($char). ";";239 $this->escape_table[$char] = $entity;240 $this->backslash_escape_table["\\$char"] = $entity;241 }242 235 243 236 # Sort document, block, and span gamut in ascendent priority order. … … 251 244 var $urls = array(); 252 245 var $titles = array(); 253 var $html_blocks = array(); 254 var $html_hashes = array(); # Contains both blocks and span hashes. 246 var $html_hashes = array(); 255 247 256 248 # Status flag to avoid invalid nesting. … … 271 263 $this->urls = array(); 272 264 $this->titles = array(); 273 $this->html_blocks = array();274 265 $this->html_hashes = array(); 275 266 276 267 # Standardize line endings: 277 268 # DOS to Unix and Mac to Unix 278 $text = str_replace(array("\r\n", "\r"), "\n", $text);269 $text = preg_replace('{\r\n?}', "\n", $text); 279 270 280 271 # Make sure $text ends with a couple of newlines: … … 349 340 350 341 function hashHTMLBlocks($text) { 342 if ($this->no_markup) return $text; 343 351 344 $less_than_tab = $this->tab_width - 1; 352 345 … … 357 350 # phrase emphasis, and spans. The list of tags we're looking for is 358 351 # hard-coded: 359 $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 360 'script|noscript|form|fieldset|iframe|math|ins|del'; 352 # 353 # * List "a" is made of tags which can be both inline or block-level. 354 # These will be treated block-level when the start tag is alone on 355 # its line, otherwise they're not matched here and will be taken as 356 # inline later. 357 # * List "b" is made of tags which are always block-level; 358 # 359 $block_tags_a = 'ins|del'; 361 360 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 362 361 'script|noscript|form|fieldset|iframe|math'; … … 385 384 <\2 # nested opening tag 386 385 '.$attr.' 
# attributes 387 (? :386 (?> 388 387 /> 389 388 | … … 398 397 )*', 399 398 $nested_tags_level); 399 $content2 = str_replace('\2', '\3', $content); 400 400 401 401 # First, look for nested blocks, e.g.: … … 410 410 # We need to do this before the next, more liberal match, because the next 411 411 # match will start at the first `<div>` and stop at the first `</div>`. 412 $text = preg_replace_callback('{ 413 ( # save in $1 414 ^ # start of line (with /m) 415 <('.$block_tags_a.')# start tag = $2 416 '.$attr.'>\n # attributes followed by > and \n 412 $text = preg_replace_callback('{(?> 413 (?> 414 (?<=\n\n) # Starting after a blank line 415 | # or 416 \A\n? # the beginning of the doc 417 ) 418 ( # save in $1 419 420 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 421 # in between. 422 423 [ ]{0,'.$less_than_tab.'} 424 <('.$block_tags_b.')# start tag = $2 425 '.$attr.'> # attributes followed by > and \n 417 426 '.$content.' # content, support nesting 418 427 </\2> # the matching end tag 419 428 [ ]* # trailing spaces/tabs 420 429 (?=\n+|\Z) # followed by a newline or end of document 421 ) 422 }xmi', 423 array(&$this, '_hashHTMLBlocks_callback'), 424 $text); 425 426 # 427 # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between. 428 # 429 $text = preg_replace_callback('{ 430 ( # save in $1 431 ^ # start of line (with /m) 432 <('.$block_tags_b.')# start tag = $2 433 '.$attr.'> # attributes followed by > 434 '.$content.' # content, support nesting 435 </\2> # the matching end tag 430 431 | # Special version for tags of group a. 432 433 [ ]{0,'.$less_than_tab.'} 434 <('.$block_tags_a.')# start tag = $3 435 '.$attr.'>[ ]*\n # attributes followed by > 436 '.$content2.' # content, support nesting 437 </\3> # the matching end tag 436 438 [ ]* # trailing spaces/tabs 437 439 (?=\n+|\Z) # followed by a newline or end of document 438 ) 439 }xmi', 440 array(&$this, '_hashHTMLBlocks_callback'), 441 $text); 442 443 # Special case just for <hr />. 
It was easier to make a special case than 444 # to make the other regex more complicated. 445 $text = preg_replace_callback('{ 446 (?: 447 (?<=\n\n) # Starting after a blank line 448 | # or 449 \A\n? # the beginning of the doc 450 ) 451 ( # save in $1 440 441 | # Special case just for <hr />. It was easier to make a special 442 # case than to make the other regex more complicated. 443 452 444 [ ]{0,'.$less_than_tab.'} 453 445 <(hr) # start tag = $2 … … 457 449 [ ]* 458 450 (?=\n{2,}|\Z) # followed by a blank line or end of document 459 ) 460 }xi', 461 array(&$this, '_hashHTMLBlocks_callback'), 462 $text); 463 464 # Special case for standalone HTML comments: 465 $text = preg_replace_callback('{ 466 (?: 467 (?<=\n\n) # Starting after a blank line 468 | # or 469 \A\n? # the beginning of the doc 470 ) 471 ( # save in $1 451 452 | # Special case for standalone HTML comments: 453 472 454 [ ]{0,'.$less_than_tab.'} 473 455 (?s: … … 476 458 [ ]* 477 459 (?=\n{2,}|\Z) # followed by a blank line or end of document 478 ) 479 }x', 480 array(&$this, '_hashHTMLBlocks_callback'), 481 $text); 482 483 # PHP and ASP-style processor instructions (<? and <%) 484 $text = preg_replace_callback('{ 485 (?: 486 (?<=\n\n) # Starting after a blank line 487 | # or 488 \A\n? # the beginning of the doc 489 ) 490 ( # save in $1 460 461 | # PHP and ASP-style processor instructions (<? and <%) 462 491 463 [ ]{0,'.$less_than_tab.'} 492 464 (?s: … … 497 469 [ ]* 498 470 (?=\n{2,}|\Z) # followed by a blank line or end of document 499 ) 500 }x', 471 472 ) 473 )}Sxmi', 501 474 array(&$this, '_hashHTMLBlocks_callback'), 502 475 $text); … … 509 482 return "\n\n$key\n\n"; 510 483 } 511 512 513 function hashBlock($text) { 514 # 515 # Called whenever a tag must be hashed when a function insert a block-level 516 # tag in $text, it pass through this function and is automaticaly escaped, 517 # which remove the need to call _HashHTMLBlocks at every step. 
484 485 486 function hashPart($text, $boundary = 'X') { 487 # 488 # Called whenever a tag must be hashed when a function insert an atomic 489 # element in the text stream. Passing $text to through this function gives 490 # a unique text-token which will be reverted back when calling unhash. 491 # 492 # The $boundary argument specify what character should be used to surround 493 # the token. By convension, "B" is used for block elements that needs not 494 # to be wrapped into paragraph tags at the end, ":" is used for elements 495 # that are word separators and "S" is used for general span-level elements. 518 496 # 519 497 # Swap back any tag hash found in $text so we do not have to `unhash` … … 522 500 523 501 # Then hash the block. 524 $key = "B\x1A". md5($text); 502 static $i = 0; 503 $key = "$boundary\x1A" . ++$i . $boundary; 525 504 $this->html_hashes[$key] = $text; 526 $this->html_blocks[$key] = $text;527 505 return $key; # String that will replace the tag. 528 506 } 529 507 530 508 531 function hashSpan($text, $word_separator = false) { 532 # 533 # Called whenever a tag must be hashed when a function insert a span-level 534 # element in $text, it pass through this function and is automaticaly 535 # escaped, blocking invalid nested overlap. If optional argument 536 # $word_separator is true, surround the hash value by spaces. 537 # 538 # Swap back any tag hash found in $text so we do not have to `unhash` 539 # multiple times at the end. 540 $text = $this->unhash($text); 541 542 # Then hash the span. 543 $key = "S\x1A". md5($text); 544 if ($word_separator) $key = ":$key:"; 545 546 $this->html_hashes[$key] = $text; 547 return $key; # String that will replace the span tag. 509 function hashBlock($text) { 510 # 511 # Shortcut function for hashPart with block-level boundaries. 
512 # 513 return $this->hashPart($text, 'B'); 548 514 } 549 515 … … 596 562 # Do Horizontal Rules: 597 563 return preg_replace( 598 array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ ]*$}mx', 599 '{^[ ]{0,2}([ ]? -[ ]?){3,}[ ]*$}mx', 600 '{^[ ]{0,2}([ ]? _[ ]?){3,}[ ]*$}mx'), 564 '{ 565 ^[ ]{0,3} # Leading space 566 ([-*_]) # $1: First marker 567 (?> # Repeated marker group 568 [ ]{0,2} # Zero, one, or two spaces. 569 \1 # Marker character 570 ){2,} # Group repeated at least twice 571 [ ]* # Tailing spaces 572 $ # End of line. 573 }mx', 601 574 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 602 575 $text); … … 609 582 # tags like paragraphs, headers, and list items. 610 583 # 611 "escapeSpecialCharsWithinTagAttributes" => -20,612 "doCodeSpans" => -10,613 " encodeBackslashEscapes" => -5,584 # Process character escapes, code spans, and inline HTML 585 # in one shot. 586 "parseSpan" => -30, 614 587 615 588 # Process anchor and image tags. Images must come first, … … 642 615 function doHardBreaks($text) { 643 616 # Do hard breaks: 644 $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n"); 645 return preg_replace('/ {2,}\n/', $br_tag, $text); 646 } 647 648 649 function escapeSpecialCharsWithinTagAttributes($text) { 650 # 651 # Within tags -- meaning between < and > -- encode [\ ` * _] so they 652 # don't conflict with their use in Markdown for code, italics and strong. 653 # We're replacing each such character with its corresponding MD5 checksum 654 # value; this is likely overkill, but it should prevent us from colliding 655 # with the escape values by accident. 
656 # 657 $tokens = $this->tokenizeHTML($text); 658 $text = ''; # rebuild $text from the tokens 659 660 foreach ($tokens as $cur_token) { 661 if ($cur_token[0] == 'tag') { 662 $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]); 663 $cur_token[1] = str_replace('`', $this->escape_table['`'], $cur_token[1]); 664 $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]); 665 $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]); 666 } 667 $text .= $cur_token[1]; 668 } 669 return $text; 617 return preg_replace_callback('/ {2,}\n/', 618 array(&$this, '_doHardBreaks_callback'), $text); 619 } 620 function _doHardBreaks_callback($matches) { 621 return $this->hashPart("<br$this->empty_element_suffix\n"); 670 622 } 671 623 … … 768 720 $link_text = $this->runSpanGamut($link_text); 769 721 $result .= ">$link_text</a>"; 770 $result = $this->hash Span($result);722 $result = $this->hashPart($result); 771 723 } 772 724 else { … … 793 745 $result .= ">$link_text</a>"; 794 746 795 return $this->hash Span($result);747 return $this->hashPart($result); 796 748 } 797 749 … … 870 822 } 871 823 $result .= $this->empty_element_suffix; 872 $result = $this->hash Span($result);824 $result = $this->hashPart($result); 873 825 } 874 826 else { … … 893 845 $result .= $this->empty_element_suffix; 894 846 895 return $this->hash Span($result);847 return $this->hashPart($result); 896 848 } 897 849 … … 905 857 # -------- 906 858 # 907 $text = preg_replace_callback('{ ^(.+?)[ ]*\n=+[ ]*\n+ }mx', 908 array(&$this, '_doHeaders_callback_setext_h1'), $text); 909 $text = preg_replace_callback('{ ^(.+?)[ ]*\n-+[ ]*\n+ }mx', 910 array(&$this, '_doHeaders_callback_setext_h2'), $text); 859 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 860 array(&$this, '_doHeaders_callback_setext'), $text); 911 861 912 862 # atx-style headers: … … 929 879 return $text; 930 880 } 931 function _doHeaders_callback_setext_h1($matches) { 932 $block = 
"<h1>".$this->runSpanGamut($matches[1])."</h1>"; 933 return "\n" . $this->hashBlock($block) . "\n\n"; 934 } 935 function _doHeaders_callback_setext_h2($matches) { 936 $block = "<h2>".$this->runSpanGamut($matches[1])."</h2>"; 881 function _doHeaders_callback_setext($matches) { 882 $level = $matches[2]{0} == '=' ? 1 : 2; 883 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>"; 937 884 return "\n" . $this->hashBlock($block) . "\n\n"; 938 885 } … … 1094 1041 (?:\n\n|\A) 1095 1042 ( # $1 = the code block -- one or more lines, starting with a space/tab 1096 (? :1097 (?:[ ]{'.$this->tab_width.'} | \t)# Lines must start with a tab or a tab-width of spaces1043 (?> 1044 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces 1098 1045 .*\n+ 1099 1046 )+ … … 1108 1055 $codeblock = $matches[1]; 1109 1056 1110 $codeblock = $this->encodeCode($this->outdent($codeblock)); 1111 // $codeblock = $this->detab($codeblock); 1112 # trim leading newlines and trailing whitespace 1113 $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock); 1114 1115 $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n"; 1116 1117 return $result; 1118 } 1119 1120 1121 function doCodeSpans($text) { 1122 # 1123 # * Backtick quotes are used for <code></code> spans. 1124 # 1125 # * You can use multiple backticks as the delimiters if you want to 1126 # include literal backticks in the code span. So, this input: 1127 # 1128 # Just type ``foo `bar` baz`` at the prompt. 1129 # 1130 # Will translate to: 1131 # 1132 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> 1133 # 1134 # There's no arbitrary limit to the number of backticks you 1135 # can use as delimters. If you need three consecutive backticks 1136 # in your code, use four for delimiters, etc. 1137 # 1138 # * You can use spaces to get literal backticks at the edges: 1139 # 1140 # ... type `` `bar` `` ... 1141 # 1142 # Turns to: 1143 # 1144 # ... 
type <code>`bar`</code> ... 1145 # 1146 $text = preg_replace_callback('@ 1147 (?<!\\\) # Character before opening ` can\'t be a backslash 1148 (`+) # $1 = Opening run of ` 1149 (.+?) # $2 = The code block 1150 (?<!`) 1151 \1 # Matching closer 1152 (?!`) 1153 @xs', 1154 array(&$this, '_doCodeSpans_callback'), $text); 1155 1156 return $text; 1157 } 1158 function _doCodeSpans_callback($matches) { 1159 $c = $matches[2]; 1160 $c = preg_replace('/^[ ]*/', '', $c); # leading whitespace 1161 $c = preg_replace('/[ ]*$/', '', $c); # trailing whitespace 1162 $c = $this->encodeCode($c); 1163 return $this->hashSpan("<code>$c</code>"); 1164 } 1165 1166 1167 function encodeCode($_) { 1168 # 1169 # Encode/escape certain characters inside Markdown code runs. 1170 # The point is that in code, these characters are literals, 1171 # and lose their special Markdown meanings. 1172 # 1173 # Encode all ampersands; HTML entities are not 1174 # entities within a Markdown code span. 1175 $_ = str_replace('&', '&', $_); 1176 1177 # Do the angle bracket song and dance: 1178 $_ = str_replace(array('<', '>'), 1179 array('<', '>'), $_); 1180 1181 # Now, escape characters that are magic in Markdown: 1182 // $_ = str_replace(array_keys($this->escape_table), 1183 // array_values($this->escape_table), $_); 1184 1185 return $_; 1057 $codeblock = $this->outdent($codeblock); 1058 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1059 1060 # trim leading newlines and trailing newlines 1061 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock); 1062 1063 $codeblock = "<pre><code>$codeblock\n</code></pre>"; 1064 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 1065 } 1066 1067 1068 function makeCodeSpan($code) { 1069 # 1070 # Create a code span markup for $code. Called from handleSpanToken. 
1071 # 1072 $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 1073 return $this->hashPart("<code>$code</code>"); 1186 1074 } 1187 1075 … … 1220 1108 $text = $matches[2]; 1221 1109 $text = $this->runSpanGamut($text); 1222 return $this->hash Span("<em>$text</em>");1110 return $this->hashPart("<em>$text</em>"); 1223 1111 } 1224 1112 function _doItalicAndBold_strong_callback($matches) { 1225 1113 $text = $matches[2]; 1226 1114 $text = $this->runSpanGamut($text); 1227 return $this->hash Span("<strong>$text</strong>");1115 return $this->hashPart("<strong>$text</strong>"); 1228 1116 } 1229 1117 … … 1232 1120 $text = preg_replace_callback('/ 1233 1121 ( # Wrap whole match in $1 1234 ( 1122 (?> 1235 1123 ^[ ]*>[ ]? # ">" at the start of a line 1236 1124 .+\n # rest of the first line … … 1247 1135 $bq = $matches[1]; 1248 1136 # trim one level of quoting - trim whitespace-only lines 1249 $bq = preg_replace( array('/^[ ]*>[ ]?/m', '/^[ ]+$/m'), '', $bq);1137 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1250 1138 $bq = $this->runBlockGamut($bq); # recurse 1251 1139 … … 1271 1159 # 1272 1160 # Strip leading and trailing lines: 1273 $text = preg_replace( array('/\A\n+/', '/\n+\z/'), '', $text);1161 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1274 1162 1275 1163 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1276 1164 1277 1165 # 1278 # Wrap <p> tags .1166 # Wrap <p> tags and unhashify HTML blocks 1279 1167 # 1280 1168 foreach ($grafs as $key => $value) { 1281 if (!isset( $this->html_blocks[$value] )) { 1169 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1170 # Is a paragraph. 1282 1171 $value = $this->runSpanGamut($value); 1283 1172 $value = preg_replace('/^([ ]*)/', "<p>", $value); … … 1285 1174 $grafs[$key] = $this->unhash($value); 1286 1175 } 1287 } 1288 1289 # 1290 # Unhashify HTML blocks 1291 # 1292 foreach ($grafs as $key => $graf) { 1293 # Modify elements of @grafs in-place... 
1294 if (isset($this->html_blocks[$graf])) { 1295 $block = $this->html_blocks[$graf]; 1176 else { 1177 # Is a block. 1178 # Modify elements of @grafs in-place... 1179 $graf = $value; 1180 $block = $this->html_hashes[$graf]; 1296 1181 $graf = $block; 1297 1182 // if (preg_match('{ … … 1340 1225 function encodeAmpsAndAngles($text) { 1341 1226 # Smart processing for ampersands and angle brackets that need to be encoded. 1227 if ($this->no_entities) { 1228 $text = str_replace('&', '&', $text); 1229 $text = str_replace('<', '<', $text); 1230 return $text; 1231 } 1342 1232 1343 1233 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: … … 1350 1240 1351 1241 return $text; 1352 }1353 1354 1355 function encodeBackslashEscapes($text) {1356 #1357 # Parameter: String.1358 # Returns: The string, with after processing the following backslash1359 # escape sequences.1360 #1361 # Must process escaped backslashes first.1362 return str_replace(array_keys($this->backslash_escape_table),1363 array_values($this->backslash_escape_table), $text);1364 1242 } 1365 1243 … … 1387 1265 $url = $this->encodeAmpsAndAngles($matches[1]); 1388 1266 $link = "<a href=\"$url\">$url</a>"; 1389 return $this->hash Span($link);1267 return $this->hashPart($link); 1390 1268 } 1391 1269 function _doAutoLinks_email_callback($matches) { 1392 1270 $address = $matches[1]; 1393 1271 $link = $this->encodeEmailAddress($address); 1394 return $this->hash Span($link);1272 return $this->hashPart($link); 1395 1273 } 1396 1274 … … 1437 1315 1438 1316 1439 function tokenizeHTML($str) { 1440 # 1441 # Parameter: String containing HTML + Markdown markup. 1442 # Returns: An array of the tokens comprising the input 1443 # string. Each token is either a tag or a run of text 1444 # between tags. Each element of the array is a 1445 # two-element array; the first is either 'tag' or 'text'; 1446 # the second is the actual value. 
1447 # Note: Markdown code spans are taken into account: no tag token is 1448 # generated within a code span. 1449 # 1450 $tokens = array(); 1451 1452 while ($str != "") { 1453 # 1454 # Each loop iteration seach for either the next tag or the next 1455 # openning code span marker. If a code span marker is found, the 1456 # code span is extracted in entierty and will result in an extra 1457 # text token. 1458 # 1459 $parts = preg_split('{ 1317 function parseSpan($str) { 1318 # 1319 # Take the string $str and parse it into tokens, hashing embeded HTML, 1320 # escaped characters and handling code spans. 1321 # 1322 $output = ''; 1323 1324 $regex = '{ 1460 1325 ( 1326 \\\\['.preg_quote($this->escape_chars).'] 1327 | 1461 1328 (?<![`\\\\]) 1462 1329 `+ # code span marker 1330 '.( $this->no_markup ? '' : ' 1463 1331 | 1464 1332 <!-- .*? --> # comment … … 1467 1335 | 1468 1336 <[/!$]?[-a-zA-Z0-9:]+ # regular tags 1469 (? :1337 (?> 1470 1338 \s 1471 1339 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1472 1340 )? 1473 1341 > 1342 ').' 1474 1343 ) 1475 }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1344 }xs'; 1345 1346 while (1) { 1347 # 1348 # Each loop iteration seach for either the next tag, the next 1349 # openning code span marker, or the next escaped character. 1350 # Each token is then passed to handleSpanToken. 1351 # 1352 $parts = preg_split($regex, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1476 1353 1477 1354 # Create token from text preceding tag. 1478 1355 if ($parts[0] != "") { 1479 $ tokens[] = array('text', $parts[0]);1356 $output .= $parts[0]; 1480 1357 } 1481 1358 1482 1359 # Check if we reach the end. 1483 if (count($parts) < 3) { 1360 if (isset($parts[1])) { 1361 $output .= $this->handleSpanToken($parts[1], $parts[2]); 1362 $str = $parts[2]; 1363 } 1364 else { 1484 1365 break; 1485 1366 } 1486 1487 # Create token from tag or code span. 
1488 if ($parts[1]{0} == "`") { 1489 $tokens[] = array('text', $parts[1]); 1490 $str = $parts[2]; 1491 1492 # Skip the whole code span, pass as text token. 1493 if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/sm', 1367 } 1368 1369 return $output; 1370 } 1371 1372 1373 function handleSpanToken($token, &$str) { 1374 # 1375 # Handle $token provided by parseSpan by determining its nature and 1376 # returning the corresponding value that should replace it. 1377 # 1378 switch ($token{0}) { 1379 case "\\": 1380 return $this->hashPart("&#". ord($token{1}). ";"); 1381 case "`": 1382 # Search for end marker in remaining text. 1383 if (preg_match('/^(.*?[^`])'.$token.'(?!`)(.*)$/sm', 1494 1384 $str, $matches)) 1495 1385 { 1496 $tokens[] = array('text', $matches[1]);1497 1386 $str = $matches[2]; 1387 $codespan = $this->makeCodeSpan($matches[1]); 1388 return $this->hashPart($codespan); 1498 1389 } 1499 } else { 1500 $tokens[] = array('tag', $parts[1]); 1501 $str = $parts[2]; 1502 } 1503 } 1504 1505 return $tokens; 1390 return $token; // return as text since no ending marker found. 1391 default: 1392 return $this->hashPart($token); 1393 } 1506 1394 } 1507 1395 … … 1511 1399 # Remove one level of line-leading tabs or spaces 1512 1400 # 1513 return preg_replace( "/^(\\t|[ ]{1,$this->tab_width})/m", "", $text);1401 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 1514 1402 } 1515 1403 … … 1527 1415 # appropriate number of space between each blocks. 1528 1416 1417 $text = preg_replace_callback('/^.*\t.*$/m', 1418 array(&$this, '_detab_callback'), $text); 1419 1420 return $text; 1421 } 1422 function _detab_callback($matches) { 1423 $line = $matches[0]; 1529 1424 $strlen = $this->utf8_strlen; # strlen function for UTF-8. 1530 $lines = explode("\n", $text); 1531 $text = ""; 1532 1533 foreach ($lines as $line) { 1534 # Split in blocks. 1535 $blocks = explode("\t", $line); 1536 # Add each blocks to the line. 
1537 $line = $blocks[0]; 1538 unset($blocks[0]); # Do not add first block twice. 1539 foreach ($blocks as $block) { 1540 # Calculate amount of space, insert spaces, insert block. 1541 $amount = $this->tab_width - 1542 $strlen($line, 'UTF-8') % $this->tab_width; 1543 $line .= str_repeat(" ", $amount) . $block; 1544 } 1545 $text .= "$line\n"; 1546 } 1547 return $text; 1425 1426 # Split in blocks. 1427 $blocks = explode("\t", $line); 1428 # Add each blocks to the line. 1429 $line = $blocks[0]; 1430 unset($blocks[0]); # Do not add first block twice. 1431 foreach ($blocks as $block) { 1432 # Calculate amount of space, insert spaces, insert block. 1433 $amount = $this->tab_width - 1434 $strlen($line, 'UTF-8') % $this->tab_width; 1435 $line .= str_repeat(" ", $amount) . $block; 1436 } 1437 return $line; 1548 1438 } 1549 1439 function _initDetab() { … … 1565 1455 # Swap back in all the tags hashed by _HashHTMLBlocks. 1566 1456 # 1567 return str_replace(array_keys($this->html_hashes), 1568 array_values($this->html_hashes), $text); 1457 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1458 array(&$this, '_unhash_callback'), $text); 1459 } 1460 function _unhash_callback($matches) { 1461 return $this->html_hashes[$matches[0]]; 1569 1462 } 1570 1463 … … 1623 1516 var $abbr_desciptions = array(); 1624 1517 var $abbr_matches = array(); 1625 var $html_cleans = array();1626 1518 1627 1519 # Status flag to avoid invalid nesting. … … 1642 1534 $this->abbr_desciptions = array(); 1643 1535 $this->abbr_matches = array(); 1644 $this->html_cleans = array();1645 1536 1646 1537 return parent::transform($text); … … 1777 1668 # after each newline to prevent triggering any block element. 1778 1669 if ($span) { 1779 $void = $this->hash Span("", true);1780 $newline = $this->hashSpan("", true) . "\n";1670 $void = $this->hashPart("", ':'); 1671 $newline = "$void\n"; 1781 1672 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . 
$void; 1782 1673 } … … 1797 1688 # 1798 1689 if (# Find current paragraph 1799 preg_match('/(?>^\n?|\n\n)((?>. \n?)+?)$/', $parsed, $matches) &&1690 preg_match('/(?>^\n?|\n\n)((?>.+\n?)*?)$/', $parsed, $matches) && 1800 1691 ( 1801 1692 # Then match in it either a code block... … … 2048 1939 # blocking invalid nested overlap. 2049 1940 # 2050 # Swap back any tag hash found in $text so we do not have to `unhash` 2051 # multiple times at the end. 2052 $text = $this->unhash($text); 2053 2054 # Then hash the tag. 2055 $key = "C\x1A". md5($text); 2056 $this->html_cleans[$key] = $text; 2057 $this->html_hashes[$key] = $text; 2058 return $key; # String that will replace the clean tag. 1941 return $this->hashPart($text, 'C'); 2059 1942 } 2060 1943 … … 2072 1955 # 2073 1956 $text = preg_replace_callback( 2074 '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n=+[ ]*\n+ }mx', 2075 array(&$this, '_doHeaders_callback_setext_h1'), $text); 2076 $text = preg_replace_callback( 2077 '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n-+[ ]*\n+ }mx', 2078 array(&$this, '_doHeaders_callback_setext_h2'), $text); 1957 '{ 1958 (^.+?) # $1: Header text 1959 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $2: Id attribute 1960 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 1961 }mx', 1962 array(&$this, '_doHeaders_callback_setext'), $text); 2079 1963 2080 1964 # atx-style headers: … … 2103 1987 return " id=\"$attr\""; 2104 1988 } 2105 function _doHeaders_callback_setext_h1($matches) { 1989 function _doHeaders_callback_setext($matches) { 1990 $level = $matches[3]{0} == '=' ? 1 : 2; 2106 1991 $attr = $this->_doHeaders_attr($id =& $matches[2]); 2107 $block = "<h1$attr>".$this->runSpanGamut($matches[1])."</h1>"; 2108 return "\n" . $this->hashBlock($block) . 
"\n\n"; 2109 } 2110 function _doHeaders_callback_setext_h2($matches) { 2111 $attr = $this->_doHeaders_attr($id =& $matches[2]); 2112 $block = "<h2$attr>".$this->runSpanGamut($matches[1])."</h2>"; 1992 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>"; 2113 1993 return "\n" . $this->hashBlock($block) . "\n\n"; 2114 1994 } … … 2145 2025 2146 2026 ( # $3: Cells 2147 (? :2027 (?> 2148 2028 [ ]* # Allowed whitespace. 2149 2029 [|] .* \n # Row content. … … 2172 2052 2173 2053 ( # $3: Cells 2174 (? :2054 (?> 2175 2055 .* [|] .* \n # Row content 2176 2056 )* … … 2211 2091 } 2212 2092 2213 # Creating code spans before splitting the row is an easy way to2214 # handle a code span containg pipes.2215 $head = $this->doCodeSpans($head);2093 # Parsing span elements, including code spans, character escapes, 2094 # and inline HTML tags, so that pipes inside those gets ignored. 2095 $head = $this->parseSpan($head); 2216 2096 $headers = preg_split('/ *[|] */', $head); 2217 2097 $col_count = count($headers); … … 2231 2111 $text .= "<tbody>\n"; 2232 2112 foreach ($rows as $row) { 2233 # Creating code spans before splitting the row is an easy way to2234 # handle a code span containg pipes.2235 $row = $this-> doCodeSpans($row);2113 # Parsing span elements, including code spans, character escapes, 2114 # and inline HTML tags, so that pipes inside those gets ignored. 2115 $row = $this->parseSpan($row); 2236 2116 2237 2117 # Split row by cell. 
… … 2258 2138 2259 2139 # Re-usable pattern to match any entire dl list: 2260 $whole_list = ' 2140 $whole_list = '(?> 2261 2141 ( # $1 = whole list 2262 2142 ( # $2 … … 2283 2163 ) 2284 2164 ) 2285 '; // mx2165 )'; // mx 2286 2166 2287 2167 $text = preg_replace_callback('{ … … 2437 2317 # 2438 2318 # Strip leading and trailing lines: 2439 $text = preg_replace( array('/\A\n+/', '/\n+\z/'), '', $text);2319 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 2440 2320 2441 2321 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); … … 2449 2329 # Check if this should be enclosed in a paragraph. 2450 2330 # Clean tag hashes & block tag hashes are left alone. 2451 $clean_key = $value; 2452 $block_key = substr($value, 0, 34); 2453 2454 $is_p = (!isset($this->html_blocks[$block_key]) && 2455 !isset($this->html_cleans[$clean_key]));