暫無描述

gfm.php 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. <?php
  2. /**
  3. * GitHub-Flavoured Markdown. Inspired by Evan's plugin, but modified.
  4. *
  5. * @author Evan Solomon
  6. * @author Matt Wiebe <wiebe@automattic.com>
  7. * @link https://github.com/evansolomon/wp-github-flavored-markdown-comments
  8. *
  9. * Add a few extras from GitHub's Markdown implementation. Must be used in a WordPress environment.
  10. */
  class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser {

      /**
       * Version of this GitHub-Flavoured Markdown layer.
       *
       * Note: the constant name misspells "MARKDOWN"; it is left unchanged so
       * that existing references to it keep working.
       */
      const WPCOM_GHF_MARDOWN_VERSION = '0.9.0';

      /**
       * Use a [code] shortcode when encountering a fenced code block.
       * Overwritten in the constructor based on whether the SyntaxHighlighter class exists.
       * @var boolean
       */
      public $use_code_shortcode = true;

      /**
       * Preserve shortcodes, untouched by Markdown.
       * This requires use within a WordPress installation.
       * @var boolean
       */
      public $preserve_shortcodes = true;

      /**
       * Preserve the legacy $latex your-latex-code-here$ style LaTeX markup.
       * @var boolean
       */
      public $preserve_latex = true;

      /**
       * Preserve single-line <code> blocks.
       * @var boolean
       */
      public $preserve_inline_code_blocks = true;

      /**
       * Strip paragraphs from the output. This is the right default for WordPress,
       * which generally wants to create its own paragraphs with `wpautop`.
       * @var boolean
       */
      public $strip_paras = true;

      /**
       * Opening tag emitted for fenced code blocks. Runs through sprintf() with the
       * language/class name as its only argument - you can supply your own syntax.
       * @var string
       */
      public $shortcode_start = '[code lang=%s]';

      /**
       * Closing tag emitted for fenced code blocks.
       * @var string
       */
      public $shortcode_end = '[/code]';

      /**
       * Text removed before Markdown runs, keyed by md5 hash, to be put back afterwards.
       * @var array
       */
      protected $preserve_text_hash = array();

      /**
       * Set environment defaults based on presence of key functions/classes.
       */
      public function __construct() {
          $this->use_code_shortcode = class_exists( 'SyntaxHighlighter' );

          /**
           * Allow processing shortcode contents.
           *
           * @module markdown
           *
           * @since 4.4.0
           *
           * @param boolean $preserve_shortcodes Defaults to $this->preserve_shortcodes.
           */
          $this->preserve_shortcodes = apply_filters( 'jetpack_markdown_preserve_shortcodes', $this->preserve_shortcodes ) && function_exists( 'get_shortcode_regex' );
          $this->preserve_latex      = function_exists( 'latex_markup' );
          $this->strip_paras         = function_exists( 'wpautop' );

          parent::__construct();
      }

      /**
       * Convert Markdown text to HTML, with the GitHub-flavoured extras of this class.
       *
       * Before handing the text to the parent parser, content that Markdown must not
       * touch (single-line <code> contents, shortcodes, legacy LaTeX, URLs and any
       * third-party patterns) is swapped for placeholders, and restored at the end.
       *
       * Headings are only recognised when the hash has space(s) after it:
       * #Will Not Produce a Heading 1
       * # This Will Produce a Heading 1
       *
       * @param string $text Markdown text
       * @return string HTML-transformed text
       */
      public function transform( $text ) {
          // Preserve anything inside a single-line <code> element.
          if ( $this->preserve_inline_code_blocks ) {
              $text = $this->single_line_code_preserve( $text );
          }

          // Remove all shortcodes so their interiors are left intact.
          if ( $this->preserve_shortcodes ) {
              $text = $this->shortcode_preserve( $text );
          }

          // Remove legacy LaTeX so it's left intact.
          if ( $this->preserve_latex ) {
              $text = $this->latex_preserve( $text );
          }

          // Do not process characters inside URLs.
          $text = $this->urls_preserve( $text );

          // Escape line-beginning # chars that do not have a space after them.
          $text = $this->replace_callback_or_keep( '|^#{1,6}( )?|um', array( $this, '_doEscapeForHashWithoutSpacing' ), $text );

          /**
           * Allow third-party plugins to define custom patterns that won't be processed by Markdown.
           *
           * @module markdown
           *
           * @since 3.9.2
           *
           * @param array $custom_patterns Array of custom patterns to be ignored by Markdown.
           */
          $custom_patterns = apply_filters( 'jetpack_markdown_preserve_pattern', array() );
          if ( is_array( $custom_patterns ) && ! empty( $custom_patterns ) ) {
              foreach ( $custom_patterns as $pattern ) {
                  // A broken third-party pattern must not blank out the whole document.
                  $text = $this->replace_callback_or_keep( $pattern, array( $this, '_doRemoveText' ), $text );
              }
          }

          // Run through core Markdown.
          $text = parent::transform( $text );

          // Occasionally Markdown Extra chokes on a para structure, producing odd paragraphs.
          $text = str_replace( "<p>&lt;</p>\n\n<p>p>", '<p>', $text );

          // Put start-of-line # chars back in place.
          $text = $this->restore_leading_hash( $text );

          // Strip paras if set.
          if ( $this->strip_paras ) {
              $text = $this->unp( $text );
          }

          // Restore preserved things like shortcodes/LaTeX.
          $text = $this->do_restore( $text );

          return $text;
      }

      /**
       * Run preg_replace_callback() without ever losing the subject text.
       *
       * preg_replace_callback() returns null when matching fails (for example an
       * invalid pattern, malformed UTF-8 under the `u` modifier, or an exceeded
       * backtrack limit). In that case the untouched input is returned instead.
       *
       * @param string|array $pattern  Regular expression(s) to search for
       * @param callable     $callback Callback receiving the matches array
       * @param string       $text     Subject text
       * @return string Replaced text, or $text unchanged when the regex engine failed
       */
      protected function replace_callback_or_keep( $pattern, $callback, $text ) {
          $replaced = preg_replace_callback( $pattern, $callback, $text );

          return null === $replaced ? $text : $replaced;
      }

      /**
       * Prevents blocks like <code>__this__</code> from turning into <code><strong>this</strong></code>.
       * Only <code> elements that open and close on the same line are matched.
       *
       * @param string $text Text that may need preserving
       * @return string Text that was preserved if needed
       */
      public function single_line_code_preserve( $text ) {
          return $this->replace_callback_or_keep( '|<code\b[^>]*>(.*?)</code>|', array( $this, 'do_single_line_code_preserve' ), $text );
      }

      /**
       * Regex callback for inline code preservation.
       *
       * Note: the element is re-emitted as a bare <code> tag, so attributes on the
       * original opening tag are not carried over.
       *
       * @param array $matches Regex matches
       * @return string Hashed content for later restoration
       */
      public function do_single_line_code_preserve( $matches ) {
          return '<code>' . $this->hash_block( $matches[1] ) . '</code>';
      }

      /**
       * Preserve code block contents by HTML encoding them. Useful before getting to KSES stripping.
       *
       * @param string $text Markdown/HTML content
       * @return string Markdown/HTML content with escaped code blocks
       */
      public function codeblock_preserve( $text ) {
          return $this->replace_callback_or_keep( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_preserve' ), $text );
      }

      /**
       * Regex callback for code block preservation.
       *
       * @param array $matches Regex matches: 1 = opening fence, 2 = text after the fence,
       *                       3 = block interior, 4 = closing fence
       * @return string Codeblock with escaped interior
       */
      public function do_codeblock_preserve( $matches ) {
          $block = stripslashes( $matches[3] );
          $block = esc_html( $block );
          // Double every backslash that survived escaping.
          $block = str_replace( '\\', '\\\\', $block );
          $open  = $matches[1] . $matches[2] . "\n";

          return $open . $block . $matches[4];
      }

      /**
       * Restore previously preserved (i.e. escaped) code block contents.
       *
       * @param string $text Markdown/HTML content with escaped code blocks
       * @return string Markdown/HTML content
       */
      public function codeblock_restore( $text ) {
          return $this->replace_callback_or_keep( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_restore' ), $text );
      }

      /**
       * Regex callback for code block restoration (unescaping).
       *
       * @param array $matches Regex matches: 1 = opening fence, 2 = text after the fence,
       *                       3 = block interior, 4 = closing fence
       * @return string Codeblock with unescaped interior
       */
      public function do_codeblock_restore( $matches ) {
          $block = html_entity_decode( $matches[3], ENT_QUOTES );
          $open  = $matches[1] . $matches[2] . "\n";

          return $open . $block . $matches[4];
      }

      /**
       * Called to preserve legacy LaTeX like $latex some-latex-text $
       *
       * @param string $text Text in which to preserve LaTeX
       * @return string Text with LaTeX replaced by a hash that will be restored later
       */
      protected function latex_preserve( $text ) {
          // Regex from latex_remove().
          $regex = '%
              \$latex(?:=\s*|\s+)
              ((?:
                  [^$]+ # Not a dollar
              |
                  (?<=(?<!\\\\)\\\\)\$ # Dollar preceded by exactly one slash
              )+)
              (?<!\\\\)\$ # Dollar preceded by zero slashes
          %ix';

          return $this->replace_callback_or_keep( $regex, array( $this, '_doRemoveText' ), $text );
      }

      /**
       * Called to preserve WP shortcodes from being formatted by Markdown in any way.
       *
       * @param string $text Text in which to preserve shortcodes
       * @return string Text with shortcodes replaced by a hash that will be restored later
       */
      protected function shortcode_preserve( $text ) {
          return $this->replace_callback_or_keep( $this->get_shortcode_regex(), array( $this, '_doRemoveText' ), $text );
      }

      /**
       * Avoid characters inside URLs from being formatted by Markdown in any way.
       *
       * @param string $text Text in which to preserve URLs.
       *
       * @return string Text with URLs replaced by a hash that will be restored later.
       */
      protected function urls_preserve( $text ) {
          return $this->replace_callback_or_keep(
              '#(?<!<)(?:https?|ftp)://([^\s<>"\'\[\]()]+|\[(?1)*+\]|\((?1)*+\))+(?<![_*.?])#i',
              array( $this, '_doRemoveText' ),
              $text
          );
      }

      /**
       * Restores any text preserved by $this->hash_block() and empties the store.
       *
       * @param string $text Text that may have hashed preservation placeholders
       * @return string Text with hashed preservation placeholders replaced by original text
       */
      protected function do_restore( $text ) {
          // Reverse hashes to ensure nested blocks are restored.
          $hashes = array_reverse( $this->preserve_text_hash, true );
          foreach ( $hashes as $hash => $value ) {
              $placeholder = $this->hash_maker( $hash );
              $text        = str_replace( $placeholder, $value, $text );
          }

          // Reset the hash store.
          $this->preserve_text_hash = array();

          return $text;
      }

      /**
       * Regex callback for text preservation.
       *
       * @param array $m Regex $matches array
       * @return string A placeholder that will later be replaced by the original text
       */
      protected function _doRemoveText( $m ) {
          return $this->hash_block( $m[0] );
      }

      /**
       * Call this to store a text block for later restoration.
       * The store is keyed by the md5 of the text, so identical text shares one entry.
       *
       * @param string $text Text to preserve for later
       * @return string Placeholder that will be swapped out later for the original text
       */
      protected function hash_block( $text ) {
          $hash                              = md5( $text );
          $this->preserve_text_hash[ $hash ] = $text;

          return $this->hash_maker( $hash );
      }

      /**
       * Less glamorous than the Keymaker.
       *
       * @param string $hash An md5 hash
       * @return string A placeholder hash
       */
      protected function hash_maker( $hash ) {
          return 'MARKDOWN_HASH' . $hash . 'MARKDOWN_HASH';
      }

      /**
       * Remove bare <p> elements. <p>s with attributes will be preserved.
       *
       * @param string $text HTML content
       * @return string <p>-less content
       */
      public function unp( $text ) {
          $stripped = preg_replace( "#<p>(.*?)</p>(\n|$)#ums", '$1$2', $text );

          // preg_replace() returns null on failure (e.g. malformed UTF-8); keep the input then.
          return null === $stripped ? $text : $stripped;
      }

      /**
       * A regex of all shortcodes currently registered by the current
       * WordPress installation.
       *
       * @uses get_shortcode_regex()
       * @return string A regex for grabbing shortcodes.
       */
      protected function get_shortcode_regex() {
          $pattern = get_shortcode_regex();

          // Don't match markdown link anchors that could be mistaken for shortcodes.
          $pattern .= '(?!\()';

          return "/$pattern/s";
      }

      /**
       * Since we escape unspaced #Headings, put things back later.
       *
       * @param string $text text with a leading escaped hash
       * @return string text with leading hashes unescaped
       */
      protected function restore_leading_hash( $text ) {
          $restored = preg_replace( "/^(<p>)?(&#35;|\\\\#)/um", "$1#", $text );

          // preg_replace() returns null on failure (e.g. malformed UTF-8); keep the input then.
          return null === $restored ? $text : $restored;
      }

      /**
       * Overload to support ```-fenced code blocks for pre-Markdown Extra 1.2.8
       * https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks
       *
       * @param string $text Markdown text
       * @return string Text with fenced code blocks replaced by the callback output
       */
      public function doFencedCodeBlocks( $text ) {
          // If we're at least at 1.2.8, native fenced code blocks are in.
          // Below is just copied from it in case we somehow got loaded on
          // top of someone else's Markdown Extra (including one that does not
          // define the version constant at all).
          if ( defined( 'MARKDOWNEXTRA_VERSION' ) && version_compare( MARKDOWNEXTRA_VERSION, '1.2.8', '>=' ) ) {
              return parent::doFencedCodeBlocks( $text );
          }

          // Adding the fenced code block syntax to regular Markdown:
          //
          // ~~~
          // Code block
          // ~~~
          return $this->replace_callback_or_keep(
              '{
                  (?:\n|\A)
                  # 1: Opening marker
                  (
                      (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
                  )
                  [ ]*
                  (?:
                      \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
                  |
                      '.$this->id_class_attr_catch_re.' # 3: Extra attributes
                  )?
                  [ ]* \n # Whitespace and newline following marker.
                  # 4: Content
                  (
                      (?>
                          (?!\1 [ ]* \n) # Not a closing marker.
                          .*\n+
                      )+
                  )
                  # Closing marker.
                  \1 [ ]* (?= \n )
              }xm',
              array( $this, '_doFencedCodeBlocks_callback' ),
              $text
          );
      }

      /**
       * Callback for pre-processing start of line hashes to slyly escape headings that don't
       * have a leading space.
       *
       * @param array $m preg_match matches; index 1 is only set when a space followed the hashes
       * @return string possibly escaped start of line hash
       */
      public function _doEscapeForHashWithoutSpacing( $m ) {
          if ( isset( $m[1] ) ) {
              // A space follows the hashes: a real heading, leave it alone.
              return $m[0];
          }

          return '\\' . $m[0];
      }

      /**
       * Overload to support Viper's [code] shortcode. Because awesome.
       *
       * @param array $matches Regex matches: 1 = fence, 2 = class name, 3 = extra attributes, 4 = content
       * @return string Replacement for the fenced block
       */
      public function _doFencedCodeBlocks_callback( $matches ) {
          // In case we have some escaped leading hashes right at the start of the block.
          $matches[4] = $this->restore_leading_hash( $matches[4] );

          // Just MarkdownExtra_Parser if we're not going ultra-deluxe.
          if ( ! $this->use_code_shortcode ) {
              return parent::_doFencedCodeBlocks_callback( $matches );
          }

          // Default to a "text" class if one wasn't passed. Helps with encoding issues later.
          $classname = empty( $matches[2] ) ? 'text' : $matches[2];
          if ( '.' === $classname[0] ) {
              $classname = substr( $classname, 1 );
          }

          $codeblock = preg_replace_callback( '/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] );
          $codeblock = esc_html( $codeblock );
          $codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end;

          // hashBlock() is the parent parser's own block hashing, not this class's hash_block().
          return "\n\n" . $this->hashBlock( $codeblock ) . "\n\n";
      }
  }