Açıklama Yok

class-jetpack-tweetstorm-helper.php 57KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683
  1. <?php
  2. /**
  3. * Tweetstorm block and API helper.
  4. *
  5. * @package automattic/jetpack
  6. * @since 8.7.0
  7. */
  8. use Automattic\Jetpack\Connection\Client;
  9. use Automattic\Jetpack\Status;
  10. use Twitter\Text\Regex as Twitter_Regex;
  11. use Twitter\Text\Validator as Twitter_Validator;
  12. /**
  13. * Class Jetpack_Tweetstorm_Helper
  14. *
  15. * @since 8.7.0
  16. */
  17. class Jetpack_Tweetstorm_Helper {
  18. /**
  19. * Blocks that can be converted to tweets.
  20. *
  21. * @var array {
  22. * The key for each element must match the registered block name.
  23. *
  24. * @type string $type Required. The type of content this block produces. Can be one of 'break', 'embed', 'image',
  25. * 'multiline', 'text', or 'video'.
  26. * @type string $content_location Optional. Where the block content can be found. Can be 'html', if we need to parse
  27. * it out of the block HTML text, 'html-attributes', if we need to parse it out of HTML attributes
  28. * in the block HTML, or 'block-attributes', if the content can be found in the block attributes.
  29. * Note that these attributes need to be available when the serialised block is
  30. * parsed using `parse_blocks()`. If it isn't set, it's assumed the block doesn't add
  31. * any content to the Twitter thread.
  32. * @type array $content Optional. Defines what parts of the block content need to be extracted. Behaviour can vary based on
  33. * `$content_location`, and `$type`:
  34. *
  35. * - When `$content_location` is 'html', a value of `array()` or `array( 'content' )` have the same meaning:
  36. * The entire block HTML should be used. In both cases, 'content' will be the corresponding tag in `$template`.
  37. * - Otherwise, when `$content_location` is 'html', it should be formatted as `array( 'container' => 'tag' )`,
  38. * where 'container' is the name of the corresponding RichText container in the block editor, and is also the name
  39. * of the corresponding tag in the $template string. 'tag' is the HTML tag within the block that corresponds to this
  40. * container. When `$type` is 'multiline', there must only be one element in the array, and tag should be set to the HTML
  41. * tag that corresponds to each line, though the 'container' should still be the RichText container name. (Eg, in the core/list block, the tag is 'li'.)
  42. * - When `$content_location` is 'html-attributes', the array should be formatted as `array( 'name' => array( 'tag', 'attribute') )`,
  43. * where 'name' is the name of a particular value that different block types require, 'tag' is the name of the HTML tag where 'attribute'
  44. * can be found, containing the value to use for 'name'. When `$type` is 'image', 'url' and 'alt' must be defined. When `$type` is 'video',
  45. * 'url' must be defined.
  46. * - When `$content_location` is 'block-attributes', it must be an array of block attribute names. When `$type` is 'embed', there
  47. * must only be one element, corresponding to the URL for the embed.
  48. * @type string $template Required for 'text' and 'multiline' types, ignored for all other types. Describes how the block content will be formatted when tweeted.
  49. * Tags should match the keys of `$content`, except for the special "{{content}}", which matches the entire HTML content of the block.
  50. * For 'multiline' types, the template will be repeated for every line in the block.
  51. * @type boolean $force_new Required. Whether or not a new tweet should be started when this block is encountered.
  52. * @type boolean $force_finished Required. Whether or not a new tweet should be started after this block is finished.
  53. * }
  54. */
  55. private static $supported_blocks = array(
  56. 'core/embed' => array(
  57. 'type' => 'embed',
  58. 'content_location' => 'block-attributes',
  59. 'content' => array( 'url' ),
  60. 'force_new' => false,
  61. 'force_finished' => true,
  62. ),
  63. 'core/gallery' => array(
  64. 'type' => 'image',
  65. 'content_location' => 'html-attributes',
  66. 'content' => array(
  67. 'url' => array( 'img', 'src' ),
  68. 'alt' => array( 'img', 'alt' ),
  69. ),
  70. 'force_new' => false,
  71. 'force_finished' => true,
  72. ),
  73. 'core/heading' => array(
  74. 'type' => 'text',
  75. 'content_location' => 'html',
  76. 'content' => array(),
  77. 'template' => '{{content}}',
  78. 'force_new' => true,
  79. 'force_finished' => false,
  80. ),
  81. 'core/image' => array(
  82. 'type' => 'image',
  83. 'content_location' => 'html-attributes',
  84. 'content' => array(
  85. 'url' => array( 'img', 'src' ),
  86. 'alt' => array( 'img', 'alt' ),
  87. ),
  88. 'force_new' => false,
  89. 'force_finished' => true,
  90. ),
  91. 'core/list' => array(
  92. 'type' => 'multiline',
  93. 'content_location' => 'html',
  94. // It looks a little weird to use the 'values' key for a single line,
  95. // but 'values' is the name of the RichText content area.
  96. 'content' => array(
  97. 'values' => 'li',
  98. ),
  99. 'template' => '- {{values}}',
  100. 'force_new' => false,
  101. 'force_finished' => false,
  102. ),
  103. 'core/paragraph' => array(
  104. 'type' => 'text',
  105. 'content_location' => 'html',
  106. 'content' => array(),
  107. 'template' => '{{content}}',
  108. 'force_new' => false,
  109. 'force_finished' => false,
  110. ),
  111. 'core/quote' => array(
  112. 'type' => 'text',
  113. 'content_location' => 'html',
  114. // The quote content will always be inside <p> tags.
  115. 'content' => array(
  116. 'value' => 'p',
  117. 'citation' => 'cite',
  118. ),
  119. 'template' => '“{{value}}” – {{citation}}',
  120. 'force_new' => false,
  121. 'force_finished' => false,
  122. ),
  123. 'core/separator' => array(
  124. 'type' => 'break',
  125. 'force_new' => false,
  126. 'force_finished' => true,
  127. ),
  128. 'core/spacer' => array(
  129. 'type' => 'break',
  130. 'force_new' => false,
  131. 'force_finished' => true,
  132. ),
  133. 'core/verse' => array(
  134. 'type' => 'text',
  135. 'content_location' => 'html',
  136. 'content' => array(),
  137. 'template' => '{{content}}',
  138. 'force_new' => false,
  139. 'force_finished' => false,
  140. ),
  141. 'core/video' => array(
  142. 'type' => 'video',
  143. 'content_location' => 'html-attributes',
  144. 'content' => array(
  145. 'url' => array( 'video', 'src' ),
  146. ),
  147. 'force_new' => false,
  148. 'force_finished' => true,
  149. ),
  150. 'jetpack/gif' => array(
  151. 'type' => 'embed',
  152. 'content_location' => 'block-attributes',
  153. 'content' => array( 'giphyUrl' ),
  154. 'force_new' => false,
  155. 'force_finished' => true,
  156. ),
  157. );
  158. /**
  159. * A cache of _wp_emoji_list( 'entities' ), after being run through html_entity_decode().
  160. *
  161. * Initialised in ::is_valid_tweet().
  162. *
  163. * @var array
  164. */
  165. private static $emoji_list = array();
  166. /**
  167. * Special line separator character (the UTF-8 bytes of U+2028 LINE SEPARATOR), for multiline text.
  168. *
  169. * @var string
  170. */
  171. private static $line_separator = "\xE2\x80\xA8";
  172. /**
  173. * Special inline placeholder character (the UTF-8 bytes of U+2063 INVISIBLE SEPARATOR), for inline tags that change content length in the RichText.
  174. *
  175. * @var string
  176. */
  177. private static $inline_placeholder = "\xE2\x81\xA3";
  178. /**
  179. * URLs always take up a fixed length from the text limit.
  180. *
  181. * @var int
  182. */
  183. private static $characters_per_url = 24;
  184. /**
  185. * Every media attachment takes up some space from the text limit.
  186. *
  187. * @var int
  188. */
  189. private static $characters_per_media = 24;
  190. /**
  191. * An array to store all the tweets in. Reset at the start of every ::parse() call.
  192. *
  193. * @var array
  194. */
  195. private static $tweets = array();
  196. /**
  197. * While we're caching everything, we want to keep track of the URLs we're adding.
  198. *
  199. * @var array
  200. */
  201. private static $urls = array();
  /**
   * Gather the Tweetstorm.
   *
   * When the IS_WPCOM constant is defined and truthy, the request is handed straight to
   * WPCOM_Gather_Tweetstorm::gather(). Otherwise it is sent to the
   * `/sites/%d/tweetstorm/gather` endpoint, signed as the blog.
   *
   * @param string $url The tweet URL to gather from.
   * @return mixed A WP_Error when offline mode is active, the site ID lookup fails, the request
   *               fails, or the endpoint answers with an HTTP status of 400 or above. Otherwise
   *               the JSON-decoded response body.
   */
  public static function gather( $url ) {
      if ( ( new Status() )->is_offline_mode() ) {
          return new WP_Error(
              'dev_mode',
              __( 'Tweet unrolling is not available in offline mode.', 'jetpack' )
          );
      }

      $site_id = self::get_site_id();
      if ( is_wp_error( $site_id ) ) {
          return $site_id;
      }

      if ( defined( 'IS_WPCOM' ) && IS_WPCOM ) {
          if ( ! class_exists( 'WPCOM_Gather_Tweetstorm' ) ) {
              \jetpack_require_lib( 'gather-tweetstorm' );
          }

          return WPCOM_Gather_Tweetstorm::gather( $url );
      }

      $response = Client::wpcom_json_api_request_as_blog(
          sprintf( '/sites/%d/tweetstorm/gather?url=%s', $site_id, rawurlencode( $url ) ),
          2,
          array( 'headers' => array( 'content-type' => 'application/json' ) ),
          null,
          'wpcom'
      );
      if ( is_wp_error( $response ) ) {
          return $response;
      }

      $data = json_decode( wp_remote_retrieve_body( $response ) );

      if ( wp_remote_retrieve_response_code( $response ) >= 400 ) {
          // An error response is not guaranteed to carry the expected JSON payload (for example,
          // an HTML error page from a proxy makes json_decode() return null), so check the
          // properties exist before reading them.
          if ( ! is_object( $data ) || ! isset( $data->code, $data->message ) ) {
              return new WP_Error(
                  'tweetstorm_gather_failed',
                  __( 'The tweet could not be unrolled.', 'jetpack' )
              );
          }

          return new WP_Error( $data->code, $data->message, isset( $data->data ) ? $data->data : '' );
      }

      return $data;
  }
  /**
   * Convert a list of editor blocks into a list of tweets.
   *
   * The shared tweet list is reset on every call, so each call starts from a clean slate.
   *
   * @param array $blocks {
   *     An array of blocks, with optional editor-specific information, that need to be parsed into tweets.
   *
   *     @type array  $block      A single block, in the form produced by parse_blocks().
   *     @type array  $attributes Optional. A list of block attributes and their values from the block editor.
   *     @type string $clientId   Optional. The clientId of this block in the block editor.
   * }
   * @return array An array of tweets. Empty when none of the passed blocks are supported.
   */
  public static function parse( $blocks ) {
      self::$tweets = array();

      $supported = self::extract_blocks( $blocks );
      if ( empty( $supported ) ) {
          return array();
      }

      // Seed the list with a blank tweet, so the loop never has to special-case the first one.
      self::start_new_tweet();

      foreach ( $supported as $entry ) {
          $definition = self::get_block_definition( $entry['name'] );
          $latest     = self::get_current_tweet();

          if ( 'break' === $definition['type'] ) {
              // Break blocks carry no content; saving is enough to apply their flags to the tweet.
              self::save_current_tweet( $latest, $entry );
              continue;
          }

          $needs_fresh_tweet = $latest['finished'] || $definition['force_new'];
          if ( $needs_fresh_tweet ) {
              self::start_new_tweet();
          }

          // Each step is a no-op when the block has nothing of that kind to contribute.
          self::add_text_to_tweets( $entry );
          self::add_media_to_tweets( $entry );
          self::add_tweet_to_tweets( $entry );
          self::add_embed_to_tweets( $entry );
      }

      return self::clean_return_tweets();
  }
  /**
   * Look up the conversion rules for a block type.
   *
   * @param string $block_name The registered block name.
   * @return array|null The matching entry of self::$supported_blocks, or null when the block is not supported.
   */
  private static function get_block_definition( $block_name ) {
      return isset( self::$supported_blocks[ $block_name ] )
          ? self::$supported_blocks[ $block_name ]
          : null;
  }
  /**
   * If the block has any text, process it, and add it to the tweet list.
   *
   * Text is placed greedily: the whole line is appended to the current tweet if it fits,
   * otherwise it goes in a tweet of its own, otherwise it is split by sentence, and finally
   * by word (joining the pieces with an ellipsis). Whenever a block ends up spread across
   * more than one tweet, the split position is recorded in the tweet's 'boundary'.
   *
   * @param array $block The block to process.
   */
  private static function add_text_to_tweets( $block ) {
      // This is a text block, is there any text?
      if ( 0 === strlen( $block['text'] ) ) {
          return;
      }

      $block_def = self::get_block_definition( $block['name'] );

      // Grab the most recent tweet, so we can append to that if we can.
      $current_tweet = self::get_current_tweet();

      // If the entire block can't be fit in this tweet, we need to start a new tweet.
      if ( $current_tweet['changed'] && ! self::is_valid_tweet( trim( $current_tweet['text'] ) . "\n\n{$block['text']}" ) ) {
          self::start_new_tweet();
      }

      // Multiline blocks prioritise splitting by line, but are otherwise identical to
      // normal text blocks. This means we can treat normal text blocks as being
      // "multiline", but with a single line.
      if ( 'multiline' === $block_def['type'] ) {
          $lines = explode( self::$line_separator, $block['text'] );
      } else {
          $lines = array( $block['text'] );
      }
      $line_total = count( $lines );

      // Keep track of how many characters from this block we've allocated to tweets.
      $current_character_count = 0;

      for ( $line_count = 0; $line_count < $line_total; $line_count++ ) {
          $line_text = $lines[ $line_count ];

          // Make sure we have the most recent tweet at the start of every loop.
          $current_tweet = self::get_current_tweet();

          if ( $current_tweet['changed'] ) {
              // When it's the first line, add an extra blank line to separate
              // the tweet text from that of the previous block.
              $separator = "\n\n";
              if ( $line_count > 0 ) {
                  $separator = "\n";
              }

              // Is this line short enough to append to the current tweet?
              if ( self::is_valid_tweet( trim( $current_tweet['text'] ) . "$separator$line_text" ) ) {
                  // Don't trim the text yet, as we may need it for boundary calculations.
                  $current_tweet['text'] = $current_tweet['text'] . "$separator$line_text";

                  self::save_current_tweet( $current_tweet, $block );
                  continue;
              }

              // This line is too long, and lines *must* be split to a new tweet if they don't fit
              // into the current tweet. If this isn't the first line, record where we split the block.
              if ( $line_count > 0 ) {
                  // Increment by 1 to allow for the \n between lines to be counted by ::get_boundary().
                  $current_character_count  += strlen( $current_tweet['text'] ) + 1;
                  $current_tweet['boundary'] = self::get_boundary( $block, $current_character_count );

                  self::save_current_tweet( $current_tweet );
              }

              // Start a new tweet.
              $current_tweet = self::start_new_tweet();
          }

          // Since we're now at the start of a new tweet, is this line short enough to be a tweet by itself?
          if ( self::is_valid_tweet( $line_text ) ) {
              $current_tweet['text'] = $line_text;

              self::save_current_tweet( $current_tweet, $block );
              continue;
          }

          // The line is too long for a single tweet, so split it by sentences, or linebreaks.
          $sentences = preg_split( '/(?|(?<!\.\.\.)(?<=[.?!]|\.\)|\.["\'])(\s+)(?=[\p{L}\'"\(])|(\n+))/u', $line_text, -1, PREG_SPLIT_DELIM_CAPTURE );
          if ( false === $sentences ) {
              // preg_split() returns false on a PCRE error (malformed UTF-8 is rejected by the
              // `u` modifier, for example). Treat the line as one unsplittable sentence rather
              // than passing false to count().
              $sentences = array( $line_text );
          }
          $sentence_total = count( $sentences );

          // preg_split() puts the blank space between sentences into a separate entry in the result,
          // so we need to step through the result array by two, and append the blank space when needed.
          for ( $sentence_count = 0; $sentence_count < $sentence_total; $sentence_count += 2 ) {
              $current_sentence = $sentences[ $sentence_count ];
              if ( isset( $sentences[ $sentence_count + 1 ] ) ) {
                  $current_sentence .= $sentences[ $sentence_count + 1 ];
              }

              // Make sure we have the most recent tweet.
              $current_tweet = self::get_current_tweet();

              // After the first sentence, we can try and append sentences to the previous sentence.
              if ( $current_tweet['changed'] && $sentence_count > 0 ) {
                  // Is this sentence short enough for appending to the current tweet?
                  if ( self::is_valid_tweet( $current_tweet['text'] . rtrim( $current_sentence ) ) ) {
                      $current_tweet['text'] .= $current_sentence;

                      self::save_current_tweet( $current_tweet, $block );
                      continue;
                  }
              }

              // Will this sentence fit in its own tweet?
              if ( self::is_valid_tweet( trim( $current_sentence ) ) ) {
                  if ( $current_tweet['changed'] ) {
                      // If we're already in the middle of a block, record the boundary
                      // before creating a new tweet.
                      if ( $line_count > 0 || $sentence_count > 0 ) {
                          $current_character_count  += strlen( $current_tweet['text'] );
                          $current_tweet['boundary'] = self::get_boundary( $block, $current_character_count );

                          self::save_current_tweet( $current_tweet );
                      }

                      $current_tweet = self::start_new_tweet();
                  }
                  $current_tweet['text'] = $current_sentence;

                  self::save_current_tweet( $current_tweet, $block );
                  continue;
              }

              // This long sentence will start the next tweet that this block is going
              // to be turned into, so we need to record the boundary and start a new tweet.
              if ( $current_tweet['changed'] ) {
                  $current_character_count  += strlen( $current_tweet['text'] );
                  $current_tweet['boundary'] = self::get_boundary( $block, $current_character_count );

                  self::save_current_tweet( $current_tweet );

                  $current_tweet = self::start_new_tweet();
              }

              // Split the long sentence into words.
              $words = preg_split( '/(\p{Z})/u', $current_sentence, -1, PREG_SPLIT_DELIM_CAPTURE );
              if ( false === $words ) {
                  // Same PCRE failure mode as above: fall back to the sentence as a single word.
                  $words = array( $current_sentence );
              }
              $word_total = count( $words );

              // As with sentences, the captured separators occupy the odd indices.
              for ( $word_count = 0; $word_count < $word_total; $word_count += 2 ) {
                  // Make sure we have the most recent tweet.
                  $current_tweet = self::get_current_tweet();

                  // If we're on a new tweet, we don't want to add a space at the start.
                  if ( ! $current_tweet['changed'] ) {
                      $current_tweet['text'] = $words[ $word_count ];

                      self::save_current_tweet( $current_tweet, $block );
                      continue;
                  }

                  // Can we add this word to the current tweet?
                  if ( self::is_valid_tweet( "{$current_tweet['text']} {$words[ $word_count ]}…" ) ) {
                      $space = isset( $words[ $word_count - 1 ] ) ? $words[ $word_count - 1 ] : ' ';

                      $current_tweet['text'] .= $space . $words[ $word_count ];

                      self::save_current_tweet( $current_tweet, $block );
                      continue;
                  }

                  // Add one for the space character that we won't include in the tweet text.
                  $current_character_count += strlen( $current_tweet['text'] ) + 1;

                  // We're starting a new tweet with this word. Append ellipsis to
                  // the current tweet, then move on.
                  $current_tweet['text'] .= '…';

                  $current_tweet['boundary'] = self::get_boundary( $block, $current_character_count );
                  self::save_current_tweet( $current_tweet );

                  $current_tweet = self::start_new_tweet();

                  // If this is the second tweet created by the split sentence, it'll start
                  // with ellipsis, which we don't want to count, but we do want to count the space
                  // that was replaced by this ellipsis.
                  $current_tweet['text']     = "…{$words[ $word_count ]}";
                  $current_character_count -= strlen( '…' );

                  self::save_current_tweet( $current_tweet, $block );
              }
          }
      }
  }
  /**
   * Check if the block has any media to add, and add it.
   *
   * Only media with an `image/*` or `video/*` type is attachable. A tweet gets either a single
   * video or GIF (when that is the first attachable item), or up to four still images. Space
   * is reserved only for the media that is actually attached, so items that will be dropped
   * never push the media into a new tweet.
   *
   * @param array $block The block to process.
   */
  private static function add_media_to_tweets( $block ) {
      if ( 0 === count( $block['media'] ) ) {
          return;
      }

      // Only images and videos can be uploaded.
      $media = array_values(
          array_filter(
              $block['media'],
              function ( $single ) {
                  return 0 === strpos( $single['type'], 'image/' ) || 0 === strpos( $single['type'], 'video/' );
              }
          )
      );

      // Nothing attachable: leave the tweet list untouched rather than opening a blank tweet.
      if ( 0 === count( $media ) ) {
          return;
      }

      if ( 0 === strpos( $media[0]['type'], 'video/' ) || 'image/gif' === $media[0]['type'] ) {
          // We can only attach a single video or GIF.
          $attachments = array_slice( $media, 0, 1 );
      } else {
          // Since a GIF or video isn't the first element, we can remove all of them from the array.
          $still_images = array_values(
              array_filter(
                  $media,
                  function ( $single ) {
                      return 0 !== strpos( $single['type'], 'video/' ) && 'image/gif' !== $single['type'];
                  }
              )
          );

          // We can only add the first four images found to the tweet.
          $attachments = array_slice( $still_images, 0, 4 );
      }

      $current_tweet = self::get_current_tweet();

      // We can only attach media to the previous tweet if the previous tweet
      // doesn't already have media.
      if ( count( $current_tweet['media'] ) > 0 ) {
          $current_tweet = self::start_new_tweet();
      }

      // Would adding this media make the text of the previous tweet too long? Count only the
      // attachments chosen above: reserving for dropped items could exceed the whole tweet
      // limit and force a needless new tweet.
      $reserved_characters = count( $attachments ) * self::$characters_per_media;
      if ( ! self::is_valid_tweet( $current_tweet['text'], $reserved_characters ) ) {
          $current_tweet = self::start_new_tweet();
      }

      $current_tweet['media'] = $attachments;

      self::save_current_tweet( $current_tweet, $block );
  }
  /**
   * Attach the block's quoted tweet, if it has one, to the tweet list.
   *
   * A tweet can only quote one other tweet, so a new tweet is started when the
   * current one already has a quote.
   *
   * @param array $block The block to process.
   */
  private static function add_tweet_to_tweets( $block ) {
      $quoted = $block['tweet'];
      if ( 0 === strlen( $quoted ) ) {
          return;
      }

      $target = self::get_current_tweet();
      if ( strlen( $target['tweet'] ) > 0 ) {
          $target = self::start_new_tweet();
      }

      $target['tweet'] = $quoted;
      self::save_current_tweet( $target, $block );
  }
  /**
   * Append the block's embed URL, if it has one, to the tweet text as a URL placeholder.
   *
   * The placeholder is added to the current tweet unless that tweet already contains a
   * URL placeholder, or has no room left for one; in those cases it opens a new tweet.
   *
   * @param array $block The block to process.
   */
  private static function add_embed_to_tweets( $block ) {
      if ( 0 === strlen( $block['embed'] ) ) {
          return;
      }

      $target = self::get_current_tweet();

      // Room needed: one separating character plus the URL, on top of any media already attached.
      $reserved = count( $target['media'] ) * self::$characters_per_media;
      $reserved = $reserved + 1 + self::$characters_per_url;

      // The length check is only reached when no placeholder is present, as `||` short-circuits.
      $needs_new_tweet = preg_match( '/url-placeholder-\d+-*/', $target['text'] )
          || ! self::is_valid_tweet( $target['text'], $reserved );

      if ( $needs_new_tweet ) {
          $target         = self::start_new_tweet();
          $target['text'] = self::generate_url_placeholder( $block['embed'] );
      } else {
          if ( empty( $target['text'] ) ) {
              $glue = '';
          } else {
              $glue = ' ';
          }
          $target['text'] .= $glue . self::generate_url_placeholder( $block['embed'] );
      }

      self::save_current_tweet( $target, $block );
  }
  /**
   * Given an array of blocks and optional editor information, this will extract them into
   * the internal representation used during parsing.
   *
   * Entries whose block type has no definition in self::$supported_blocks (including entries
   * without a block name) are dropped. Every remaining entry keeps its original keys and gains
   * 'name', 'text', 'media', 'tweet', and 'embed'.
   *
   * @param array $blocks An array of blocks and optional editor-related information.
   * @return array A zero-indexed array of blocks, in our internal representation.
   */
  private static function extract_blocks( $blocks ) {
      if ( empty( $blocks ) ) {
          return array();
      }

      $extracted = array();

      // Iterate over the entries themselves rather than indices 0..count-1, so sparse or
      // string-keyed input is handled instead of raising undefined offset notices.
      foreach ( $blocks as $entry ) {
          if ( ! isset( $entry['block']['blockName'] ) ) {
              continue;
          }

          $block_name = $entry['block']['blockName'];
          if ( ! self::get_block_definition( $block_name ) ) {
              continue;
          }

          $entry['name']  = $block_name;
          $entry['text']  = self::extract_text_from_block( $entry['block'] );
          $entry['media'] = self::extract_media_from_block( $entry['block'] );
          $entry['tweet'] = self::extract_tweet_from_block( $entry['block'] );
          $entry['embed'] = self::extract_embed_from_block( $entry['block'] );

          $extracted[] = $entry;
      }

      return $extracted;
  }
  /**
   * Append a blank tweet to the tweets array, and hand it back.
   *
   * @return array The blank tweet, as read back from the end of the tweets array.
   */
  private static function start_new_tweet() {
      $blank_tweet = array(
          // The blocks that contributed to this tweet.
          'blocks'   => array(),
          // Where in a block this tweet ends, when it only holds part of that block.
          'boundary' => false,
          // The text content of the tweet.
          'text'     => '',
          // The media attached to the tweet.
          'media'    => array(),
          // The tweet quoted by this tweet.
          'tweet'    => '',
          // Set when a block forces the tweet closed, even though later blocks
          // could technically still be appended.
          'finished' => false,
          // Set once the tweet has had any content saved to it.
          'changed'  => false,
      );

      self::$tweets[] = $blank_tweet;

      return self::get_current_tweet();
  }
  /**
   * Fetch the most recently added tweet.
   *
   * @return array|false The last tweet in the tweets array, or false when the array is empty.
   */
  private static function get_current_tweet() {
      $latest = end( self::$tweets );

      return $latest;
  }
  /**
   * Store the passed tweet in the last slot of the tweets array, replacing what was there.
   *
   * Before storing, the tweet is flagged as "changed". When a block is passed, the tweet is
   * also flagged as "finished" if the block's definition sets 'force_finished', and the block
   * is appended to the tweet's block list if it has a clientId that differs from the clientId
   * of the last block already recorded.
   *
   * @param array $tweet The tweet being stored.
   * @param array $block Optional. The block that was used to modify this tweet.
   * @return array The tweet, exactly as it was stored.
   */
  private static function save_current_tweet( $tweet, $block = null ) {
      $tweet['changed'] = true;

      if ( null !== $block ) {
          $definition = self::get_block_definition( $block['name'] );
          if ( $definition['force_finished'] ) {
              $tweet['finished'] = true;
          }

          // Only blocks that came from the editor (and so have a clientId) are recorded,
          // and each is recorded once, however many times it touches this tweet in a row.
          $previous_block = end( $tweet['blocks'] );
          if ( isset( $block['clientId'] ) ) {
              $already_recorded = false !== $previous_block && $previous_block['clientId'] === $block['clientId'];
              if ( ! $already_recorded ) {
                  $tweet['blocks'][] = $block;
              }
          }
      }

      // Move the internal pointer to the last tweet, so key() yields the slot to overwrite.
      end( self::$tweets );
      $last_index = key( self::$tweets );

      self::$tweets[ $last_index ] = $tweet;

      return $tweet;
  }
  627. /**
  628. * Checks if the passed text is valid for a tweet or not.
  629. *
  630. * @param string $text The text to check.
  631. * @param int $reserved_characters Optional. The number of characters to reduce the maximum tweet length by.
  632. * @return bool Whether or not the text is valid.
  633. */
  private static function is_valid_tweet( $text, $reserved_characters = 0 ) {
      // The limit is 280, less whatever the caller has reserved.
      $available_length = 280 - $reserved_characters;

      return self::is_within_twitter_length( $text, $available_length );
  }
  637. /**
  638. * Checks if the passed text is valid for image alt text.
  639. *
  640. * @param string $text The text to check.
  641. * @return bool Whether or not the text is valid.
  642. */
  private static function is_valid_alt_text( $text ) {
      // Alt text is checked against a 1000 character limit.
      $alt_text_limit = 1000;

      return self::is_within_twitter_length( $text, $alt_text_limit );
  }
  646. /**
  647. * Check if a string is no longer than a given length, according to Twitter's rules for counting string length.
  648. *
  649. * @param string $text The text to check.
  650. * @param int $max_length The number of characters long this string can be.
  651. * @return bool Whether or not the string is no longer than the length limit.
  652. */
  private static function is_within_twitter_length( $text, $max_length ) {
      // Running total of the weighted length of everything removed from $text so far.
      $weighted_length = 0;

      // Multiline separators are counted as a "\n", so swap them in before counting.
      $text = str_replace( self::$line_separator, "\n", $text );

      // '…' is used a lot, so strip it first to keep the cheap ASCII path available.
      // Each ellipsis glyph counts as two characters.
      $ellipses_found   = 0;
      $text             = str_replace( '…', '', $text, $ellipses_found );
      $weighted_length += 2 * $ellipses_found;

      // ASCII text plus emoji is a common case, so try removing emoji next.
      if ( ! self::is_ascii( $text ) ) {
          // Build the emoji cache the first time it's needed.
          if ( 0 === count( self::$emoji_list ) ) {
              self::$emoji_list = array_map( 'html_entity_decode', _wp_emoji_list( 'entities' ) );
          }

          // Each emoji removed counts as two characters.
          $emoji_found      = 0;
          $text             = str_replace( self::$emoji_list, '', $text, $emoji_found );
          $weighted_length += 2 * $emoji_found;
      }

      // If only ASCII is left, every remaining byte counts as one character.
      if ( self::is_ascii( $text ) ) {
          return ( $weighted_length + strlen( $text ) ) <= $max_length;
      }

      // Strip the glyphs that count as one character each.
      // Source: https://github.com/twitter/twitter-text/blob/master/config/v3.json .
      // The source lists the ranges in decimal; the regex uses the hex equivalents.
      $single_weight_glyphs = 0;
      $text                 = preg_replace( '/[\x{0000}-\x{10FF}\x{2000}-\x{200D}\x{2010}-\x{201F}\x{2032}-\x{2037}]/uS', '', $text, -1, $single_weight_glyphs );
      $weighted_length     += $single_weight_glyphs;

      // Whatever is left counts as two characters per glyph.
      // WP provides a compat version of mb_strlen(), so it is always available.
      if ( 0 !== strlen( $text ) ) {
          $weighted_length += 2 * mb_strlen( $text, 'UTF-8' );
      }

      return $weighted_length <= $max_length;
  }
  699. /**
  700. * Checks if a string only contains ASCII characters.
  701. *
  702. * @param string $text The string to check.
  703. * @return bool Whether or not the string is ASCII-only.
  704. */
  private static function is_ascii( $text ) {
      // Prefer mbstring's encoding check when the extension is loaded.
      if ( function_exists( 'mb_check_encoding' ) ) {
          return mb_check_encoding( $text, 'ASCII' );
      }

      // Otherwise, the string is ASCII when no byte falls outside 0x00-0x7F.
      return ! preg_match( '/[^\x00-\x7F]/', $text );
  }
  715. /**
  716. * A block will generate a certain amount of text to be inserted into a tweet. If that text is too
  717. * long for a tweet, we already know where the text will be split when it's published as tweet, but
  718. * we need to calculate where that corresponds to in the block edit UI.
  719. *
  720. * The tweet template for that block may add extra characters, extra characters are added for URL
  721. * placeholders, and the block may contain multiple RichText areas (corresponding to attributes),
  722. * so we need to keep track of both until this function calculates which attribute area (in the
  723. * block editor, the richTextIdentifier) that offset corresponds to, and how far into that attribute
  724. * area it is.
  725. *
  726. * @param array $block The block being checked.
  727. * @param integer $offset The position in the tweet text where it will be split.
  728. * @return array|false `false` if the boundary can't be determined. Otherwise, returns the
  729. * position in the block editor to insert the tweet boundary annotation.
  730. */
  private static function get_boundary( $block, $offset ) {
      // Without a clientId there is no way to map this block back to the editor UI,
      // so there's no point in generating a boundary.
      if ( ! isset( $block['clientId'] ) ) {
          return false;
      }

      $block_def = self::get_block_definition( $block['name'] );

      // Use the tags from the block definition when it has any, otherwise
      // fall back to the special "content" tag.
      if ( isset( $block_def['content'] ) && count( $block_def['content'] ) > 0 ) {
          $tags = $block_def['content'];
      } else {
          $tags = array( 'content' );
      }

      $tag_content = self::extract_tag_content_from_html( $tags, $block['block']['innerHTML'] );

      // $tag_content is split up by tag first, then lines. Remap it so it's split
      // by lines first, then by attribute name.
      $lines = array();
      foreach ( $tag_content as $tag => $content ) {
          if ( 'content' === $tag ) {
              $attribute_name = 'content';
          } else {
              $attribute_name = array_search( $tag, $block_def['content'], true );
          }

          foreach ( $content as $id => $content_string ) {
              // Multiline blocks can have multiple lines, other blocks always have exactly one.
              $line_number = 'multiline' === $block_def['type'] ? $id : 0;

              if ( ! isset( $lines[ $line_number ] ) ) {
                  $lines[ $line_number ] = array();
              }

              if ( ! isset( $lines[ $line_number ][ $attribute_name ] ) ) {
                  // For multiline blocks, or the first time this attribute is seen in a
                  // single line block, assign the string to the line/attribute.
                  $lines[ $line_number ][ $attribute_name ] = $content_string;
              } else {
                  // Later occurrences (only in single line blocks) are appended after a line break.
                  $lines[ $line_number ][ $attribute_name ] .= "\n$content_string";
              }
          }
      }

      $line_count     = count( $lines );
      $template_parts = preg_split( '/({{\w+}})/', $block_def['template'], -1, PREG_SPLIT_DELIM_CAPTURE );

      // Converts a chunk of tweet text into the text the editor works with: the
      // " (url-placeholder-N---)" suffixes are removed, and any remaining
      // placeholders are swapped back for the URLs they stand in for.
      $to_editor_text = function ( $data ) {
          $data = preg_replace( '/ \(url-placeholder-\d+-*\)/', '', $data );
          return preg_replace_callback(
              '/url-placeholder-(\d+)-*/',
              function ( $matches ) {
                  return self::$urls[ $matches[1] ];
              },
              $data
          );
      };

      // The total number of bytes we've processed from this block.
      $total_bytes_processed = 0;
      // The number of editor characters (UTF-16 code units) that the processed data translates to.
      $characters_processed = 0;

      foreach ( $lines as $line_number => $line ) {
          // Add up the length of all the parts of this line.
          $line_byte_total = array_sum( array_map( 'strlen', $line ) );

          if ( $line_byte_total > 0 ) {
              // There is something to put in the template, so walk each part of the template and count it.
              foreach ( $template_parts as $template_part ) {
                  $matches = array();
                  if ( ! preg_match( '/{{(\w+)}}/', $template_part, $matches ) ) {
                      // Literal template text only contributes its own bytes.
                      $total_bytes_processed += strlen( $template_part );
                      continue;
                  }

                  $part_name = $matches[1];
                  // A line doesn't necessarily have a value for every placeholder in the
                  // template; treat a missing one as empty rather than reading an undefined index.
                  $line_part_data  = isset( $line[ $part_name ] ) ? $line[ $part_name ] : '';
                  $line_part_bytes = strlen( $line_part_data );

                  if ( $total_bytes_processed + $line_part_bytes < $offset ) {
                      // The boundary is after this part: count it, and move on.
                      $total_bytes_processed += $line_part_bytes;
                      $characters_processed  += self::utf_16_code_unit_length( $to_editor_text( $line_part_data ) );
                      continue;
                  }

                  // The offset is not beyond this part. $total_bytes_processed is the sum of
                  // everything processed so far (including earlier parts of this line), so the
                  // difference is the number of bytes into this part where the boundary occurs.
                  $line_part_byte_boundary = $offset - $total_bytes_processed;

                  // Take the data from this part that appears before the boundary, and convert
                  // it to editor text, since URL placeholders aren't shown in the editor.
                  $line_part_pre_boundary_data = $to_editor_text( substr( $line_part_data, 0, $line_part_byte_boundary ) );

                  $boundary_start = self::utf_16_code_unit_length( $line_part_pre_boundary_data ) - 1;

                  // Multiline blocks need to offset for the characters that are in the same
                  // content area, but which were counted on previous lines.
                  if ( 'multiline' === $block_def['type'] ) {
                      $boundary_start += $characters_processed;
                  }

                  // Check if the boundary is happening on a line break. The byte before the
                  // boundary only exists when the boundary is at least one byte into the part;
                  // without this guard, a negative string offset would be read instead.
                  if ( $line_part_byte_boundary > 0 && "\n" === $line_part_data[ $line_part_byte_boundary - 1 ] ) {
                      $type = 'line-break';

                      // A line break boundary can be several consecutive line breaks,
                      // count them all so we know how big the annotation needs to be.
                      $matches = array();
                      preg_match( '/\n+$/', substr( $line_part_data, 0, $line_part_byte_boundary ), $matches );
                      $boundary_end    = $boundary_start + 1;
                      $boundary_start -= strlen( $matches[0] ) - 1;
                  } else {
                      $type         = 'normal';
                      $boundary_end = $boundary_start + 1;
                  }

                  return array(
                      'start'     => $boundary_start,
                      'end'       => $boundary_end,
                      'container' => $part_name,
                      'type'      => $type,
                  );
              }

              // Are we breaking at the end of this line?
              if ( $total_bytes_processed + 1 === $offset && $line_count > 1 ) {
                  reset( $block_def['content'] );
                  $container = key( $block_def['content'] );

                  return array(
                      'line'      => $line_number,
                      'container' => $container,
                      'type'      => 'end-of-line',
                  );
              }

              // The newline at the end of each line is 1 byte, but empty lines aren't counted.
              $total_bytes_processed++;
          }

          // Empty lines do count in the editor, since they're displayed there.
          $characters_processed++;
      }

      return false;
  }
  869. /**
  870. * JavaScript uses UTF-16 for encoding strings, which means we need to provide UTF-16
  871. * based offsets for the block editor to render tweet boundaries in the correct location.
  872. *
  873. * UTF-16 is a variable-width character encoding: every code unit is 2 bytes, a single character
  874. * can be one or two code units long. Fortunately for us, JavaScript's String.charAt() is based
  875. * on the older UCS-2 character encoding, which only counts single code units. PHP's strlen()
  876. * counts a code unit as being 2 characters, so once a string is converted to UTF-16, we have
  877. * a fast way to determine how long it is in UTF-16 code units.
  878. *
  879. * @param string $text The natively encoded string to get the length of.
  880. * @return int The length of the string in UTF-16 code units. Returns -1 if the length could not
  881. * be calculated.
  882. */
  private static function utf_16_code_unit_length( $text ) {
      // Without mb_convert_encoding() the string can't be converted. Return -1 so
      // the caller avoids adding an incorrect annotation.
      if ( ! function_exists( 'mb_convert_encoding' ) ) {
          return -1;
      }

      // Plain UTF-16 can prepend a Byte Order Mark (BOM) code unit to indicate
      // endianness. We don't want to count that, and endianness doesn't matter here,
      // so UTF-16BE is used: it is big-endian, and no BOM is prepended.
      $utf_16_text = mb_convert_encoding( $text, 'UTF-16BE' );

      // Every UTF-16 code unit is two bytes long.
      return strlen( $utf_16_text ) / 2;
  }
  896. /**
  897. * Extracts the tweetable text from a block.
  898. *
  899. * @param array $block A single block, as generated by parse_block().
  900. * @return string The tweetable text from the block, in the correct template form.
  901. */
  private static function extract_text_from_block( $block ) {
      // A block without any innerHTML has no text to extract.
      if ( empty( $block['innerHTML'] ) ) {
          return '';
      }

      $definition = self::get_block_definition( $block['blockName'] );

      // Text can currently only be extracted from HTML text nodes.
      $reads_from_html = isset( $definition['content_location'] ) && 'html' === $definition['content_location'];
      if ( ! $reads_from_html ) {
          return '';
      }

      // Work out which tags hold the content; fall back to the special "content" tag.
      $has_content_tags = isset( $definition['content'] ) && count( $definition['content'] ) > 0;
      $tags             = $has_content_tags ? $definition['content'] : array( 'content' );

      $tag_values = self::extract_tag_content_from_html( $tags, $block['innerHTML'] );

      // Single line blocks are handled as "multiline" blocks that have only one line.
      $is_multiline = 'multiline' === $definition['type'];

      $lines = array();
      foreach ( $tag_values as $tag => $values ) {
          // Single line blocks squash every value found for this tag into one value.
          if ( ! $is_multiline ) {
              $values = array( implode( "\n", $values ) );
          }

          // The special "content" tag is its own placeholder name; other tags are
          // looked up in the block definition.
          $placeholder = 'content' === $tag ? 'content' : array_search( $tag, $definition['content'], true );
          $needle      = '{{' . $placeholder . '}}';

          // Apply each value to the template for its line, starting that line
          // from a fresh copy of the template if it hasn't been seen yet.
          foreach ( $values as $line_number => $value ) {
              $line_template         = isset( $lines[ $line_number ] ) ? $lines[ $line_number ] : $definition['template'];
              $lines[ $line_number ] = str_replace( $needle, $value, $line_template );
          }
      }

      // Drop any lines that ended up with no content applied to them.
      $empty_template  = preg_replace( '/{{.*?}}/', '', $definition['template'] );
      $populated_lines = array();
      foreach ( $lines as $line_number => $line ) {
          if ( $line !== $empty_template ) {
              $populated_lines[ $line_number ] = $line;
          }
      }

      // Join the remaining lines into a single string.
      $text = implode( self::$line_separator, $populated_lines );

      // Trim off any trailing whitespace and line separators.
      return preg_replace( '/(\s|' . self::$line_separator . ')+$/u', '', $text );
  }
  954. /**
  955. * Extracts the tweetable media from a block.
  956. *
  957. * @param array $block A single block, as generated by parse_block().
  958. * @return array {
  959. * An array of media.
  960. *
  961. * @type string url The URL of the media.
  962. * @type string alt The alt text of the media.
  963. * }
  964. */
  private static function extract_media_from_block( $block ) {
      $definition = self::get_block_definition( $block['blockName'] );
      $block_type = $definition['type'];

      if ( 'image' === $block_type ) {
          // Collect the URL and the alt text of every image in the block.
          $image_urls = self::extract_attr_content_from_html(
              $definition['content']['url'][0],
              $definition['content']['url'][1],
              $block['innerHTML']
          );
          $image_alts = self::extract_attr_content_from_html(
              $definition['content']['alt'][0],
              $definition['content']['alt'][1],
              $block['innerHTML']
          );

          $images = array();
          foreach ( $image_urls as $index => $image_url ) {
              $file_name = basename( wp_parse_url( $image_url, PHP_URL_PATH ) );
              $file_data = wp_check_filetype( $file_name );
              $alt_text  = $image_alts[ $index ];

              $images[] = array(
                  'url'  => $image_url,
                  // Alt text that fails the length check is dropped entirely.
                  'alt'  => self::is_valid_alt_text( $alt_text ) ? $alt_text : '',
                  'type' => $file_data['type'],
              );
          }

          return $images;
      }

      // Only image and video blocks produce media.
      if ( 'video' !== $block_type ) {
          return array();
      }

      // VideoPress videos have their URL in the block attributes; other videos
      // have it in the block's HTML.
      $is_videopress = isset( $block['attrs']['src'] ) && 0 === strpos( $block['attrs']['src'], 'https://videos.files.wordpress.com/' );
      if ( $is_videopress ) {
          $video_urls = array( $block['attrs']['src'] );
      } else {
          $video_urls = self::extract_attr_content_from_html(
              $definition['content']['url'][0],
              $definition['content']['url'][1],
              $block['innerHTML']
          );
      }

      $videos = array();

      // Only the first video found is ever used, so the rest are ignored.
      if ( count( $video_urls ) > 0 ) {
          $file_name = basename( wp_parse_url( $video_urls[0], PHP_URL_PATH ) );
          $file_data = wp_check_filetype( $file_name );

          $videos[] = array(
              'url'  => $video_urls[0],
              'type' => $file_data['type'],
          );
      }

      return $videos;
  }
  1010. /**
  1011. * Extracts the tweet URL from a Twitter embed block.
  1012. *
  1013. * @param array $block A single block, as generated by parse_block().
  1014. * @return string The tweet URL. Empty string if there is none available.
  1015. */
  private static function extract_tweet_from_block( $block ) {
      // Only core/embed blocks can be Twitter embeds.
      if ( 'core/embed' !== $block['blockName'] ) {
          return '';
      }

      // The embed needs to be flagged as coming from Twitter.
      if ( ! isset( $block['attrs']['providerNameSlug'] ) || 'twitter' !== $block['attrs']['providerNameSlug'] ) {
          return '';
      }

      // A Twitter embed without a URL has nothing to quote. Return the documented
      // empty string, rather than reading an attribute that isn't set.
      if ( ! isset( $block['attrs']['url'] ) ) {
          return '';
      }

      return $block['attrs']['url'];
  }
  1025. /**
  1026. * Extracts URL from an embed block.
  1027. *
  1028. * @param array $block A single block, as generated by parse_block().
  1029. * @return string The URL. Empty string if there is none available.
  1030. */
  private static function extract_embed_from_block( $block ) {
      $block_def = self::get_block_definition( $block['blockName'] );

      // Only blocks defined as embeds can have an embed URL.
      if ( 'embed' !== $block_def['type'] ) {
          return '';
      }

      // Twitter embeds are handled in ::extract_tweet_from_block().
      if (
          'core/embed' === $block['blockName']
          && ( isset( $block['attrs']['providerNameSlug'] ) && 'twitter' === $block['attrs']['providerNameSlug'] )
      ) {
          return '';
      }

      $url = '';

      // The URL lives in the block attribute named by the first entry in the block
      // definition's content list. If that attribute isn't set on this block, keep
      // the documented empty string rather than reading an undefined index.
      if ( 'block-attributes' === $block_def['content_location'] && isset( $block_def['content'][0] ) ) {
          $attribute_name = $block_def['content'][0];

          if ( isset( $block['attrs'][ $attribute_name ] ) ) {
              $url = $block['attrs'][ $attribute_name ];
          }
      }

      // For the GIF block, the "/embed/" part of the URL is swapped for "/gifs/".
      if ( 'jetpack/gif' === $block['blockName'] ) {
          $url = str_replace( '/embed/', '/gifs/', $url );
      }

      return $url;
  }
  1052. /**
  1053. * There's a bunch of left-over cruft in the tweets array that we don't need to return. Removing
  1054. * it helps keep the size of the data down.
  1055. */
  private static function clean_return_tweets() {
      // Clean each tweet in turn. Tweets that shouldn't be returned are mapped to
      // false, then filtered out at the end.
      $tweets = array_map(
          function ( $tweet ) {
              // Remove tweets that don't have anything saved in them. eg, if the last block is a
              // header with no text, it'll force a new tweet, but nothing ends up in that tweet.
              if ( ! $tweet['changed'] ) {
                  return false;
              }

              // Replace any URL placeholders that appear in the text.
              $tweet['urls'] = array();
              foreach ( self::$urls as $id => $url ) {
                  $count         = 0;
                  $tweet['text'] = str_replace( str_pad( "url-placeholder-$id", self::$characters_per_url, '-' ), $url, $tweet['text'], $count );

                  // If we found a URL, keep track of it for the editor.
                  if ( $count > 0 ) {
                      $tweet['urls'][] = $url;
                  }
              }

              // Remove any inline placeholders.
              $tweet['text'] = str_replace( self::$inline_placeholder, '', $tweet['text'] );

              // If the tweet text consists only of whitespace, we can remove all of it.
              if ( preg_match( '/^\s*$/u', $tweet['text'] ) ) {
                  $tweet['text'] = '';
              }

              // Remove trailing whitespace from every line.
              $tweet['text'] = preg_replace( '/\p{Z}+$/um', '', $tweet['text'] );

              // Remove all trailing whitespace (including line breaks) from the end of the text.
              $tweet['text'] = rtrim( $tweet['text'] );

              // Remove internal flags.
              unset( $tweet['changed'] );
              unset( $tweet['finished'] );

              // Remove bulky block data.
              if ( ! isset( $tweet['blocks'][0]['attributes'] ) && ! isset( $tweet['blocks'][0]['clientId'] ) ) {
                  $tweet['blocks'] = array();
              } else {
                  // Remove the parts of the block data that the editor doesn't need.
                  $block_count = count( $tweet['blocks'] );
                  for ( $ii = 0; $ii < $block_count; $ii++ ) {
                      $keys = array_keys( $tweet['blocks'][ $ii ] );
                      foreach ( $keys as $key ) {
                          // The editor only needs these attributes, everything else will be unset.
                          if ( in_array( $key, array( 'attributes', 'clientId' ), true ) ) {
                              continue;
                          }

                          unset( $tweet['blocks'][ $ii ][ $key ] );
                      }
                  }
              }

              // Once we've finished cleaning up, check if there's anything left to be tweeted.
              // The text is compared against '' explicitly: empty() also treats the string "0"
              // as empty, which would discard a tweet whose entire text is "0".
              if ( '' === $tweet['text'] && empty( $tweet['media'] ) && empty( $tweet['tweet'] ) ) {
                  return false;
              }

              return $tweet;
          },
          self::$tweets
      );

      // Clean any removed tweets out of the result, and reindex it.
      return array_values( array_filter( $tweets, 'is_array' ) );
  }
  1117. /**
  1118. * Given a list of tags and a HTML blob, this will extract the text content inside
  1119. * each of the given tags.
  1120. *
  1121. * @param array $tags An array of tag names.
  1122. * @param string $html A blob of HTML.
  1123. * @return array An array of the extract content. The keys in the array are the $tags,
  1124. * each value is an array. The value array is indexed in the same order as the tag
  1125. * appears in the HTML blob, including nested tags.
  1126. */
  private static function extract_tag_content_from_html( $tags, $html ) {
      // Work with a string, so the character checks below are always valid.
      $html = (string) $html;

      // Serialised blocks will sometimes wrap the innerHTML in newlines, but those newlines
      // are removed when innerHTML is parsed into an attribute. Remove them so we're working
      // with the same information. The emptiness check avoids reading the first character
      // of an empty string.
      if ( '' !== $html && "\n" === $html[0] && "\n" === substr( $html, -1 ) ) {
          // substr() can return false on older PHP versions when nothing is left.
          $html = (string) substr( $html, 1, -1 );
      }

      // Normalise <br>.
      $html = preg_replace( '/<br\s*\/?>/', '<br>', $html );

      // If there were no tags passed, assume the entire text is required.
      if ( empty( $tags ) ) {
          $tags = array( 'content' );
      }

      $values = array();

      $tokens = wp_html_split( $html );

      $validator = new Twitter_Validator();

      foreach ( $tags as $tag ) {
          $values[ $tag ] = array();

          // Since tags can be nested, keeping track of the nesting level allows
          // us to extract nested content into a flat array. Content is being
          // stored whenever $opened and $closed differ.
          if ( 'content' === $tag ) {
              // The special "content" tag means we should store the entire content,
              // so assume the tag is open from the beginning.
              $opened = 0;
              $closed = -1;

              $values['content'][0] = '';
          } else {
              $opened = -1;
              $closed = -1;
          }

          // When we come across a URL, we need to keep track of it, so it can then be inserted
          // in the right place.
          $current_url = '';
          foreach ( $tokens as $token ) {
              if ( 0 === strlen( $token ) ) {
                  // Skip any empty tokens.
                  continue;
              }

              // If we're currently storing content, check if it's a text-formatting
              // tag that we should apply.
              if ( $opened !== $closed ) {
                  // End of a paragraph, put in some newlines (as long as we're not extracting paragraphs).
                  if ( '</p>' === $token && 'p' !== $tag ) {
                      $values[ $tag ][ $opened ] .= "\n\n";
                  }

                  // A line break gets one newline.
                  if ( '<br>' === $token ) {
                      $values[ $tag ][ $opened ] .= "\n";
                  }

                  // A link has opened, grab the URL for inserting later.
                  if ( 0 === strpos( $token, '<a ' ) ) {
                      $href_values = self::extract_attr_content_from_html( 'a', 'href', $token );
                      if ( ! empty( $href_values[0] ) && $validator->isValidURL( $href_values[0] ) ) {
                          // Remember the URL.
                          $current_url = $href_values[0];
                      }
                  }

                  // A link has closed, insert the URL from that link if we have one.
                  if ( '</a>' === $token && '' !== $current_url ) {
                      // Generate a unique-to-this-block placeholder which takes up the
                      // same number of characters as a URL does.
                      $values[ $tag ][ $opened ] .= ' (' . self::generate_url_placeholder( $current_url ) . ')';
                      $current_url                = '';
                  }

                  // We don't return inline images, but they technically take up 1 character in the RichText.
                  if ( 0 === strpos( $token, '<img ' ) ) {
                      $values[ $tag ][ $opened ] .= self::$inline_placeholder;
                  }
              }

              if ( "<$tag>" === $token || 0 === strpos( $token, "<$tag " ) ) {
                  // A tag has just been opened.
                  $opened++;

                  // Set an empty value now, so we're keeping track of empty tags.
                  if ( ! isset( $values[ $tag ][ $opened ] ) ) {
                      $values[ $tag ][ $opened ] = '';
                  }
                  continue;
              }

              if ( "</$tag>" === $token ) {
                  // The tag has been closed.
                  $closed++;
                  continue;
              }

              if ( '<' === $token[0] ) {
                  // We can skip any other tags.
                  continue;
              }

              if ( $opened !== $closed ) {
                  // We're currently in a tag, with some content. Start by decoding any HTML entities.
                  $token = html_entity_decode( $token, ENT_QUOTES );

                  // Find any URLs in this content, and replace them with a placeholder.
                  preg_match_all( Twitter_Regex::getValidUrlMatcher(), $token, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE );

                  // Each replacement changes the length of $token, so track how far the
                  // positions of later matches have shifted.
                  $offset = 0;
                  foreach ( $matches as $match ) {
                      list( $url, $start ) = $match[2];

                      $token = substr_replace( $token, self::generate_url_placeholder( $url ), $start + $offset, strlen( $url ) );

                      $offset += self::$characters_per_url - strlen( $url );

                      // If we're in a link with a URL set, there's no need to keep two copies of the same link.
                      if ( ! empty( $current_url ) ) {
                          $lower_url         = strtolower( $url );
                          $lower_current_url = strtolower( $current_url );

                          if ( $lower_url === $lower_current_url ) {
                              $current_url = '';
                          }

                          // Check that the link text isn't just a shortened version of the href value.
                          $trimmed_current_url = preg_replace( '|^https?://|', '', $lower_current_url );
                          if ( $lower_url === $trimmed_current_url || trim( $trimmed_current_url, '/' ) === $lower_url ) {
                              $current_url = '';
                          }
                      }
                  }

                  // Append it to the right value.
                  $values[ $tag ][ $opened ] .= $token;
              }
          }
      }

      return $values;
  }
  1245. /**
  1246. * Extracts the attribute content from a tag.
  1247. *
  1248. * This method allows for the HTML to have multiple instances of the tag, and will return
  1249. * an array containing the attribute value (or an empty string, if the tag doesn't have the
  1250. * requested attribute) for each occurrence of the tag.
  1251. *
  1252. * @param string $tag The tag we're looking for.
  1253. * @param string $attr The name of the attribute we're looking for.
  1254. * @param string $html The HTML we're searching through.
  1255. * @param array $attr_filters Optional. Filters tags based on whether or not they have attributes with given values.
  1256. * @return array The array of attribute values found.
  1257. */
  private static function extract_attr_content_from_html( $tag, $attr, $html, $attr_filters = array() ) {
      // Given our single tag and attribute, construct a KSES filter for it.
      $kses_filter = array(
          $tag => array(
              $attr => array(),
          ),
      );

      // Attributes used for filtering need to survive KSES, too.
      foreach ( $attr_filters as $filter_attr => $filter_value ) {
          $kses_filter[ $tag ][ $filter_attr ] = array();
      }

      // Remove all HTML except for the tag we're after. On that tag,
      // remove all attributes except for the ones allowed above.
      $stripped_html = wp_kses( $html, $kses_filter );

      $values = array();

      $tokens = wp_html_split( $stripped_html );
      foreach ( $tokens as $token ) {
          $found_value = '';

          if ( 0 === strlen( $token ) ) {
              // Skip any empty tokens.
              continue;
          }

          if ( '<' !== $token[0] ) {
              // We can skip any non-tag tokens.
              continue;
          }

          $token_attrs = wp_kses_attr_parse( $token );

          // Skip tags that KSES couldn't handle.
          if ( false === $token_attrs ) {
              continue;
          }

          // Remove the tag open and close chunks.
          $found_tag = array_shift( $token_attrs );
          array_pop( $token_attrs );

          // We somehow got a tag that isn't the one we're after. Skip it.
          if ( 0 !== strpos( $found_tag, "<$tag " ) ) {
              continue;
          }

          // We can only fail an attribute filter if one is set.
          $passed_filter = count( $attr_filters ) === 0;

          foreach ( $token_attrs as $token_attr_string ) {
              // The first "=" in the string will be between the attribute name/value. An
              // attribute written without a value has no "=", so pad the result: that way
              // the value is an empty string, instead of an undefined offset.
              list( $token_attr_name, $token_attr_value ) = array_pad( explode( '=', $token_attr_string, 2 ), 2, '' );

              $token_attr_name  = trim( $token_attr_name );
              $token_attr_value = trim( $token_attr_value );

              // Remove the quotes from around the value.
              if ( '' !== $token_attr_value && in_array( $token_attr_value[0], array( '"', "'" ), true ) ) {
                  $token_attr_value = trim( $token_attr_value, $token_attr_value[0] );
              }

              // If this is the attribute we're after, save the value for the end of the loop.
              if ( $token_attr_name === $attr ) {
                  $found_value = $token_attr_value;
              }

              if ( isset( $attr_filters[ $token_attr_name ] ) && $attr_filters[ $token_attr_name ] === $token_attr_value ) {
                  $passed_filter = true;
              }
          }

          if ( $passed_filter ) {
              // We always want to append the found value, even if we didn't "find" a matching attribute.
              // An empty string in the return value means that we found the tag, but the attribute was
              // either empty, or not set.
              $values[] = html_entity_decode( $found_value, ENT_QUOTES );
          }
      }

      return $values;
  }
  /**
   * Builds a stand-in string for a URL and remembers the URL it stands for.
   *
   * The URL is appended to the static `self::$urls` list, and the placeholder embeds the
   * position it was stored at. The placeholder is right-padded with `-` up to
   * `self::$characters_per_url` characters, so that it takes up as much room as
   * Twitter counts for a URL in a tweet.
   *
   * @param string $url The URL to generate a placeholder for.
   * @return string The placeholder.
   */
  public static function generate_url_placeholder( $url ) {
      // The new entry lands at the end of the list, so its index is the current length.
      $index        = count( self::$urls );
      self::$urls[] = $url;

      $placeholder = 'url-placeholder-' . $index;

      return str_pad( $placeholder, self::$characters_per_url, '-' );
  }
  /**
   * Retrieves the Twitter card data for a list of URLs.
   *
   * Each URL that passes validation is requested, and the response body is searched for
   * Twitter card and Open Graph `<meta>` tags. URLs that fail validation are dropped, and
   * have no entry in the returned array.
   *
   * @param array $urls The list of URLs to grab Twitter card data for.
   * @return array The Twitter card data, keyed by URL. Each entry holds either the card fields
   *               that were found (`creator`, `description`, `image`, `title`, `type`), or a
   *               single `error` key set to `invalid_url` or `no_og_data`.
   */
  public static function generate_cards( $urls ) {
      $validator = new Twitter_Validator();

      // Keep the original keys, so that results can be matched back to their request.
      $requests = array();
      foreach ( $urls as $id => $url ) {
          if ( $validator->isValidURL( $url ) ) {
              $requests[ $id ] = array(
                  'url' => $url,
              );
          }
      }

      // Nothing passed validation, so there's nothing to request.
      if ( empty( $requests ) ) {
          return array();
      }

      $results = Requests::request_multiple( $requests );

      // For each card field, an ordered list of meta tag filters to try. The first filter
      // that finds a non-empty value wins. Each filter is a separate array, so that several
      // filters can test the same attribute: repeating a key within a single array would
      // silently discard all but the last one.
      $card_data = array(
          'creator'     => array(
              array( 'name' => 'twitter:creator' ),
          ),
          'description' => array(
              array( 'name' => 'twitter:description' ),
              array( 'property' => 'og:description' ),
          ),
          'image'       => array(
              array( 'name' => 'twitter:image' ),
              array( 'property' => 'og:image:secure_url' ),
              array( 'property' => 'og:image' ),
          ),
          'title'       => array(
              array( 'name' => 'twitter:text:title' ),
              array( 'property' => 'og:title' ),
          ),
          'type'        => array(
              array( 'name' => 'twitter:card' ),
          ),
      );

      $cards = array();
      foreach ( $results as $id => $result ) {
          $url = $requests[ $id ]['url'];

          // Per the Requests documentation, a failed request can be represented by an exception
          // object instead of a response, so don't assume the `success` property exists.
          if ( ! is_object( $result ) || empty( $result->success ) ) {
              $cards[ $url ] = array(
                  'error' => 'invalid_url',
              );
              continue;
          }

          $url_card_data = array();
          foreach ( $card_data as $key => $filters ) {
              foreach ( $filters as $filter ) {
                  $found_data = self::extract_attr_content_from_html( 'meta', 'content', $result->body, $filter );

                  if ( isset( $found_data[0] ) && '' !== $found_data[0] ) {
                      // NOTE(review): extract_attr_content_from_html() already entity-decodes the
                      // values it returns, so this decodes a second time. Confirm that's intended.
                      $url_card_data[ $key ] = html_entity_decode( $found_data[0], ENT_QUOTES );
                      break;
                  }
              }
          }

          if ( count( $url_card_data ) > 0 ) {
              $cards[ $url ] = $url_card_data;
          } else {
              $cards[ $url ] = array(
                  'error' => 'no_og_data',
              );
          }
      }

      return $cards;
  }
  /**
   * Get the WPCOM or self-hosted site ID.
   *
   * When the `IS_WPCOM` constant is defined and truthy, the current blog ID is used.
   * Otherwise, the ID is read from the `id` Jetpack option.
   *
   * @return int|WP_Error The site ID, or a WP_Error (code `unavailable_site_id`) when no ID is available.
   */
  public static function get_site_id() {
      if ( defined( 'IS_WPCOM' ) && IS_WPCOM ) {
          $blog_id = get_current_blog_id();
      } else {
          $blog_id = Jetpack_Options::get_option( 'id' );
      }

      if ( $blog_id ) {
          return (int) $blog_id;
      }

      // A missing or empty ID is reported to the caller as an error.
      return new WP_Error(
          'unavailable_site_id',
          __( 'Sorry, something is wrong with your Jetpack connection.', 'jetpack' ),
          403
      );
  }
  1422. }