暂无描述

jetpack-wpes-query-parser.php 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  1. <?php
  2. /**
  3. * Parse a pure text query into WordPress Elasticsearch query. This builds on
  4. * the Jetpack_WPES_Query_Builder() to provide search query parsing.
  5. *
  6. * The key part of this parser is taking a user's query string typed into a box
  7. * and converting it into an ES search query.
  8. *
  9. * This varies by application, but roughly it means extracting some parts of the query
  10. * (authors, tags, and phrases) that are treated as a filter. Then taking the
  11. * remaining words and building the correct query (possibly with prefix searching
  12. * if we are doing search as you type)
  13. *
  14. * This class only supports ES 2.x+
  15. *
  16. * This parser builds queries of the form:
  17. * bool:
  18. * must:
  19. * AND match of a single field (ideally an edgengram field)
  20. * filter:
  21. * filter clauses from context (eg @gibrown, #news, etc)
  22. * should:
  23. * boosting of results by various fields
  24. *
  25. * Features supported:
  26. * - search as you type
  27. * - phrases
  28. * - supports querying across multiple languages at once
  29. *
  30. * Example usage (from Search on Reader Manage):
  31. *
  32. * require_lib( 'jetpack-wpes-query-builder/jetpack-wpes-search-query-parser' );
  33. * $parser = new Jetpack_WPES_Search_Query_Parser( $args['q'], array( $lang ) );
  34. *
  35. * //author
  36. * $parser->author_field_filter( array(
  37. * 'prefixes' => array( '@' ),
  38. * 'wpcom_id_field' => 'author_id',
  39. * 'must_query_fields' => array( 'author.engram', 'author_login.engram' ),
  40. * 'boost_query_fields' => array( 'author^2', 'author_login^2', 'title.default.engram' ),
  41. * ) );
  42. *
  43. * //remainder of query
  44. * $match_content_fields = $parser->merge_ml_fields(
  45. * array(
  46. * 'all_content' => 0.1,
  47. * ),
  48. * array(
  49. * 'all_content.default.engram^0.1',
  50. * )
  51. * );
  52. * $boost_content_fields = $parser->merge_ml_fields(
  53. * array(
  54. * 'title' => 2,
  55. * 'description' => 1,
  56. * 'tags' => 1,
  57. * ),
  58. * array(
  59. * 'author_login^2',
  60. * 'author^2',
  61. * )
  62. * );
  63. *
  64. * $parser->phrase_filter( array(
  65. * 'must_query_fields' => $match_content_fields,
  66. * 'boost_query_fields' => $boost_content_fields,
  67. * ) );
  68. * $parser->remaining_query( array(
  69. * 'must_query_fields' => $match_content_fields,
  70. * 'boost_query_fields' => $boost_content_fields,
  71. * ) );
  72. *
  73. * //Boost on phrases
  74. * $parser->remaining_query( array(
  75. * 'boost_query_fields' => $boost_content_fields,
  76. * 'boost_query_type' => 'phrase',
  77. * ) );
  78. *
  79. * //boosting
  80. * $parser->add_max_boost_to_functions( 20 );
  81. * $parser->add_function( 'field_value_factor', array(
  82. * 'follower_count' => array(
  83. * 'modifier' => 'sqrt',
  84. * 'factor' => 1,
  85. * 'missing' => 0,
  86. * ) ) );
  87. *
  88. * //Filtering
  89. * $parser->add_filter( array(
  90. * 'exists' => array( 'field' => 'langs.' . $lang )
  91. * ) );
  92. *
  93. * //run the query
  94. * $es_query_args = array(
  95. * 'name' => 'feeds',
  96. * 'blog_id' => false,
  97. * 'security_strategy' => 'a8c',
  98. * 'type' => 'feed,blog',
  99. * 'fields' => array( 'blog_id', 'feed_id' ),
  100. * 'query' => $parser->build_query(),
  101. * 'filter' => $parser->build_filter(),
  102. * 'size' => $size,
  103. * 'from' => $from
  104. * );
  105. * $es_results = es_api_search_index( $es_query_args, 'api-feed-find' );
  106. *
  107. */
  108. jetpack_require_lib( 'jetpack-wpes-query-builder' );
  109. class Jetpack_WPES_Search_Query_Parser extends Jetpack_WPES_Query_Builder {
  110. protected $orig_query = '';
  111. protected $current_query = '';
  112. protected $langs;
  113. protected $avail_langs = array( 'ar', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'eu', 'fa', 'fi', 'fr', 'he', 'hi', 'hu', 'hy', 'id', 'it', 'ja', 'ko', 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' );
  /**
   * Remember the raw user query and the normalized languages to search in.
   *
   * The working copy of the query starts out identical to the original text;
   * the *_filter() methods then strip the parts they consume from it.
   *
   * @param string $user_query Query text exactly as the user typed it.
   * @param array  $langs      Language codes, normalized through norm_langs().
   */
  public function __construct( $user_query, $langs ) {
      $this->orig_query = $this->current_query = $user_query;
      $this->langs      = $this->norm_langs( $langs );
  }
  119. protected $extracted_phrases = array();
  /**
   * Return the working copy of the query, i.e. whatever text has not yet been
   * removed by the extraction methods of this class.
   *
   * @return string
   */
  public function get_current_query() {
      return $this->current_query;
  }
  /**
   * Overwrite the working copy of the query. The original query passed to the
   * constructor is left untouched.
   *
   * @param string $q New working query text.
   */
  public function set_current_query( $q ) {
      $this->current_query = $q;
  }
  126. ///////////////////////////////////////////////////////
  127. // Methods for Building arrays of multilingual fields
  /**
   * Normalize a list of language codes.
   *
   * Each code is cut at the first '-' or '_' (so 'pt-br' and 'pt_BR' both
   * become 'pt'). Codes found in $this->avail_langs are kept; every other
   * code collapses into the single pseudo-language 'default'. Duplicates are
   * removed and first-seen order is preserved.
   *
   * @param array $langs Language codes to normalize.
   * @return array Unique normalized codes (possibly containing 'default').
   */
  public function norm_langs( $langs ) {
      $seen = array();
      foreach ( $langs as $code ) {
          $base = strtok( $code, '-_' );
          $key  = in_array( $base, $this->avail_langs ) ? $base : 'default';
          // Keyed by code so repeated languages collapse into one entry.
          $seen[ $key ] = true;
      }
      return array_keys( $seen );
  }
  /**
   * Return the language used as a field suffix.
   *
   * Only the first normalized language is considered.
   *
   * @return string|null First entry of $this->langs, or null when no
   *                     languages are set.
   */
  public function get_lang_field_suffix() {
      if ( is_array( $this->langs ) && ! empty( $this->langs ) ) {
          return $this->langs[0];
      }
      return null;
  }
  /**
   * Expand field prefixes into per-language boosted field names.
   *
   * For every prefix => boost pair and every language in $this->langs this
   * emits "<prefix>.<lang>^<boost>". The additional fields are appended
   * verbatim after the expanded ones.
   *
   * @param array $fields2boosts     Map of field prefix => boost value.
   * @param array $additional_fields Field names appended unchanged.
   * @return array Flat list of field names.
   */
  public function merge_ml_fields( $fields2boosts, $additional_fields ) {
      $merged = array();
      foreach ( $fields2boosts as $prefix => $boost ) {
          foreach ( $this->langs as $lang ) {
              $merged[] = "{$prefix}.{$lang}^{$boost}";
          }
      }
      foreach ( $additional_fields as $extra ) {
          $merged[] = $extra;
      }
      return $merged;
  }
  166. ////////////////////////////////////
  167. // Extract Fields for Filtering on
  /**
   * Extract author mentions (e.g. @gibrown or @"Greg Brown") from the query.
   *
   * Every mention is removed from the working query. A mention whose text
   * matches a user login (looked up with get_user_by()) becomes part of a
   * 'terms' filter on the id field; a mention with no matching login is
   * instead added as a required multi_match on 'must_query_fields'. All
   * mentions, matched or not, are also added as a 'should' multi_match on
   * 'boost_query_fields' when that list is provided. Quoted mentions are
   * queried with type 'phrase'.
   *
   * args:
   *   wpcom_id_field:     field holding the user id to filter on
   *   must_query_fields:  fields that must match unresolved mentions (optional)
   *   boost_query_fields: fields used for boosting results (optional)
   *   prefixes:           prefixes the user can type to indicate an author
   *
   * See also: https://github.com/twitter/twitter-text/blob/master/java/src/com/twitter/Regex.java
   *
   * @param array $args See above.
   * @return bool True when at least one mention was found, false otherwise.
   */
  public function author_field_filter( $args ) {
      $args = wp_parse_args( $args, array(
          'wpcom_id_field'     => 'author_id',
          'must_query_fields'  => null,
          'boost_query_fields' => null,
          'prefixes'           => array( '@' ),
      ) );

      // Collect the mentions for every prefix before touching the query.
      $mentions = array();
      foreach ( $args['prefixes'] as $prefix ) {
          $hits = $this->get_fields( $prefix );
          if ( ! $hits ) {
              continue;
          }
          foreach ( $hits as $hit ) {
              $mentions[] = $hit;
          }
      }
      if ( empty( $mentions ) ) {
          return false;
      }

      foreach ( $args['prefixes'] as $prefix ) {
          $this->remove_fields( $prefix );
      }

      $matched_ids = array();
      foreach ( $mentions as $mention ) {
          // NOTE(review): the login lookup runs on the raw mention, i.e. before
          // surrounding quotes are stripped below - confirm this is intended.
          $user          = get_user_by( 'login', strtolower( $mention ) );
          $is_known_user = false;
          if ( $user ) {
              // Keyed by id so the same author mentioned twice is filtered once.
              $matched_ids[ $user->ID ] = true;
              $is_known_user            = true;
          }

          $is_phrase = false;
          if ( preg_match( '/"/', $mention ) ) {
              $is_phrase = true;
              $mention   = preg_replace( '/"/', '', $mention );
          }

          // Unresolved mentions fall back to a required text match.
          if ( ! $is_known_user && ! empty( $args['must_query_fields'] ) ) {
              $must = array(
                  'fields' => $args['must_query_fields'],
                  'query'  => $mention,
              );
              if ( $is_phrase ) {
                  $must['type'] = 'phrase';
              }
              $this->add_query( array( 'multi_match' => $must ) );
          }

          if ( ! empty( $args['boost_query_fields'] ) ) {
              $boost = array(
                  'fields' => $args['boost_query_fields'],
                  'query'  => $mention,
              );
              if ( $is_phrase ) {
                  $boost['type'] = 'phrase';
              }
              $this->add_query( array( 'multi_match' => $boost ), 'should' );
          }
      }

      if ( ! empty( $matched_ids ) ) {
          $this->add_filter( array(
              'terms' => array( $args['wpcom_id_field'] => array_keys( $matched_ids ) ),
          ) );
      }
      return true;
  }
  /**
   * Extract any prefix followed by text and use it as a required clause,
   * optionally also as a boost in the should part of the query.
   *
   * Works for hashtags (#News, #"current events") as well as arbitrary
   * prefixes such as from:Greg. Matched text is removed from the working
   * query. Quoted values are queried with type 'phrase'.
   *
   * args:
   *   must_query_fields:  fields that must match the tag (optional)
   *   boost_query_fields: fields to boost the search on (optional)
   *   prefixes:           prefixes the user can type to indicate a tag
   *
   * @param array $args See above.
   * @return bool True when at least one tag was found, false otherwise.
   */
  public function text_field_filter( $args ) {
      $args = wp_parse_args( $args, array(
          'must_query_fields'  => array( 'tag.name' ),
          'boost_query_fields' => array( 'tag.name' ),
          'prefixes'           => array( '#' ),
      ) );

      // Gather the tagged values for every prefix before altering the query.
      $values = array();
      foreach ( $args['prefixes'] as $prefix ) {
          $hits = $this->get_fields( $prefix );
          if ( ! $hits ) {
              continue;
          }
          foreach ( $hits as $hit ) {
              $values[] = $hit;
          }
      }
      if ( empty( $values ) ) {
          return false;
      }

      foreach ( $args['prefixes'] as $prefix ) {
          $this->remove_fields( $prefix );
      }

      $wants_must  = ! empty( $args['must_query_fields'] );
      $wants_boost = ! empty( $args['boost_query_fields'] );

      foreach ( $values as $value ) {
          $is_phrase = false;
          if ( preg_match( '/"/', $value ) ) {
              $is_phrase = true;
              $value     = preg_replace( '/"/', '', $value );
          }

          if ( $wants_must ) {
              $must = array(
                  'fields' => $args['must_query_fields'],
                  'query'  => $value,
              );
              if ( $is_phrase ) {
                  $must['type'] = 'phrase';
              }
              $this->add_query( array( 'multi_match' => $must ) );
          }

          if ( $wants_boost ) {
              $boost = array(
                  'fields' => $args['boost_query_fields'],
                  'query'  => $value,
              );
              if ( $is_phrase ) {
                  $boost['type'] = 'phrase';
              }
              $this->add_query( array( 'multi_match' => $boost ), 'should' );
          }
      }
      return true;
  }
  /**
   * Extract quoted phrases from the query and add them as phrase queries.
   *
   * Handles complete phrases in "" or '' as well as one final phrase whose
   * closing quote has not been typed yet. Everything extracted is removed
   * from the working query.
   *
   * args:
   *   must_query_fields:  fields that must match the phrases
   *   boost_query_fields: fields to boost the phrases on (optional)
   *
   * @param array $args See above.
   * @return bool True when at least one phrase was found, false otherwise.
   */
  public function phrase_filter( $args ) {
      $args = wp_parse_args( $args, array(
          'must_query_fields'  => array( 'all_content' ),
          'boost_query_fields' => array( 'title' ),
      ) );

      // Complete phrases: double-quoted ones first, then single-quoted ones.
      $found = array();
      foreach ( array( '/"([^"]+)"/', "/'([^']+)'/" ) as $pattern ) {
          if ( ! preg_match_all( $pattern, $this->current_query, $hits ) ) {
              continue;
          }
          $found               = array_merge( $found, $hits[1] );
          $this->current_query = preg_replace( $pattern, '', $this->current_query );
      }

      // A final, uncompleted phrase. When both patterns hit, the later
      // (single-quote) match is the one that is kept.
      $trailing = false;
      foreach ( array( '/"([^"]+)$/', "/(?:'\B|\B')([^']+)$/" ) as $pattern ) {
          if ( preg_match_all( $pattern, $this->current_query, $hits ) ) {
              $trailing            = $hits[1][0];
              $this->current_query = preg_replace( $pattern, '', $this->current_query );
          }
      }
      if ( $trailing ) {
          $found[] = $trailing;
      }

      if ( empty( $found ) ) {
          return false;
      }

      $wants_boost = ! empty( $args['boost_query_fields'] );
      foreach ( $found as $phrase ) {
          // The phrase itself is always required.
          $this->add_query( array(
              'multi_match' => array(
                  'fields' => $args['must_query_fields'],
                  'query'  => $phrase,
                  'type'   => 'phrase',
              ),
          ) );

          if ( $wants_boost ) {
              $this->add_query( array(
                  'multi_match' => array(
                      'fields'   => $args['boost_query_fields'],
                      'query'    => $phrase,
                      'operator' => 'and',
                  ),
              ), 'should' );
          }
      }
      return true;
  }
  /**
   * Query fields with whatever is left of the working query.
   *
   * This can be the final AND part of the query terms to match, a boost on
   * certain fields, or both. Nothing is added when the remaining query is
   * empty or consists only of whitespace.
   *
   * args:
   *   must_query_fields:  fields that must match the remaining terms (optional)
   *   boost_query_fields: fields to boost the remaining terms on (optional)
   *   boost_operator:     operator of the boost query (default 'and')
   *   boost_query_type:   multi_match type of the boost query (default 'best_fields')
   *
   * @param array $args See above.
   */
  public function remaining_query( $args ) {
      $args = wp_parse_args( $args, array(
          'must_query_fields'  => null,
          'boost_query_fields' => null,
          'boost_operator'     => 'and',
          'boost_query_type'   => 'best_fields',
      ) );

      $text = $this->current_query;
      if ( empty( $text ) || ctype_space( $text ) ) {
          return;
      }

      if ( ! empty( $args['must_query_fields'] ) ) {
          $must = array(
              'fields'   => $args['must_query_fields'],
              'query'    => $text,
              'operator' => 'and',
          );
          $this->add_query( array( 'multi_match' => $must ) );
      }

      if ( ! empty( $args['boost_query_fields'] ) ) {
          $boost = array(
              'fields'   => $args['boost_query_fields'],
              'query'    => $text,
              'operator' => $args['boost_operator'],
              'type'     => $args['boost_query_type'],
          );
          $this->add_query( array( 'multi_match' => $boost ), 'should' );
      }
  }
  /**
   * Query fields using a prefix query (alphabetical expansions on the index).
   * This is not recommended: slower performance and worse relevancy than
   * querying an edge-ngram field through remaining_query().
   *
   * The last word of the remaining query is treated as a possibly incomplete
   * word ("elasticsearch lucen" => "elasticsearch" exact AND "lucen" prefix).
   * When the query ends with a space there is no incomplete word and plain
   * AND matching is used instead.
   *
   * All required clauses are built from 'must_query_fields' and the current
   * working query. NOTE(review): this method was marked UNTESTED and used to
   * reference properties and variables that are not defined in this class;
   * confirm the field choice against the index mapping before relying on it.
   *
   * args:
   *   must_query_fields:  fields that must match the remaining terms (optional)
   *   boost_query_fields: fields to boost the remaining terms on (optional)
   *   boost_operator:     operator of the boost query (default 'and')
   *   boost_query_type:   multi_match type of the boost query (default 'best_fields')
   *
   * @param array $args See above.
   */
  public function remaining_prefix_query( $args ) {
      $defaults = array(
          'must_query_fields'  => array( 'all_content' ),
          'boost_query_fields' => array( 'title' ),
          'boost_operator'     => 'and',
          'boost_query_type'   => 'best_fields',
      );
      $args = wp_parse_args( $args, $defaults );

      if ( empty( $this->current_query ) || ctype_space( $this->current_query ) ) {
          return;
      }

      //////////////////////////////////
      // Example cases to think about:
      // "elasticse"
      // "elasticsearch"
      // "elasticsearch "
      // "elasticsearch lucen"
      // "elasticsearch lucene"
      // "the future" - note the stopword which will match nothing!
      // "F1" - an exact match that also has tons of expansions
      // Japanese text without spaces - we just rely on the prefix phrase and
      // ES to split words; this could likely be improved by analyzing first.

      $full_text = $this->current_query;

      // Split into "PREFIXREMAINDER PREFIXWORD", e.g. "elasticsearch lucen".
      $prefix_word = false;
      if ( preg_match( '/([^ ]+)$/', $full_text, $matches ) ) {
          $prefix_word = $matches[1];
      }
      $prefix_remainder = preg_replace( '/([^ ]+)$/', '', $full_text );
      if ( ! is_string( $prefix_remainder ) || '' === $prefix_remainder || ctype_space( $prefix_remainder ) ) {
          $prefix_remainder = false;
      }

      // Strict comparison so that a trailing word of "0" still counts as a word.
      if ( false === $prefix_word ) {
          // Space at the end of the query, so skip using a prefix query.
          if ( ! empty( $args['must_query_fields'] ) ) {
              $this->add_query( array(
                  'multi_match' => array(
                      'fields'   => $args['must_query_fields'],
                      'query'    => $full_text,
                      'operator' => 'and',
                  ),
              ) );
          }
          if ( ! empty( $args['boost_query_fields'] ) ) {
              $this->add_query( array(
                  'multi_match' => array(
                      'fields'   => $args['boost_query_fields'],
                      'query'    => $full_text,
                      'operator' => $args['boost_operator'],
                      'type'     => $args['boost_query_type'],
                  ),
              ), 'should' );
          }
          return;
      }

      // Must match the prefix word and the prefix remainder.
      if ( ! empty( $args['must_query_fields'] ) ) {
          // OR across a few alternatives to handle all of the cases above.
          $must_q = array(
              'bool' => array(
                  'should'               => array(),
                  'minimum_should_match' => 1,
              ),
          );

          // Treat all words as an exact search (matches the complete word
          // "news" rather than only prefixes of "newspaper").
          $must_q['bool']['should'][] = array(
              'multi_match' => array(
                  'fields'   => $args['must_query_fields'],
                  'query'    => $full_text,
                  'operator' => 'and',
                  'type'     => 'cross_fields',
              ),
          );

          // Always optimistically try to match the full text as a phrase:
          // the prefix "the futu" should match "the future", otherwise the
          // leading stopword breaks the match. This is also the prefix match
          // for a single word such as "elasticsea".
          $must_q['bool']['should'][] = array(
              'multi_match' => array(
                  'fields'         => $args['must_query_fields'],
                  'query'          => $full_text,
                  'operator'       => 'and',
                  'type'           => 'phrase_prefix',
                  'max_expansions' => 100,
              ),
          );

          if ( false !== $prefix_remainder ) {
              // Multiple words: treat each word on its own and not just as
              // part of a phrase. This alternative is appended to the same
              // bool/should list that is handed to add_query() below.
              $must_q['bool']['should'][] = array(
                  'bool' => array(
                      'must' => array(
                          array(
                              'multi_match' => array(
                                  'fields'         => $args['must_query_fields'],
                                  'query'          => $prefix_word,
                                  'operator'       => 'and',
                                  'type'           => 'phrase_prefix',
                                  'max_expansions' => 100,
                              ),
                          ),
                          array(
                              'multi_match' => array(
                                  'fields'   => $args['must_query_fields'],
                                  'query'    => $prefix_remainder,
                                  'operator' => 'and',
                                  'type'     => 'cross_fields',
                              ),
                          ),
                      ),
                  ),
              );
          }

          $this->add_query( $must_q );
      }

      // Now add any boosting of the query.
      if ( ! empty( $args['boost_query_fields'] ) ) {
          // Treat all words as an exact search.
          $this->add_query( array(
              'multi_match' => array(
                  'fields'   => $args['boost_query_fields'],
                  'query'    => $full_text,
                  'operator' => $args['boost_operator'],
                  'type'     => $args['boost_query_type'],
              ),
          ), 'should' );

          // Optimistically boost the full phrase prefix match. Added as a
          // 'should' clause so it only boosts and never restricts results.
          $this->add_query( array(
              'multi_match' => array(
                  'fields'         => $args['boost_query_fields'],
                  'query'          => $full_text,
                  'operator'       => 'and',
                  'type'           => 'phrase_prefix',
                  'max_expansions' => 100,
              ),
          ), 'should' );
      }
  }
  /**
   * Boost results based on the language probability overlaps.
   *
   * Registers one 'field_value_factor' function per entry, using the entry's
   * value as the factor.
   *
   * NOTE(review): the language key of each entry is never used and no field
   * name is passed to add_function(), so every registered function differs
   * only by its factor. This looks unfinished - confirm which per-language
   * field the factor is meant to apply to.
   *
   * @param array $langs2prob Map of language => boost (probability).
   */
  public function boost_lang_probs( $langs2prob ) {
      foreach ( $langs2prob as $probability ) {
          $params = array(
              'modifier' => 'none',
              'factor'   => $probability,
              'missing'  => 0.01, // 1% chance doc did not have right lang detected
          );
          $this->add_function( 'field_value_factor', $params );
      }
  }
  595. ////////////////////////////////////
  596. // Helper Methods
  /**
   * Build the pattern matching a prefix followed by a quoted string or by a
   * run of non-separator characters, e.g. @gibrown or @"Greg Brown".
   *
   * The prefix is escaped with preg_quote() so prefixes containing regex
   * metacharacters ('+', '$', '/', ...) are matched literally. The 'u'
   * modifier makes \p{Z} operate on UTF-8 characters instead of single
   * bytes, so multibyte characters inside a value are not cut apart.
   *
   * @param string $field_prefix Literal prefix typed by the user.
   * @return string PCRE pattern including delimiters and modifiers.
   */
  protected function field_prefix_regex( $field_prefix ) {
      return '/' . preg_quote( $field_prefix, '/' ) . '(("[^"]+")|([^\\p{Z}]+))/u';
  }

  /**
   * Get the text after some prefix, e.g. @gibrown or @"Greg Brown".
   *
   * Quoted values are returned including their surrounding double quotes.
   *
   * @param string $field_prefix Literal prefix to look for.
   * @return array|false List of values found, or false when there are none
   *                     (also when the query is not valid UTF-8).
   */
  protected function get_fields( $field_prefix ) {
      $regex = $this->field_prefix_regex( $field_prefix );
      if ( preg_match_all( $regex, $this->current_query, $match ) ) {
          return $match[1];
      }
      return false;
  }

  /**
   * Remove the prefix and the text following it from the working query.
   *
   * @param string $field_name Literal prefix whose occurrences are removed.
   */
  protected function remove_fields( $field_name ) {
      $stripped = preg_replace( $this->field_prefix_regex( $field_name ), '', $this->current_query );
      // preg_replace() returns null on failure (e.g. malformed UTF-8); keep
      // the query as it was rather than wiping it out.
      if ( null !== $stripped ) {
          $this->current_query = $stripped;
      }
  }
  /**
   * Best effort string truncation that splits on word breaks.
   *
   * Strings whose mb_strwidth() does not exceed $limit are returned as they
   * are. Otherwise the string is scanned backwards from $limit for $break
   * and cut right before it; when no break is found the string is chopped
   * mid-word at $limit.
   *
   * @param string $string Text to shorten.
   * @param int    $limit  Maximum width of the result.
   * @param string $break  Character treated as a word break.
   * @return string The (possibly shortened) text.
   */
  protected function truncate_string( $string, $limit, $break = ' ' ) {
      if ( mb_strwidth( $string ) <= $limit ) {
          return $string;
      }

      // Walk backwards from $limit to find the first break.
      for ( $pos = $limit; $pos > 0; $pos-- ) {
          if ( $break === mb_strimwidth( $string, $pos, 1 ) ) {
              return mb_strimwidth( $string, 0, $pos );
          }
      }

      // No break found, so we need to chop mid-word.
      return mb_strimwidth( $string, 0, $limit );
  }
  632. }