Нет описания

jetpack-wpes-query-builder.php 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. <?php
  /**
   * Provides an interface for easily building a complex Elasticsearch (ES)
   * search query that combines multiple ranking signals.
   *
   * Usage:
   *
   *     $bldr = new Jetpack_WPES_Query_Builder();
   *     $bldr->add_filter( ... );
   *     $bldr->add_filter( ... );
   *     $bldr->add_query( ... );
   *     $es_query  = $bldr->build_query();
   *     $es_filter = $bldr->build_filter();
   *
   * build_query() always produces a `bool` query; when any scoring functions,
   * decays, or scripts have been registered, that `bool` query is wrapped in a
   * `function_score` query. Filters and aggregations are assembled separately
   * by build_filter() and build_aggregation().
   *
   * Most methods are chainable, e.g.
   * $bldr->add_filter( ... )->add_query( ... )->build_query();
   *
   * Bucketed queries use a `filters` aggregation to diversify results: each
   * bucket is a separate named filter that collects its own set of results.
   */
  class Jetpack_WPES_Query_Builder {

      // Filters combined by build_filter().
      protected $es_filters = array();

      // Custom boosting with function_score.
      protected $functions            = array();
      protected $weighting_functions  = array();
      protected $decays               = array();
      protected $scripts              = array();
      protected $functions_max_boost  = 2.0;
      protected $functions_score_mode = 'multiply';
      protected $functions_boost_mode = 'multiply';
      protected $query_bool_boost     = null;

      // General aggregations for buckets and metrics.
      protected $aggs_query = false;
      protected $aggs       = array();

      // The set of top level text queries to combine.
      protected $must_queries    = array();
      protected $should_queries  = array();
      protected $dis_max_queries = array();

      // State for bucketed ("diverse") queries.
      protected $diverse_buckets_query = false;
      protected $bucket_filters        = array();
      protected $bucket_sub_aggs       = array();

      /**
       * Get the languages attached to this builder, if any.
       *
       * The `langs` property is not declared or written anywhere in this class,
       * so this returns false unless a subclass or external code has set it.
       *
       * @return mixed The value of $this->langs when set, false otherwise.
       */
      public function get_langs() {
          return isset( $this->langs ) ? $this->langs : false;
      }

      ////////////////////////////////////
      // Methods for building a query

      /**
       * Register a filter to be combined by build_filter().
       *
       * @param array $filter An ES filter structure.
       *
       * @return $this
       */
      public function add_filter( $filter ) {
          $this->es_filters[] = $filter;

          return $this;
      }

      /**
       * Register a top level query.
       *
       * @param array  $query An ES query structure.
       * @param string $type  One of 'must', 'should' or 'dis_max'. Any other value is treated as 'must'.
       *
       * @return $this
       */
      public function add_query( $query, $type = 'must' ) {
          // A switch (loose comparison) is kept on purpose so that the handling of
          // unusual $type values stays exactly as callers have always seen it.
          switch ( $type ) {
              case 'dis_max':
                  $this->dis_max_queries[] = $query;
                  break;

              case 'should':
                  $this->should_queries[] = $query;
                  break;

              case 'must':
              default:
                  $this->must_queries[] = $query;
                  break;
          }

          return $this;
      }

      /**
       * Add any weighting function to the query.
       *
       * Functions containing a `random_score` or `script_score` key are
       * silently ignored.
       *
       * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
       *
       * @param array $function A function structure to apply to the query.
       *
       * @return $this
       */
      public function add_weighting_function( $function ) {
          // Check for danger: these function types are never accepted through this method.
          if ( isset( $function['random_score'] ) || isset( $function['script_score'] ) ) {
              return $this;
          }

          $this->weighting_functions[] = $function;

          return $this;
      }

      /**
       * Add a scoring function to the query.
       *
       * NOTE: For decays (linear, exp, or gauss), use Jetpack_WPES_Query_Builder::add_decay() instead.
       *
       * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
       *
       * @param string $function Name of the function.
       * @param array  $params   Function parameters, keyed by field name; build_query() copies
       *                         each key into the function body as its 'field' entry.
       *
       * @return $this
       */
      public function add_function( $function, $params ) {
          $this->functions[ $function ][] = $params;

          return $this;
      }

      /**
       * Add a decay function to score results.
       *
       * This method should be used instead of Jetpack_WPES_Query_Builder::add_function() for decays,
       * as the internal ES structure is slightly different for them.
       *
       * @see https://www.elastic.co/guide/en/elasticsearch/guide/current/decay-functions.html
       *
       * @param string $function Name of the decay function - linear, exp, or gauss.
       * @param array  $params   The decay function parameters, keyed by field name, passed to ES directly.
       *
       * @return $this
       */
      public function add_decay( $function, $params ) {
          $this->decays[ $function ][] = $params;

          return $this;
      }

      /**
       * Set the score mode used by the function_score wrapper.
       *
       * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
       *
       * @param string $mode Name of how to combine the function scores.
       *
       * @return $this
       */
      public function add_score_mode_to_functions( $mode = 'multiply' ) {
          $this->functions_score_mode = $mode;

          return $this;
      }

      /**
       * Set the boost mode used by the function_score wrapper.
       *
       * @param string $mode Value emitted as `boost_mode` by build_query().
       *
       * @return $this
       */
      public function add_boost_mode_to_functions( $mode = 'multiply' ) {
          $this->functions_boost_mode = $mode;

          return $this;
      }

      /**
       * Set the max boost used by the function_score wrapper.
       *
       * @param float $boost Value emitted as `max_boost` by build_query().
       *
       * @return $this
       */
      public function add_max_boost_to_functions( $boost ) {
          $this->functions_max_boost = $boost;

          return $this;
      }

      /**
       * Set a boost on the top level bool query.
       *
       * @param float $boost Value emitted as the bool query's `boost`.
       *
       * @return $this
       */
      public function add_boost_to_query_bool( $boost ) {
          $this->query_bool_boost = $boost;

          return $this;
      }

      /**
       * Register (or replace) a single named general aggregation.
       *
       * @param string $aggs_name Name of the aggregation.
       * @param array  $aggs      The aggregation structure.
       *
       * @return $this
       */
      public function add_aggs( $aggs_name, $aggs ) {
          $this->aggs_query         = true;
          $this->aggs[ $aggs_name ] = $aggs;

          return $this;
      }

      /**
       * Replace the complete set of general aggregations.
       *
       * @param array $aggs Aggregations keyed by name.
       *
       * @return $this
       */
      public function set_all_aggs( $aggs ) {
          $this->aggs_query = true;
          $this->aggs       = $aggs;

          return $this;
      }

      /**
       * Set the sub-aggregations of a named general aggregation.
       *
       * Any sub-aggregations previously set on that aggregation are replaced.
       * If the named aggregation has not been registered yet, an entry holding
       * only the sub-aggregations is created instead of failing.
       *
       * @param string $aggs_name Name of the parent aggregation.
       * @param array  $sub_aggs  The sub-aggregations structure.
       *
       * @return $this
       */
      public function add_aggs_sub_aggs( $aggs_name, $sub_aggs ) {
          // Guard against an unknown name: reading a missing entry would raise a
          // warning on PHP 7 and a TypeError from array_key_exists() on PHP 8.
          if ( ! isset( $this->aggs[ $aggs_name ] ) ) {
              $this->aggs[ $aggs_name ] = array();
          }

          $this->aggs[ $aggs_name ]['aggs'] = $sub_aggs;

          return $this;
      }

      /**
       * Add a query that is both a named bucket filter and a dis_max query.
       *
       * @param string $name  Bucket name.
       * @param array  $query An ES query structure.
       *
       * @return $this
       */
      public function add_bucketed_query( $name, $query ) {
          $this->_add_bucket_filter( $name, $query );
          $this->add_query( $query, 'dis_max' );

          return $this;
      }

      /**
       * Add a terms match that is both a named bucket filter and a boosted dis_max query.
       *
       * @param string       $name  Bucket name.
       * @param string       $field Field the terms are matched against.
       * @param array|string $terms One term or a list of terms.
       * @param int|float    $boost Boost applied to the constant_score query.
       *
       * @return $this
       */
      public function add_bucketed_terms( $name, $field, $terms, $boost = 1 ) {
          if ( ! is_array( $terms ) ) {
              $terms = array( $terms );
          }

          // The same terms filter is used for the bucket and for scoring.
          $terms_filter = array(
              'terms' => array(
                  $field => $terms,
              ),
          );

          $this->_add_bucket_filter( $name, $terms_filter );

          $this->add_query(
              array(
                  'constant_score' => array(
                      'filter' => $terms_filter,
                      'boost'  => $boost,
                  ),
              ),
              'dis_max'
          );

          return $this;
      }

      /**
       * Merge aggregations into the set of bucket sub-aggregations.
       *
       * @param array $agg Aggregations keyed by name; merged with array_merge().
       *
       * @return $this
       */
      public function add_bucket_sub_aggs( $agg ) {
          $this->bucket_sub_aggs = array_merge( $this->bucket_sub_aggs, $agg );

          return $this;
      }

      /**
       * Register a named bucket filter and flag the query as a diverse-buckets query.
       *
       * @param string $name   Bucket name; an existing bucket with the same name is replaced.
       * @param array  $filter An ES filter structure.
       */
      protected function _add_bucket_filter( $name, $filter ) {
          $this->diverse_buckets_query   = true;
          $this->bucket_filters[ $name ] = $filter;
      }

      ////////////////////////////////////
      // Building Final Query

      /**
       * Combine all the queries, functions, decays, scripts, and max_boost into an ES query.
       *
       * The builder's state is not modified, so this can be called repeatedly
       * (and interleaved with further add_*() calls) with consistent results.
       *
       * @return array Array representation of the built ES query.
       */
      public function build_query() {
          // Work on a local copy: writing the derived clauses back into
          // $this->must_queries would duplicate them on every subsequent call.
          $must = $this->must_queries;

          // dis_max queries just become a single must query.
          if ( ! empty( $this->dis_max_queries ) ) {
              $must[] = array(
                  'dis_max' => array(
                      'queries' => $this->dis_max_queries,
                  ),
              );
          }

          // With nothing to match on, match everything.
          if ( empty( $must ) ) {
              $must = array(
                  array(
                      'match_all' => array(),
                  ),
              );
          }

          $bool = array(
              'must' => $must,
          );

          if ( ! empty( $this->should_queries ) ) {
              $bool['should'] = $this->should_queries;
          }

          if ( ! is_null( $this->query_bool_boost ) ) {
              $bool['boost'] = $this->query_bool_boost;
          }

          $query = array(
              'bool' => $bool,
          );

          // Without any function score adjustments the bool query is the final query.
          if ( ! ( $this->functions || $this->decays || $this->scripts || $this->weighting_functions ) ) {
              return $query;
          }

          return array(
              'function_score' => array(
                  'query'      => $query,
                  'functions'  => $this->collect_weighting_functions(),
                  'max_boost'  => $this->functions_max_boost,
                  'score_mode' => $this->functions_score_mode,
                  'boost_mode' => $this->functions_boost_mode,
              ),
          );
      }

      /**
       * Flatten every registered scoring adjustment into one function_score `functions` list.
       *
       * Order: raw weighting functions, then scoring functions, then decays, then scripts.
       *
       * @return array List of ES function structures.
       */
      private function collect_weighting_functions() {
          $collected = $this->weighting_functions;

          // Scoring functions: the field name is moved inside the function body.
          foreach ( $this->functions as $function_type => $configs ) {
              foreach ( $configs as $config ) {
                  foreach ( $config as $field => $params ) {
                      $body          = $params;
                      $body['field'] = $field;

                      $collected[] = array(
                          $function_type => $body,
                      );
                  }
              }
          }

          // Decays: the parameters stay keyed by field name.
          foreach ( $this->decays as $decay_type => $configs ) {
              foreach ( $configs as $config ) {
                  foreach ( $config as $field => $params ) {
                      $collected[] = array(
                          $decay_type => array(
                              $field => $params,
                          ),
                      );
                  }
              }
          }

          foreach ( $this->scripts as $script ) {
              $collected[] = array(
                  'script_score' => array(
                      'script' => $script,
                  ),
              );
          }

          return $collected;
      }

      /**
       * Assemble the 'filter' portion of an ES query, from all registered filters.
       *
       * NOTE(review): multiple filters are combined with an `and` filter, which
       * newer Elasticsearch versions replaced with `bool` - confirm against the
       * ES version this builder targets before changing it.
       *
       * @return array|null Combined ES filters, or null if none have been defined.
       */
      public function build_filter() {
          if ( empty( $this->es_filters ) ) {
              return null;
          }

          // A single filter is returned as-is, without a wrapper.
          if ( 1 === count( $this->es_filters ) ) {
              return $this->es_filters[0];
          }

          return array(
              'and' => $this->es_filters,
          );
      }

      /**
       * Assemble the 'aggregation' portion of an ES query.
       *
       * General aggregations (add_aggs() / set_all_aggs()) take precedence and
       * are returned on their own. Otherwise the bucket sub-aggregations are
       * returned, nested under a `topics` filters aggregation when bucket
       * filters have been registered.
       *
       * @return array The aggregations, or an empty array when there are none to run.
       */
      public function build_aggregation() {
          // General aggregations replace anything bucket related.
          if ( ! empty( $this->aggs_query ) ) {
              return $this->aggs;
          }

          if ( empty( $this->bucket_sub_aggs ) ) {
              return array();
          }

          // No bucket filters: the sub-aggregations run at the top level.
          if ( ! $this->diverse_buckets_query ) {
              return $this->bucket_sub_aggs;
          }

          return array(
              'topics' => array(
                  'filters' => array(
                      'filters' => $this->bucket_filters,
                  ),
                  'aggs'    => $this->bucket_sub_aggs,
              ),
          );
      }
  }