diff -r 07239de796bb -r e756a8c72c3d cms/drupal/modules/search/search.extender.inc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cms/drupal/modules/search/search.extender.inc Fri Sep 08 12:04:06 2017 +0200 @@ -0,0 +1,546 @@ + array(), 'negative' => array()); + + /** + * Indicates whether the first pass query requires complex conditions (LIKE). + * + * @var boolean. + */ + protected $simple = TRUE; + + /** + * Conditions that are used for exact searches. + * + * This is always used for the second pass query but not for the first pass, + * unless $this->simple is FALSE. + * + * @var DatabaseCondition + */ + protected $conditions; + + /** + * Indicates how many matches for a search query are necessary. + * + * @var int + */ + protected $matches = 0; + + /** + * Array of search words. + * + * These words have to match against {search_index}.word. + * + * @var array + */ + protected $words = array(); + + /** + * Multiplier for the normalized search score. + * + * This value is calculated by the first pass query and multiplied with the + * actual score of a specific word to make sure that the resulting calculated + * score is between 0 and 1. + * + * @var float + */ + protected $normalize; + + /** + * Indicates whether the first pass query has been executed. + * + * @var boolean + */ + protected $executedFirstPass = FALSE; + + /** + * Stores score expressions. + * + * @var array + * + * @see addScore() + */ + protected $scores = array(); + + /** + * Stores arguments for score expressions. + * + * @var array + */ + protected $scoresArguments = array(); + + /** + * Stores multipliers for score expressions. + * + * @var array + */ + protected $multiply = array(); + + /** + * Whether or not search expressions were ignored. + * + * The maximum number of AND/OR combinations exceeded can be configured to + * avoid Denial-of-Service attacks. Expressions beyond the limit are ignored. + * + * @var boolean + */ + protected $expressionsIgnored = FALSE; + + /** + * Sets up the search query expression. + * + * @param $query + * A search query string, which can contain options. + * @param $module + * The search module. This maps to {search_index}.type in the database. + * + * @return + * The SearchQuery object. + */ + public function searchExpression($expression, $module) { + $this->searchExpression = $expression; + $this->type = $module; + + // Add a search_* tag. This needs to be added before any preExecute methods + // for decorated queries are called, as $this->prepared will be set to TRUE + // and tags added in the execute method will never get used. For example, + // if $query is extended by 'SearchQuery' then 'PagerDefault', the + // search-specific tag will be added too late (when preExecute() has + // already been called from the PagerDefault extender), and as a + // consequence will not be available to hook_query_alter() implementations, + // nor will the correct hook_query_TAG_alter() implementations get invoked. + // See node_search_execute(). + $this->addTag('search_' . $module); + + return $this; + } + + /** + * Applies a search option and removes it from the search query string. + * + * These options are in the form option:value,value2,value3. + * + * @param $option + * Name of the option. + * @param $column + * Name of the database column to which the value should be applied. + * + * @return + * TRUE if a value for that option was found, FALSE if not. + */ + public function setOption($option, $column) { + if ($values = search_expression_extract($this->searchExpression, $option)) { + $or = db_or(); + foreach (explode(',', $values) as $value) { + $or->condition($column, $value); + } + $this->condition($or); + $this->searchExpression = search_expression_insert($this->searchExpression, $option); + return TRUE; + } + return FALSE; + } + + /** + * Parses the search query into SQL conditions. + * + * We build two queries that match the dataset bodies. + */ + protected function parseSearchExpression() { + // Matchs words optionally prefixed by a dash. A word in this case is + // something between two spaces, optionally quoted. + preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression , $keywords, PREG_SET_ORDER); + + if (count($keywords) == 0) { + return; + } + + // Classify tokens. + $or = FALSE; + $warning = ''; + $limit_combinations = variable_get('search_and_or_limit', 7); + // The first search expression does not count as AND. + $and_count = -1; + $or_count = 0; + foreach ($keywords as $match) { + if ($or_count && $and_count + $or_count >= $limit_combinations) { + // Ignore all further search expressions to prevent Denial-of-Service + // attacks using a high number of AND/OR combinations. + $this->expressionsIgnored = TRUE; + break; + } + $phrase = FALSE; + // Strip off phrase quotes. + if ($match[2]{0} == '"') { + $match[2] = substr($match[2], 1, -1); + $phrase = TRUE; + $this->simple = FALSE; + } + // Simplify keyword according to indexing rules and external + // preprocessors. Use same process as during search indexing, so it + // will match search index. + $words = search_simplify($match[2]); + // Re-explode in case simplification added more words, except when + // matching a phrase. + $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY); + // Negative matches. + if ($match[1] == '-') { + $this->keys['negative'] = array_merge($this->keys['negative'], $words); + } + // OR operator: instead of a single keyword, we store an array of all + // OR'd keywords. + elseif ($match[2] == 'OR' && count($this->keys['positive'])) { + $last = array_pop($this->keys['positive']); + // Starting a new OR? + if (!is_array($last)) { + $last = array($last); + } + $this->keys['positive'][] = $last; + $or = TRUE; + $or_count++; + continue; + } + // AND operator: implied, so just ignore it. + elseif ($match[2] == 'AND' || $match[2] == 'and') { + $warning = $match[2]; + continue; + } + + // Plain keyword. + else { + if ($match[2] == 'or') { + $warning = $match[2]; + } + if ($or) { + // Add to last element (which is an array). + $this->keys['positive'][count($this->keys['positive']) - 1] = array_merge($this->keys['positive'][count($this->keys['positive']) - 1], $words); + } + else { + $this->keys['positive'] = array_merge($this->keys['positive'], $words); + $and_count++; + } + } + $or = FALSE; + } + + // Convert keywords into SQL statements. + $this->conditions = db_and(); + $simple_and = FALSE; + $simple_or = FALSE; + // Positive matches. + foreach ($this->keys['positive'] as $key) { + // Group of ORed terms. + if (is_array($key) && count($key)) { + $simple_or = TRUE; + $any = FALSE; + $queryor = db_or(); + foreach ($key as $or) { + list($num_new_scores) = $this->parseWord($or); + $any |= $num_new_scores; + $queryor->condition('d.data', "% $or %", 'LIKE'); + } + if (count($queryor)) { + $this->conditions->condition($queryor); + // A group of OR keywords only needs to match once. + $this->matches += ($any > 0); + } + } + // Single ANDed term. + else { + $simple_and = TRUE; + list($num_new_scores, $num_valid_words) = $this->parseWord($key); + $this->conditions->condition('d.data', "% $key %", 'LIKE'); + if (!$num_valid_words) { + $this->simple = FALSE; + } + // Each AND keyword needs to match at least once. + $this->matches += $num_new_scores; + } + } + if ($simple_and && $simple_or) { + $this->simple = FALSE; + } + // Negative matches. + foreach ($this->keys['negative'] as $key) { + $this->conditions->condition('d.data', "% $key %", 'NOT LIKE'); + $this->simple = FALSE; + } + + if ($warning == 'or') { + drupal_set_message(t('Search for either of the two terms with uppercase OR. For example, cats OR dogs.')); + } + } + + /** + * Helper function for parseQuery(). + */ + protected function parseWord($word) { + $num_new_scores = 0; + $num_valid_words = 0; + // Determine the scorewords of this word/phrase. + $split = explode(' ', $word); + foreach ($split as $s) { + $num = is_numeric($s); + if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) { + if (!isset($this->words[$s])) { + $this->words[$s] = $s; + $num_new_scores++; + } + $num_valid_words++; + } + } + // Return matching snippet and number of added words. + return array($num_new_scores, $num_valid_words); + } + + /** + * Executes the first pass query. + * + * This can either be done explicitly, so that additional scores and + * conditions can be applied to the second pass query, or implicitly by + * addScore() or execute(). + * + * @return + * TRUE if search items exist, FALSE if not. + */ + public function executeFirstPass() { + $this->parseSearchExpression(); + + if (count($this->words) == 0) { + form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.')); + return FALSE; + } + if ($this->expressionsIgnored) { + drupal_set_message(t('Your search used too many AND/OR expressions. Only the first @count terms were included in this search.', array('@count' => variable_get('search_and_or_limit', 7))), 'warning'); + } + $this->executedFirstPass = TRUE; + + if (!empty($this->words)) { + $or = db_or(); + foreach ($this->words as $word) { + $or->condition('i.word', $word); + } + $this->condition($or); + } + // Build query for keyword normalization. + $this->join('search_total', 't', 'i.word = t.word'); + $this + ->condition('i.type', $this->type) + ->groupBy('i.type') + ->groupBy('i.sid') + ->having('COUNT(*) >= :matches', array(':matches' => $this->matches)); + + // Clone the query object to do the firstPass query; + $first = clone $this->query; + + // For complex search queries, add the LIKE conditions to the first pass query. + if (!$this->simple) { + $first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type'); + $first->condition($this->conditions); + } + + // Calculate maximum keyword relevance, to normalize it. + $first->addExpression('SUM(i.score * t.count)', 'calculated_score'); + $this->normalize = $first + ->range(0, 1) + ->orderBy('calculated_score', 'DESC') + ->execute() + ->fetchField(); + + if ($this->normalize) { + return TRUE; + } + return FALSE; + } + + /** + * Adds a custom score expression to the search query. + * + * Score expressions are used to order search results. If no calls to + * addScore() have taken place, a default keyword relevance score will be + * used. However, if at least one call to addScore() has taken place, the + * keyword relevance score is not automatically added. + * + * Note that you must use this method to add ordering to your searches, and + * not call orderBy() directly, when using the SearchQuery extender. This is + * because of the two-pass system the SearchQuery class uses to normalize + * scores. + * + * @param $score + * The score expression, which should evaluate to a number between 0 and 1. + * The string 'i.relevance' in a score expression will be replaced by a + * measure of keyword relevance between 0 and 1. + * @param $arguments + * Query arguments needed to provide values to the score expression. + * @param $multiply + * If set, the score is multiplied with this value. However, all scores + * with multipliers are then divided by the total of all multipliers, so + * that overall, the normalization is maintained. + * + * @return object + * The updated query object. + */ + public function addScore($score, $arguments = array(), $multiply = FALSE) { + if ($multiply) { + $i = count($this->multiply); + // Modify the score expression so it is multiplied by the multiplier, + // with a divisor to renormalize. + $score = "CAST(:multiply_$i AS DECIMAL) * COALESCE(( " . $score . "), 0) / CAST(:total_$i AS DECIMAL)"; + // Add an argument for the multiplier. The :total_$i argument is taken + // care of in the execute() method, which is when the total divisor is + // calculated. + $arguments[':multiply_' . $i] = $multiply; + $this->multiply[] = $multiply; + } + + $this->scores[] = $score; + $this->scoresArguments += $arguments; + + return $this; + } + + /** + * Executes the search. + * + * If not already done, this executes the first pass query. Then the complex + * conditions are applied to the query including score expressions and + * ordering. + * + * @return + * FALSE if the first pass query returned no results, and a database result + * set if there were results. + */ + public function execute() + { + if (!$this->executedFirstPass) { + $this->executeFirstPass(); + } + if (!$this->normalize) { + return new DatabaseStatementEmpty(); + } + + // Add conditions to query. + $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type'); + $this->condition($this->conditions); + + if (empty($this->scores)) { + // Add default score. + $this->addScore('i.relevance'); + } + + if (count($this->multiply)) { + // Re-normalize scores with multipliers by dividing by the total of all + // multipliers. The expressions were altered in addScore(), so here just + // add the arguments for the total. + $i = 0; + $sum = array_sum($this->multiply); + foreach ($this->multiply as $total) { + $this->scoresArguments[':total_' . $i] = $sum; + $i++; + } + } + + // Replace the pseudo-expression 'i.relevance' with a measure of keyword + // relevance in all score expressions, using string replacement. Careful + // though! If you just print out a float, some locales use ',' as the + // decimal separator in PHP, while SQL always uses '.'. So, make sure to + // set the number format correctly. + $relevance = number_format((1.0 / $this->normalize), 10, '.', ''); + $this->scores = str_replace('i.relevance', '(' . $relevance . ' * i.score * t.count)', $this->scores); + + // Add all scores together to form a query field. + $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments); + + // If an order has not yet been set for this query, add a default order + // that sorts by the calculated sum of scores. + if (count($this->getOrderBy()) == 0) { + $this->orderBy('calculated_score', 'DESC'); + } + + // Add useful metadata. + $this + ->addMetaData('normalize', $this->normalize) + ->fields('i', array('type', 'sid')); + + return $this->query->execute(); + } + + /** + * Builds the default count query for SearchQuery. + * + * Since SearchQuery always uses GROUP BY, we can default to a subquery. We + * also add the same conditions as execute() because countQuery() is called + * first. + */ + public function countQuery() { + // Clone the inner query. + $inner = clone $this->query; + + // Add conditions to query. + $inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type'); + $inner->condition($this->conditions); + + // Remove existing fields and expressions, they are not needed for a count + // query. + $fields =& $inner->getFields(); + $fields = array(); + $expressions =& $inner->getExpressions(); + $expressions = array(); + + // Add the sid as the only field and count them as a subquery. + $count = db_select($inner->fields('i', array('sid')), NULL, array('target' => 'slave')); + + // Add the COUNT() expression. + $count->addExpression('COUNT(*)'); + + return $count; + } +}