diff options
Diffstat (limited to 'mod/search/start.php')
| -rw-r--r-- | mod/search/start.php | 133 |
1 files changed, 76 insertions, 57 deletions
diff --git a/mod/search/start.php b/mod/search/start.php index 3467420b7..8a112a3a3 100644 --- a/mod/search/start.php +++ b/mod/search/start.php @@ -1,45 +1,41 @@ <?php /** - * Elgg core search. + * Elgg search plugin * - * @package Elgg - * @subpackage Core - * @author Curverider Ltd <info@elgg.com>, The MITRE Corporation <http://www.mitre.org> - * @link http://elgg.org/ */ +elgg_register_event_handler('init','system','search_init'); + /** - * Initialise search helper functions. - * + * Initialize search plugin */ function search_init() { global $CONFIG; require_once 'search_hooks.php'; // page handler for search actions and results - register_page_handler('search','search_page_handler'); + elgg_register_page_handler('search', 'search_page_handler'); // register some default search hooks - register_plugin_hook('search', 'object', 'search_objects_hook'); - register_plugin_hook('search', 'user', 'search_users_hook'); - - // @todo pull this out into groups - register_plugin_hook('search', 'group', 'search_groups_hook'); + elgg_register_plugin_hook_handler('search', 'object', 'search_objects_hook'); + elgg_register_plugin_hook_handler('search', 'user', 'search_users_hook'); + elgg_register_plugin_hook_handler('search', 'group', 'search_groups_hook'); // tags and comments are a bit different. // register a search types and a hooks for them. - register_plugin_hook('search_types', 'get_types', 'search_custom_types_tags_hook'); - register_plugin_hook('search', 'tags', 'search_tags_hook'); + elgg_register_plugin_hook_handler('search_types', 'get_types', 'search_custom_types_tags_hook'); + elgg_register_plugin_hook_handler('search', 'tags', 'search_tags_hook'); - register_plugin_hook('search_types', 'get_types', 'search_custom_types_comments_hook'); - register_plugin_hook('search', 'comments', 'search_comments_hook'); + elgg_register_plugin_hook_handler('search_types', 'get_types', 'search_custom_types_comments_hook'); + elgg_register_plugin_hook_handler('search', 'comments', 'search_comments_hook'); // get server min and max allowed chars for ft searching $CONFIG->search_info = array(); // can't use get_data() here because some servers don't have these globals set, // which throws a db exception. - $r = mysql_query('SELECT @@ft_min_word_len as min, @@ft_max_word_len as max'); + $dblink = get_db_link('read'); + $r = mysql_query('SELECT @@ft_min_word_len as min, @@ft_max_word_len as max', $dblink); if ($r && ($word_lens = mysql_fetch_assoc($r))) { $CONFIG->search_info['min_chars'] = $word_lens['min']; $CONFIG->search_info['max_chars'] = $word_lens['max']; @@ -50,19 +46,19 @@ function search_init() { } // add in CSS for search elements - elgg_extend_view('css', 'search/css'); + elgg_extend_view('css/elgg', 'search/css'); // extend view for elgg topbar search box - elgg_extend_view('header/extend', 'search/search_box'); + elgg_extend_view('page/elements/header', 'search/header'); } /** * Page handler for search * - * @param array $page Page elements from pain page handler + * @param array $page Page elements from core page handler + * @return bool */ function search_page_handler($page) { - global $CONFIG; // if there is no q set, we're being called from a legacy installation // it expects a search by tags. @@ -73,26 +69,34 @@ function search_page_handler($page) { //set_input('search_type', 'tags'); } - include_once('index.php'); + $base_dir = elgg_get_plugins_path() . 'search/pages/search'; + + include_once("$base_dir/index.php"); + return true; } /** * Return a string with highlighted matched queries and relevant context - * Determins context based upon occurance and distance of words with each other. + * Determines context based upon occurance and distance of words with each other. * * @param string $haystack * @param string $query * @param int $min_match_context = 30 * @param int $max_length = 300 + * @param bool $tag_match Search is for tags. Don't ignore words. * @return string */ -function search_get_highlighted_relevant_substrings($haystack, $query, $min_match_context = 30, $max_length = 300) { - global $CONFIG; +function search_get_highlighted_relevant_substrings($haystack, $query, $min_match_context = 30, $max_length = 300, $tag_match = false) { + $haystack = strip_tags($haystack); $haystack_length = elgg_strlen($haystack); $haystack_lc = elgg_strtolower($haystack); - $words = search_remove_ignored_words($query, 'array'); + if (!$tag_match) { + $words = search_remove_ignored_words($query, 'array'); + } else { + $words = array(); + } // if haystack < $max_length return the entire haystack w/formatting immediately if ($haystack_length <= $max_length) { @@ -108,6 +112,7 @@ function search_get_highlighted_relevant_substrings($haystack, $query, $min_matc $word = elgg_strtolower($word); $count = elgg_substr_count($haystack_lc, $word); $word_len = elgg_strlen($word); + $haystack_len = elgg_strlen($haystack_lc); // find the start positions for the words if ($count > 1) { @@ -118,6 +123,10 @@ function search_get_highlighted_relevant_substrings($haystack, $query, $min_matc $stop = $pos + $word_len + $min_match_context; $lengths[] = $stop - $start; $offset += $pos + $word_len; + + if ($offset >= $haystack_len) { + break; + } } } else { $pos = elgg_strpos($haystack_lc, $word); @@ -135,7 +144,7 @@ function search_get_highlighted_relevant_substrings($haystack, $query, $min_matc $total_length = array_sum($offsets); $add_length = 0; - if ($total_length < $max_length) { + if ($total_length < $max_length && $offsets) { $add_length = floor((($max_length - $total_length) / count($offsets)) / 2); $starts = array(); @@ -225,7 +234,7 @@ function search_consolidate_substrings($offsets, $lengths) { $end_pos = $offset + $length; // find the next entry that doesn't overlap - while(array_key_exists($i+1, $offsets) && $end_pos > $offsets[$i+1]) { + while (array_key_exists($i+1, $offsets) && $end_pos > $offsets[$i+1]) { $i++; if (!array_key_exists($i, $offsets)) { break; @@ -254,23 +263,30 @@ function search_highlight_words($words, $string) { $replace_html = array( 'strong' => rand(10000, 99999), 'class' => rand(10000, 99999), - 'searchMatch' => rand(10000, 99999), - 'searchMatchColor' => rand(10000, 99999) + 'search-highlight' => rand(10000, 99999), + 'search-highlight-color' => rand(10000, 99999) ); foreach ($words as $word) { + // remove any boolean mode operators + $word = preg_replace("/([\-\+~])([\w]+)/i", '$2', $word); + + // escape the delimiter and any other regexp special chars + $word = preg_quote($word, '/'); + $search = "/($word)/i"; + // @todo // must replace with placeholders in case one of the search terms is // in the html string. // later, will replace the placeholders with the actual html. // Yeah this is hacky. I'm tired. $strong = $replace_html['strong']; $class = $replace_html['class']; - $searchMatch = $replace_html['searchMatch']; - $searchMatchColor = $replace_html['searchMatchColor']; + $highlight = $replace_html['search-highlight']; + $color = $replace_html['search-highlight-color']; - $replace = "<$strong $class=\"$searchMatch $searchMatchColor{$i}\">$1</$strong>"; + $replace = "<$strong $class=\"$highlight $color{$i}\">$1</$strong>"; $string = preg_replace($search, $replace, $string); $i++; } @@ -295,7 +311,9 @@ function search_remove_ignored_words($query, $format = 'array') { global $CONFIG; // don't worry about "s or boolean operators - $query = str_replace(array('"', '-', '+', '~'), '', stripslashes(strip_tags($query))); + //$query = str_replace(array('"', '-', '+', '~'), '', stripslashes(strip_tags($query))); + $query = stripslashes(strip_tags($query)); + $words = explode(' ', $query); $min_chars = $CONFIG->search_info['min_chars']; @@ -323,12 +341,12 @@ function search_remove_ignored_words($query, $format = 'array') { * * @param array $results * @param array $params - * @param string $view_type = listing, entity or listing + * @param string $view_type = list, entity or layout * @return string */ function search_get_search_view($params, $view_type) { switch ($view_type) { - case 'listing': + case 'list': case 'entity': case 'layout': break; @@ -339,7 +357,7 @@ function search_get_search_view($params, $view_type) { $view_order = array(); - // check if there's a special search listing view for this type:subtype + // check if there's a special search list view for this type:subtype if (isset($params['type']) && $params['type'] && isset($params['subtype']) && $params['subtype']) { $view_order[] = "search/{$params['type']}/{$params['subtype']}/$view_type"; } @@ -354,7 +372,7 @@ function search_get_search_view($params, $view_type) { $view_order[] = "search/{$params['search_type']}/$view_type"; } - // finally default to a search listing default + // finally default to a search list default $view_order[] = "search/$view_type"; foreach ($view_order as $view) { @@ -384,12 +402,8 @@ function search_get_where_sql($table, $fields, $params, $use_fulltext = TRUE) { $fields[$i] = "$table.$field"; } } - - // if we're not using full text, rewrite the query for bool mode. - // exploiting a feature(ish) of bool mode where +-word is the same as -word - if (!$use_fulltext) { - $query = '+' . str_replace(' ', ' +', $query); - } + + $where = ''; // if query is shorter than the min for fts words // it's likely a single acronym or similar @@ -403,22 +417,30 @@ function search_get_where_sql($table, $fields, $params, $use_fulltext = TRUE) { $likes_str = implode(' OR ', $likes); $where = "($likes_str)"; } else { - // if using advanced or paired "s, switch into boolean mode - if (!$use_fulltext - || (isset($params['advanced_search']) && $params['advanced_search']) - || elgg_substr_count($query, '"') >= 2 ) { + // if we're not using full text, rewrite the query for bool mode. + // exploiting a feature(ish) of bool mode where +-word is the same as -word + if (!$use_fulltext) { + $query = '+' . str_replace(' ', ' +', $query); + } + + // if using advanced, boolean operators, or paired "s, switch into boolean mode + $booleans_used = preg_match("/([\-\+~])([\w]+)/i", $query); + $advanced_search = (isset($params['advanced_search']) && $params['advanced_search']); + $quotes_used = (elgg_substr_count($query, '"') >= 2); + + if (!$use_fulltext || $booleans_used || $advanced_search || $quotes_used) { $options = 'IN BOOLEAN MODE'; } else { // natural language mode is default and this keyword isn't supported in < 5.1 //$options = 'IN NATURAL LANGUAGE MODE'; $options = ''; } - + // if short query, use query expansion. // @todo doesn't seem to be working well. - if (elgg_strlen($query) < 5) { - //$options .= ' WITH QUERY EXPANSION'; - } +// if (elgg_strlen($query) < 5) { +// $options .= ' WITH QUERY EXPANSION'; +// } $query = sanitise_string($query); $fields_str = implode(',', $fields); @@ -446,7 +468,7 @@ function search_get_order_by_sql($entities_table, $type_table, $sort, $order) { default: case 'relevance': // default is relevance descending. - // acending relevancy is silly and complicated. + // ascending relevancy is silly and complicated. $on = ''; break; case 'created': @@ -478,6 +500,3 @@ function search_get_order_by_sql($entities_table, $type_table, $sort, $order) { return $order_by; } -/** Register init system event **/ - -register_elgg_event_handler('init','system','search_init'); |
