00001 <?php
/**
 * Base search engine.
 *
 * The search and index-update methods in this class are empty stubs that
 * return null or do nothing; presumably concrete backends override them.
 * The static helpers resolve "near match" titles and build namespace lists.
 */
class SearchEngine {
    public $limit = 10;
    public $offset = 0;
    public $prefix = '';
    public $searchTerms = array();
    public $namespaces = array( NS_MAIN );
    public $showRedirects = false;

    /**
     * Full-text search stub.
     *
     * @param string $term Search term (unused here)
     * @return null Always null in this base class
     */
    public function searchText( $term ) {
        return null;
    }

    /**
     * Title search stub.
     *
     * @param string $term Search term (unused here)
     * @return null Always null in this base class
     */
    public function searchTitle( $term ) {
        return null;
    }

    /**
     * @return bool Always true in this base class
     */
    public function acceptListRedirects() {
        return true;
    }

    /**
     * Hook point for rewriting a search term; the base class returns it as is.
     *
     * @param string $term
     * @return string The unchanged term
     */
    public function transformSearchTerm( $term ) {
        return $term;
    }

    /**
     * Try to find a title that closely matches the search term.
     *
     * For the term and each of its language variants this tries, in order:
     * the literal title, a title whose article has viewable content, the
     * lc / ucwords / uc / ucwordbreaks forms, and the SearchGetNearMatch
     * hook. After that it special-cases IP addresses, user pages, existing
     * files, MediaWiki-namespace pages and fully quoted terms.
     *
     * @param string $searchterm
     * @return Title|null The matching title, or null when nothing matched
     */
    public static function getNearMatch( $searchterm ) {
        global $wgContLang;

        $candidates = array( $searchterm );
        if ( $wgContLang->hasVariants() ) {
            $candidates = array_merge(
                $candidates,
                $wgContLang->convertLinkToAllVariants( $searchterm )
            );
        }

        # Language methods applied to the term, in the order they are tried.
        $caseMethods = array( 'lc', 'ucwords', 'uc', 'ucwordbreaks' );

        foreach ( $candidates as $term ) {
            # Exact match? No need to look further.
            $title = Title::newFromText( $term );
            if ( $title === null ) {
                return null;
            }

            if ( $title->getNamespace() == NS_SPECIAL
                || $title->isExternal()
                || $title->exists()
            ) {
                return $title;
            }

            # See if it otherwise still has content in some sane sense.
            $article = MediaWiki::articleFromTitle( $title );
            if ( $article->hasViewableContent() ) {
                return $title;
            }

            # Fall back to the case-transformed forms of the term.
            foreach ( $caseMethods as $method ) {
                $title = Title::newFromText( $wgContLang->$method( $term ) );
                if ( $title && $title->exists() ) {
                    return $title;
                }
            }

            # Give hooks a chance; a false return means the hook decided.
            $title = null;
            if ( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
                return $title;
            }
        }

        $title = Title::newFromText( $searchterm );
        $ns = $title->getNamespace();

        # Entering an IP address goes to the contributions page.
        $isIpUserPage = $ns == NS_USER && User::isIP( $title->getText() );
        if ( $isIpUserPage || User::isIP( trim( $searchterm ) ) ) {
            return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
        }

        # Entering a user goes to the user page whether it's there or not.
        if ( $ns == NS_USER ) {
            return $title;
        }

        # Go to images that exist even if there's no local page: there may
        # have been a funny upload, or the file may live on a shared
        # repository such as Wikimedia Commons.
        if ( $ns == NS_FILE && wfFindFile( $title ) ) {
            return $title;
        }

        # MediaWiki namespace? The page may be "implied" if not customized.
        # Return it with the first letter forced to upper case.
        if ( $ns == NS_MEDIAWIKI ) {
            return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
        }

        # Quoted term? Try again without the quotes.
        $matches = array();
        if ( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
            return SearchEngine::getNearMatch( $matches[1] );
        }

        return null;
    }

    /**
     * @return string Character-class body (no brackets) of characters that
     *  filter() leaves untouched
     */
    public static function legalSearchChars() {
        return "A-Za-z_'.0-9\\x80-\\xFF\\-";
    }

    /**
     * Set the result window; both values are coerced to integers.
     *
     * @param int $limit
     * @param int $offset
     */
    public function setLimitOffset( $limit, $offset = 0 ) {
        $this->limit = (int)$limit;
        $this->offset = (int)$offset;
    }

    /**
     * Set which namespaces the search should cover.
     *
     * @param array|null $namespaces Stored as given
     */
    public function setNamespaces( $namespaces ) {
        $this->namespaces = $namespaces;
    }

    /**
     * Strip a leading "all:" keyword or namespace prefix from the query and
     * update $this->namespaces accordingly.
     *
     * @param string $query
     * @return string The query without the recognised prefix, or the
     *  original query when no prefix applies or nothing would remain
     */
    public function replacePrefixes( $query ) {
        global $wgContLang;

        $colonPos = strpos( $query, ':' );
        if ( $colonPos === false ) {
            return $query;
        }

        $parsed = $query;
        $allkeyword = wfMsgForContent( 'searchall' ) . ":";
        $allLen = strlen( $allkeyword );

        if ( strncmp( $query, $allkeyword, $allLen ) == 0 ) {
            # "all:" keyword: search every namespace.
            $this->namespaces = null;
            $parsed = substr( $query, $allLen );
        } else {
            # Otherwise the text before the first colon may be a namespace.
            $prefix = substr( $query, 0, $colonPos );
            $index = $wgContLang->getNsIndex( $prefix );
            if ( $index !== false ) {
                $this->namespaces = array( $index );
                $parsed = substr( $query, strlen( $prefix ) + 1 );
            }
        }

        # Never hand back an empty query.
        return trim( $parsed ) == '' ? $query : $parsed;
    }

    /**
     * @return array Namespace index => name for every namespace whose
     *  index is NS_MAIN or higher
     */
    public static function searchableNamespaces() {
        global $wgContLang;

        $searchable = array();
        foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
            if ( $ns < NS_MAIN ) {
                continue;
            }
            $searchable[$ns] = $name;
        }
        return $searchable;
    }

    /**
     * List the searchable namespaces the user has switched on through the
     * 'searchNs<index>' options.
     *
     * @param User $user
     * @return array List of namespace indexes
     */
    public static function userNamespaces( &$user ) {
        $enabled = array();
        foreach ( array_keys( SearchEngine::searchableNamespaces() ) as $ns ) {
            if ( $user->getOption( 'searchNs' . $ns ) ) {
                $enabled[] = $ns;
            }
        }
        return $enabled;
    }

    /**
     * Snippet context settings; currently fixed values, $user is not read.
     *
     * @param User $user
     * @return array array( context lines, context characters )
     */
    public static function userHighlightPrefs( &$user ) {
        return array( 2, 75 );
    }

    /**
     * @return array Keys of $wgNamespacesToBeSearchedDefault whose value
     *  loosely equals true
     */
    public static function defaultNamespaces() {
        global $wgNamespacesToBeSearchedDefault;

        return array_keys( $wgNamespacesToBeSearchedDefault, true );
    }

    /**
     * Map namespace indexes to display names, substituting the
     * 'blanknamespace' message wherever the formatted name is empty.
     *
     * @param array $namespaces Namespace indexes
     * @return array Names, keyed like the input
     */
    public static function namespacesAsText( $namespaces ) {
        global $wgContLang;

        $names = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces );
        foreach ( array_keys( $names ) as $key ) {
            if ( empty( $names[$key] ) ) {
                $names[$key] = wfMsg( 'blanknamespace' );
            }
        }
        return $names;
    }

    /**
     * @return array Keys of $wgNamespacesToBeSearchedProject whose value
     *  loosely equals true
     */
    public static function projectNamespaces() {
        global $wgNamespacesToBeSearchedDefault, $wgNamespacesToBeSearchedProject;

        return array_keys( $wgNamespacesToBeSearchedProject, true );
    }

    /**
     * Enabled namespaces from the default and project lists combined.
     * The lists are joined with the array union operator, so for an index
     * present in both the default list's value is the one that counts.
     *
     * @return array Namespace indexes
     */
    public static function defaultAndProjectNamespaces() {
        global $wgNamespacesToBeSearchedDefault, $wgNamespacesToBeSearchedProject;

        $combined = $wgNamespacesToBeSearchedDefault + $wgNamespacesToBeSearchedProject;
        return array_keys( $combined, true );
    }

    /**
     * Replace every character outside legalSearchChars() with a space and
     * trim the result.
     *
     * @param string $text
     * @return string
     */
    public function filter( $text ) {
        $legal = $this->legalSearchChars();
        $pattern = "/[^{$legal}]/";
        return trim( preg_replace( $pattern, " ", $text ) );
    }

    /**
     * Instantiate the configured search engine class ($wgSearchType, or the
     * class named by the database object when that is unset), passing it
     * the DB_SLAVE connection.
     *
     * @return SearchEngine New engine with limit and offset both set to 0
     */
    public static function create() {
        global $wgSearchType;

        $dbr = wfGetDB( DB_SLAVE );
        $class = $wgSearchType ? $wgSearchType : $dbr->getSearchEngine();

        $search = new $class( $dbr );
        $search->setLimitOffset( 0, 0 );
        return $search;
    }

    /**
     * Index-update stub; does nothing in this base class.
     *
     * @param int $id
     * @param string $title
     * @param string $text
     */
    public function update( $id, $title, $text ) {
    }

    /**
     * Title-update stub; does nothing in this base class.
     *
     * @param int $id
     * @param string $title
     */
    public function updateTitle( $id, $title ) {
    }

    /**
     * @return string $wgOpenSearchTemplate when set, otherwise an api.php
     *  opensearch URL restricted to the default namespaces
     */
    public static function getOpenSearchTemplate() {
        global $wgOpenSearchTemplate, $wgServer, $wgScriptPath;

        if ( $wgOpenSearchTemplate ) {
            return $wgOpenSearchTemplate;
        }

        $ns = implode( '|', SearchEngine::defaultNamespaces() );
        if ( !$ns ) {
            $ns = "0";
        }
        return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace='.$ns;
    }

    /**
     * @return string $wgMWSuggestTemplate when set, otherwise an api.php
     *  opensearch URL with a {namespaces} placeholder
     */
    public static function getMWSuggestTemplate() {
        global $wgMWSuggestTemplate, $wgServer, $wgScriptPath;

        if ( $wgMWSuggestTemplate ) {
            return $wgMWSuggestTemplate;
        }
        return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}&suggest';
    }
}
00414 
/**
 * Empty result set. Every accessor reports "nothing found"; presumably
 * backend-specific result sets extend this and override what they support.
 */
class SearchResultSet {
    /**
     * @return array Matched-term list; empty here
     */
    public function termMatches() {
        return array();
    }

    /**
     * @return int Number of rows; 0 here
     */
    public function numRows() {
        return 0;
    }

    /**
     * @return bool Whether any results exist; false here
     */
    public function hasResults() {
        return false;
    }

    /**
     * @return null Total hit count; null here
     */
    public function getTotalHits() {
        return null;
    }

    /**
     * @return bool Whether a query suggestion is available; false here
     */
    public function hasSuggestion() {
        return false;
    }

    /**
     * @return null Suggested query; null here
     */
    public function getSuggestionQuery() {
        return null;
    }

    /**
     * @return string Suggestion snippet; empty string here
     */
    public function getSuggestionSnippet() {
        return '';
    }

    /**
     * @return null Additional information; null here
     */
    public function getInfo() {
        return null;
    }

    /**
     * @return null Interwiki results; null here
     */
    public function getInterwikiResults() {
        return null;
    }

    /**
     * @return bool True when getInterwikiResults() is loosely non-null
     */
    public function hasInterwikiResults() {
        $interwiki = $this->getInterwikiResults();
        return $interwiki != null;
    }

    /**
     * @return bool Next result; false here, as there are none
     */
    public function next() {
        return false;
    }

    /**
     * Release resources held by the result set; nothing to do here.
     */
    public function free() {
    }
}
00531 
00532 
/**
 * Member-less marker class.
 * Some search engines may bail out if too many matches are found.
 */
class SearchResultTooMany {
}
00539 
00540 
/**
 * A single search hit built from a database row.
 *
 * The constructor resolves the row to a Title, loads a revision for it
 * through Revision::newFromTitle() and, for NS_FILE titles, looks the file
 * up with wfFindFile(). Snippet, redirect, section and related accessors
 * return empty defaults here.
 */
class SearchResult {
    var $mRevision = null;
    var $mImage = null;
    # Declared explicitly: the constructor and initText() assign these, and
    # creating undeclared (dynamic) properties is deprecated as of PHP 8.2.
    var $mTitle = null;
    # Stays null (so isset() is false) until initText() fills it in.
    var $mText = null;

    /**
     * @param object $row Row object exposing page_namespace and page_title
     */
    function __construct( $row ) {
        $this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
        if ( !is_null( $this->mTitle ) ) {
            $this->mRevision = Revision::newFromTitle( $this->mTitle );
            if ( $this->mTitle->getNamespace() === NS_FILE ) {
                $this->mImage = wfFindFile( $this->mTitle );
            }
        }
    }

    /**
     * @return bool True when no Title could be built for this result
     */
    function isBrokenTitle() {
        return is_null( $this->mTitle );
    }

    /**
     * @return bool True when neither a revision nor a file was found
     */
    function isMissingRevision() {
        return !$this->mRevision && !$this->mImage;
    }

    /**
     * @return Title|null
     */
    function getTitle() {
        return $this->mTitle;
    }

    /**
     * @return null Relevance score; not provided here
     */
    function getScore() {
        return null;
    }

    /**
     * Load the revision text into $this->mText on first use; the text is
     * the empty string when there is no revision.
     */
    protected function initText() {
        if ( !isset( $this->mText ) ) {
            if ( $this->mRevision != null ) {
                $this->mText = $this->mRevision->getText();
            } else {
                $this->mText = '';
            }
        }
    }

    /**
     * Build the highlighted text snippet for this result, using
     * highlightText() when $wgAdvancedSearchHighlighting is set and
     * highlightSimple() otherwise.
     *
     * @param array $terms Terms to highlight
     * @return string Highlighted snippet
     */
    function getTextSnippet( $terms ) {
        global $wgUser, $wgAdvancedSearchHighlighting;

        $this->initText();
        list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
        $h = new SearchHighlighter();
        if ( $wgAdvancedSearchHighlighting ) {
            return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
        }
        return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
    }

    /**
     * @param array $terms Terms to highlight
     * @return string Highlighted title; empty here
     */
    function getTitleSnippet( $terms ) {
        return '';
    }

    /**
     * @param array $terms Terms to highlight
     * @return string Highlighted redirect name; empty here
     */
    function getRedirectSnippet( $terms ) {
        return '';
    }

    /**
     * @return null Redirect title; none here
     */
    function getRedirectTitle() {
        return null;
    }

    /**
     * @return string Highlighted section name; empty here
     */
    function getSectionSnippet() {
        return '';
    }

    /**
     * @return null Section title; none here
     */
    function getSectionTitle() {
        return null;
    }

    /**
     * @return string Timestamp of the revision, else of the file, else ''
     */
    function getTimestamp() {
        if ( $this->mRevision ) {
            return $this->mRevision->getTimestamp();
        } else if ( $this->mImage ) {
            return $this->mImage->getTimestamp();
        }
        return '';
    }

    /**
     * @return int Word count of the text as reported by str_word_count()
     */
    function getWordCount() {
        $this->initText();
        return str_word_count( $this->mText );
    }

    /**
     * @return int Length of the text in bytes
     */
    function getByteSize() {
        $this->initText();
        return strlen( $this->mText );
    }

    /**
     * @return bool Whether related results exist; false here
     */
    function hasRelated() {
        return false;
    }

    /**
     * @return string Interwiki prefix of the result; empty here
     */
    function getInterwikiPrefix() {
        return '';
    }
}
00703 
00709 class SearchHighlighter {       
00710         var $mCleanWikitext = true;
00711         
/**
 * @param bool $cleanupWikitext Stored in $mCleanWikitext; when true,
 *  splitAndAdd() passes text through removeWiki() before splitting it
 */
function __construct( $cleanupWikitext = true ) {
    $this->mCleanWikitext = $cleanupWikitext;
}

/**
 * Old class-named constructor, kept so explicit calls by this name still
 * work. PHP 8 no longer treats a method named after its class as the
 * constructor, so construction now goes through __construct().
 *
 * @param bool $cleanupWikitext See __construct()
 */
function SearchHighlighter( $cleanupWikitext = true ) {
    $this->__construct( $cleanupWikitext );
}
00715         
/**
 * Build a highlighted snippet of $text around the search terms.
 *
 * The text is first split into lines of ordinary text and lines that sit
 * inside templates, NS_FILE links, tables and (when wfCite() exists)
 * <ref> tags. Matching lines are then picked, extended with context, and
 * the terms wrapped in <span class='searchmatch'>.
 *
 * Terms are interpolated into regular expressions as they are, so callers
 * must pass regex-safe fragments.
 *
 * @param string $text Text to build the snippet from
 * @param array $terms Regex fragments to highlight; may be empty
 * @param int $contextlines Number of snippet lines to aim for
 * @param int $contextchars Characters per line, scaled to bytes below
 * @return string Snippet, or '' when $text is empty or nothing was picked
 */
public function highlightText( $text, $terms, $contextlines, $contextchars ) {
    global $wgContLang;
    global $wgSearchHighlightBoundaries;
    $fname = __METHOD__;

    if ( $text == '' ) {
        return '';
    }

    // Openers of special sections: "{{", "[[prefix:" and a table start.
    $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
    // Per opener group: group 1 matches a nested opener, group 2 the closer.
    $endPatterns = array(
        1 => '/(\{\{)|(\}\})/',
        2 => '/(\[\[)|(\]\])/',
        3 => "/(\n\\{\\|)|(\n\\|\\})/" );

    // <ref> sections are only recognised when wfCite() is defined.
    if ( function_exists( 'wfCite' ) ) {
        $spat .= '|(<ref>)';
        $endPatterns[4] = '/(<ref>)|(<\/ref>)/';
    }
    $spat .= '/';
    $textExt = array();  // lines of ordinary text
    $otherExt = array(); // lines from inside special sections
    wfProfileIn( "$fname-split" );
    $start = 0;
    $textLen = strlen( $text );
    $count = 0; // next line index, shared by both arrays
    while ( $start < $textLen ) {
        // Find the next opener at or after $start.
        if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
            $epat = '';
            foreach ( $matches as $key => $val ) {
                if ( $key > 0 && $val[1] != -1 ) {
                    if ( $key == 2 ) {
                        // A prefixed link only counts when the prefix is
                        // the file namespace; otherwise $epat stays empty
                        // and the rest of the text is taken as plain text.
                        $ns = substr( $val[0], 2, -1 );
                        if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
                            break;
                        }
                    }
                    $epat = $endPatterns[$key];
                    // Everything before the opener is ordinary text.
                    $this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
                    $start = $val[1];
                    break;
                }
            }
            if ( $epat ) {
                // Look for the matching closer, tracking nesting depth.
                $level = 0;
                $offset = $start + 1;
                $found = false;
                while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
                    if ( array_key_exists( 2, $endMatches ) ) {
                        // A closer.
                        if ( $level == 0 ) {
                            $len = strlen( $endMatches[2][0] );
                            $off = $endMatches[2][1];
                            $this->splitAndAdd( $otherExt, $count,
                                substr( $text, $start, $off + $len - $start ) );
                            $start = $off + $len;
                            $found = true;
                            break;
                        } else {
                            // Closes a nested section.
                            $level -= 1;
                        }
                    } else {
                        // A nested opener.
                        $level += 1;
                    }
                    $offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
                }
                if ( !$found ) {
                    // No closer: keep the opener itself as text and move on.
                    $this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen( $matches[0][0] ) ) );
                    $start += strlen( $matches[0][0] );
                }
                continue;
            }
        }
        // No (usable) opener left: the remainder is ordinary text.
        $this->splitAndAdd( $textExt, $count, substr( $text, $start ) );
        break;
    }

    // Both arrays share one index space, so the union holds every line.
    $all = $textExt + $otherExt;

    wfProfileOut( "$fname-split" );

    // Terms containing non-ASCII bytes get each character rewritten by
    // caseCallback(); other terms are used as given.
    foreach ( $terms as $index => $term ) {
        if ( preg_match( '/[\x80-\xff]/', $term ) ) {
            $terms[$index] = preg_replace_callback( '/./us', array( $this, 'caseCallback' ), $terms[$index] );
        }
    }
    $anyterm = implode( '|', $terms );
    $phrase = implode( "$wgSearchHighlightBoundaries+", $terms );

    // Scale $contextchars from characters to bytes using the byte/character
    // ratio of the terms. With no terms the character count is zero; use a
    // neutral scale of 1 instead of dividing by zero.
    $anytermChars = mb_strlen( $anyterm );
    $scale = $anytermChars > 0 ? strlen( $anyterm ) / $anytermChars : 1;
    $contextchars = intval( $contextchars * $scale );

    $patPre = "(^|$wgSearchHighlightBoundaries)";
    $patPost = "($wgSearchHighlightBoundaries|$)";

    $pat1 = "/(".$phrase.")/ui";
    $pat2 = "/$patPre(".$anyterm.")$patPost/ui";

    wfProfileIn( "$fname-extract" );

    $left = $contextlines;

    $snippets = array();
    $offsets = array();

    // Take the first ordinary line that does not start with ';' or ':'.
    $first = 0;
    $firstText = '';
    foreach ( $textExt as $index => $line ) {
        if ( strlen( $line ) > 0 && $line[0] != ';' && $line[0] != ':' ) {
            $firstText = $this->extract( $line, 0, $contextchars * $contextlines );
            $first = $index;
            break;
        }
    }
    if ( $firstText ) {
        $succ = true;
        // Use that line alone only when it contains every term.
        foreach ( $terms as $term ) {
            if ( !preg_match( "/$patPre".$term."$patPost/ui", $firstText ) ) {
                $succ = false;
                break;
            }
        }
        if ( $succ ) {
            $snippets[$first] = $firstText;
            $offsets[$first] = 0;
        }
    }
    if ( !$snippets ) {
        // Collect matching lines via process(): the whole phrase first,
        // then any single term; ordinary text before special sections.
        $this->process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
        $this->process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
        $this->process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
        $this->process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );

        ksort( $snippets );
    }

    $extended = array();
    if ( count( $snippets ) == 0 ) {
        // Nothing matched: start from the first line with an empty snippet
        // so the extension loop below fills it with leading text.
        $targetchars = $contextchars * $contextlines;
        $snippets[$first] = '';
        $offsets[$first] = 0;
    } else {
        // A phrase match near the start of the first line is enough alone.
        if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
            && $offsets[$first] < $contextchars * 2 ) {
            $snippets = array( $first => $snippets[$first] );
        }

        // Split the character budget evenly across the snippets.
        $targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
    }

    foreach ( $snippets as $index => $line ) {
        $extended[$index] = $line;
        $len = strlen( $line );
        if ( $len < $targetchars - 20 ) {
            // Too short: widen within the same line when it has more text.
            // The isset() guard covers the case where no line was
            // extracted at all and $all has no entry for $index.
            if ( isset( $all[$index] ) && $len < strlen( $all[$index] ) ) {
                $extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index]+$targetchars, $offsets[$index] );
                $len = strlen( $extended[$index] );
            }

            // Still short: append following lines that are not snippets.
            $add = $index + 1;
            while ( $len < $targetchars - 20
                   && array_key_exists( $add, $all )
                   && !array_key_exists( $add, $snippets ) ) {
                $offsets[$add] = 0;
                $tt = "\n".$this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
                $extended[$add] = $tt;
                $len += strlen( $tt );
                $add++;
            }
        }
    }

    // Join the pieces: a space between a line shown to its end and the
    // line right after it, an ellipsis marker everywhere else.
    $snippets = $extended;
    $last = -1;
    $extract = '';
    foreach ( $snippets as $index => $line ) {
        if ( $last == -1 ) {
            $extract .= $line;
        } elseif ( $last+1 == $index && $offsets[$last]+strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) {
            $extract .= " ".$line;
        } else {
            $extract .= '<b> ... </b>' . $line;
        }

        $last = $index;
    }
    if ( $extract ) {
        $extract .= '<b> ... </b>';
    }

    // Wrap each distinct term, once, in the highlight span.
    $processed = array();
    foreach ( $terms as $term ) {
        if ( !isset( $processed[$term] ) ) {
            $pat3 = "/$patPre(".$term.")$patPost/ui";
            $extract = preg_replace( $pat3,
                "\\1<span class='searchmatch'>\\2</span>\\3", $extract );
            $processed[$term] = true;
        }
    }

    wfProfileOut( "$fname-extract" );

    return $extract;
}
00957         
/**
 * Split $text into lines and append every non-empty trimmed line to
 * $extracts, numbering entries with the running counter $count.
 *
 * Emptiness is tested against '' rather than by truthiness, so a line
 * consisting only of "0" is kept.
 *
 * @param array &$extracts Receives the lines, keyed by $count
 * @param int &$count Next free index; advanced once per added line
 * @param string $text Text to split; passed through removeWiki() first
 *  when $mCleanWikitext is set
 */
function splitAndAdd( &$extracts, &$count, $text ) {
    $source = $this->mCleanWikitext ? $this->removeWiki( $text ) : $text;
    foreach ( explode( "\n", $source ) as $line ) {
        $tt = trim( $line );
        if ( $tt !== '' ) {
            $extracts[$count++] = $tt;
        }
    }
}
00973         
/**
 * preg_replace_callback() handler used on search terms: a character more
 * than one byte long becomes a bracketed class holding its lower- and
 * upper-case forms; a single-byte character is returned unchanged.
 *
 * @param array $matches Match array; element 0 is the matched character
 * @return string Replacement text
 */
function caseCallback( $matches ) {
    global $wgContLang;

    $char = $matches[0];
    if ( strlen( $char ) <= 1 ) {
        return $char;
    }
    return '[' . $wgContLang->lc( $char ) . $wgContLang->uc( $char ) . ']';
}
00986         
/**
 * Cut the part of $text between roughly $start and $end, with both ends
 * adjusted by position().
 *
 * @param string $text
 * @param int $start Desired start offset; 0 is used as is
 * @param int $end Desired end offset; clamped to the text length
 * @param int|null &$posStart Receives the actual start offset, but only
 *  when the caller passes a non-null value
 * @param int|null &$posEnd Receives the actual end offset, on the same
 *  condition
 * @return string The extracted text, or '' when the range is empty
 */
function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
    global $wgContLang; // not used below

    $textLen = strlen( $text );

    if ( $start != 0 ) {
        $start = $this->position( $text, $start, 1 );
    }
    $end = ( $end >= $textLen ) ? $textLen : $this->position( $text, $end );

    // Report the adjusted offsets back only to callers that asked for them.
    if ( $posStart !== null ) {
        $posStart = $start;
    }
    if ( $posEnd !== null ) {
        $posEnd = $end;
    }

    return ( $end > $start ) ? substr( $text, $start, $end - $start ) : '';
}
01017         
/**
 * Find a cut position near $point.
 *
 * The window of up to 10 bytes either side of $point is searched for the
 * first space or punctuation character. When one is found, its offset plus
 * $offset is returned. Otherwise $point itself is used, moved forward past
 * any bytes in the range 0x80-0xBF so the cut does not land inside a
 * multi-byte character.
 *
 * @param string $text
 * @param int $point Desired byte offset
 * @param int $offset Added to the position of a found boundary character
 * @return int Byte offset to cut at; never past the end of $text when no
 *  boundary character is found
 */
function position( $text, $point, $offset = 0 ) {
    $tolerance = 10;
    $textLen = strlen( $text );
    $s = max( 0, $point - $tolerance );
    $l = min( $textLen, $point + $tolerance ) - $s;
    $m = array();
    if ( preg_match( '/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr( $text, $s, $l ), $m, PREG_OFFSET_CAPTURE ) ) {
        return $m[0][1] + $s + $offset;
    }

    // No boundary character in the window. A point at or beyond the end
    // has no byte to inspect, so the end of the text is the answer.
    if ( $point >= $textLen ) {
        return $textLen;
    }

    $char = ord( $text[$point] );
    while ( $char >= 0x80 && $char < 0xc0 ) {
        // Skip bytes in this range until a different byte or the end.
        $point++;
        if ( $point >= $textLen ) {
            return $textLen;
        }
        $char = ord( $text[$point] );
    }
    return $point;
}
01047         
/**
 * Search the given lines for a pattern and store a context snippet for each
 * line that matches, until the line budget is used up.
 *
 * Lines whose index already has an entry in $out are skipped. For a matching
 * line a window of $contextchars bytes is chosen around the first match and
 * handed to extract(); the snippet is stored in $out and its adjusted start
 * offset in $offsets, both under the line's index.
 *
 * @param string $pattern      Regular expression to look for
 * @param array  $extracts     Lines to search, keyed by index
 * @param int    $linesleft    Remaining number of snippets to produce; decremented per snippet
 * @param int    $contextchars Width of the snippet window in bytes (only read here)
 * @param array  $out          Snippets keyed by line index (modified in place)
 * @param array  $offsets      Snippet start offsets keyed by line index (modified in place)
 */
function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ){
        if( $linesleft == 0 ) {
                return; // nothing left to fill
        }

        foreach( $extracts as $index => $line ) {
                // This line already produced a snippet.
                if( array_key_exists( $index, $out ) ) {
                        continue;
                }

                $hit = array();
                if( !preg_match( $pattern, $line, $hit, PREG_OFFSET_CAPTURE ) ) {
                        continue;
                }

                $offset = $hit[0][1];
                $len = strlen( $hit[0][0] );

                if( $offset + $len < $contextchars ) {
                        // The match ends inside the first window: start at the line start.
                        $begin = 0;
                } elseif( $len > $contextchars ) {
                        // The match is wider than the window: start at the match.
                        $begin = $offset;
                } else {
                        // Otherwise centre the match inside the window.
                        $begin = $offset + intval( ($len - $contextchars) / 2 );
                }

                // extract() only reports the adjusted start when this is non-null.
                $posBegin = $begin;

                $out[$index] = $this->extract( $line, $begin, $begin + $contextchars, $posBegin );
                $offsets[$index] = $posBegin;

                if( --$linesleft == 0 ) {
                        return;
                }
        }
}
01090         
/**
 * Strip basic wiki markup from a text using a fixed sequence of regex
 * replacements: templates, links, HTML-style tags and quote-based formatting.
 *
 * @param string $text
 * @return string Text after all replacements
 */
function removeWiki($text) {
        wfProfileIn( __METHOD__ );

        // Applied strictly in this order:
        //  1. {{...}} containing no pipe      -> removed
        //  2. {{head|rest}}                   -> rest
        //  3. [[...]] containing no pipe      -> inner text
        $bracketRules = array(
                "/\\{\\{([^|]+?)\\}\\}/"        => "",
                "/\\{\\{([^|]+\\|)(.*?)\\}\\}/" => "\\2",
                "/\\[\\[([^|]+?)\\]\\]/"        => "\\1",
        );
        foreach( $bracketRules as $pattern => $replacement ) {
                $text = preg_replace( $pattern, $replacement, $text );
        }

        // [[target|label]]: linkReplace() decides what each link becomes.
        $text = preg_replace_callback(
                "/\\[\\[([^|]+\\|)(.*?)\\]\\]/",
                array( $this, 'linkReplace' ),
                $text
        );

        // Tags first, then runs of five, three and two apostrophes; each
        // pattern is replaced by the empty string.
        $deletions = array(
                "/<\/?[^>]+>/",
                "/'''''/",
                "/('''|<\/?[iIuUbB]>)/",
                "/''/",
        );
        foreach( $deletions as $pattern ) {
                $text = preg_replace( $pattern, "", $text );
        }

        wfProfileOut( __METHOD__ );
        return $text;
}
01118         
/**
 * Replacement callback for piped links matched by removeWiki().
 *
 * If the part before the pipe has a prefix (text before the first ':') that
 * the content language resolves to NS_FILE or NS_CATEGORY, the whole link is
 * returned unchanged. In every other case only the text after the pipe is
 * returned.
 *
 * @param array $matches [0] whole match, [1] part up to and including the pipe, [2] text after the pipe
 * @return string
 */
function linkReplace($matches){
        global $wgContLang;

        $target = $matches[1];
        $colonAt = strpos( $target, ':' );
        if( $colonAt === false ) {
                // No prefix at all: keep just the text after the pipe.
                return $matches[2];
        }

        $nsIndex = $wgContLang->getNsIndex( substr( $target, 0, $colonAt ) );
        $keepWholeLink = $nsIndex !== false
                && ( $nsIndex == NS_FILE || $nsIndex == NS_CATEGORY );

        return $keepWholeLink ? $matches[0] : $matches[2];
}
01138 
/**
 * Build a plain line-based snippet: for each line containing a search term,
 * emit the term with some context before and after it, HTML-escaped and with
 * every term occurrence wrapped in <span class='searchmatch'>.
 *
 * The terms are joined with '|' and embedded in the regular expressions
 * without quoting, so each entry must already be a valid regex fragment.
 * Because the leading (.*) is greedy, the context is taken around the last
 * occurrence of a term in the line.
 *
 * @param string $text         Text to search, split on "\n"
 * @param array  $terms        Regex fragments to look for
 * @param int    $contextlines Maximum number of matching lines to emit
 * @param int    $contextchars Amount of context kept on each side of the term
 * @return string One escaped, highlighted line per match, each ending in "\n"
 */
public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
    global $wgContLang;
    $fname = __METHOD__;

    $lines = explode( "\n", $text );

    $terms = implode( '|', $terms );
    $max = intval( $contextchars ) + 1;
    $pat1 = "/(.*)($terms)(.{0,$max})/i";
    // Does not depend on the line, so build it once.
    $pat2 = '/(' . $terms . ")/i";

    $extract = "";
    wfProfileIn( "$fname-extract" );
    foreach ( $lines as $line ) {
        if ( 0 == $contextlines ) {
            break;
        }
        $m = array();
        if ( ! preg_match( $pat1, $line, $m ) ) {
            continue;
        }
        --$contextlines;

        // NOTE(review): a negative length presumably makes truncate() keep the
        // end of the string - confirm against the language class.
        $pre = $wgContLang->truncate( $m[1], -$contextchars );
        $post = isset( $m[3] ) ? $wgContLang->truncate( $m[3], $contextchars ) : '';
        $found = $m[2];

        // Locate the terms in the raw text and escape each segment on its
        // own. Matching after escaping would let a term such as "amp" or
        // "lt" hit the inside of an entity (&amp;, &lt;) and break it.
        $raw = $pre . $found . $post;
        $line = '';
        $cursor = 0;
        $hits = array();
        if ( preg_match_all( $pat2, $raw, $hits, PREG_OFFSET_CAPTURE ) ) {
            foreach ( $hits[0] as $hit ) {
                $matched = $hit[0];
                $at = $hit[1];
                $line .= htmlspecialchars( substr( $raw, $cursor, $at - $cursor ) )
                    . "<span class='searchmatch'>" . htmlspecialchars( $matched ) . "</span>";
                $cursor = $at + strlen( $matched );
            }
        }
        $line .= htmlspecialchars( (string)substr( $raw, $cursor ) );

        $extract .= "{$line}\n";
    }
    wfProfileOut( "$fname-extract" );

    return $extract;
}
01194         
01195 }
01196 
/**
 * Placeholder search engine.
 *
 * Declares no members of its own, so everything is inherited from
 * SearchEngine; in particular searchText() and searchTitle() keep the base
 * class behaviour of returning null.
 */
class SearchEngineDummy extends SearchEngine {
}