00001 <?php
/**
 * Refresh link-table (or redirect-table) entries for a range of pages.
 *
 * Exactly one of three modes runs, checked in this order:
 *  - $oldRedirectsOnly: fix pages that are flagged page_is_redirect=1 but
 *    have no matching row in the redirect table;
 *  - $newOnly: process only pages flagged page_is_new=1, from $start up
 *    (this mode does not apply $end);
 *  - otherwise: walk every page_id from $start to $end inclusive.
 *
 * Progress is printed, and wfWaitForSlaves( $maxLag ) is called, every
 * $reportingInterval (100) pages.
 *
 * @param $start Integer: first page_id to process (cast with intval()).
 * @param $newOnly Boolean: restrict processing to pages flagged as new.
 * @param $maxLag Mixed: value handed to wfWaitForSlaves().
 * @param $end Integer: last page_id to process; 0 means "no upper bound"
 *   (in the full-walk mode it is replaced by max(page_id)).
 * @param $redirectsOnly Boolean: call fixRedirect() for each page instead
 *   of fixLinksFromArticle().
 * @param $oldRedirectsOnly Boolean: only repair redirects that are missing
 *   from the redirect table.
 */
function refreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
	global $wgUser, $wgParser, $wgUseTidy;

	$reportingInterval = 100;
	$fname = 'refreshLinks';
	$dbr = wfGetDB( DB_SLAVE );

	# Both bounds are interpolated into SQL below, so force them to integers
	$start = intval( $start );
	$end = intval( $end );

	# Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
	$wgUser->setOption('math', MW_MATH_SOURCE);

	# Don't generate extension images (e.g. Timeline)
	if( method_exists( $wgParser, "clearTagHooks" ) ) {
		$wgParser->clearTagHooks();
	}

	# Don't use HTML tidy
	$wgUseTidy = false;

	$what = $redirectsOnly ? "redirects" : "links";

	if( $oldRedirectsOnly ) {
		# Pages marked as redirects that have no row in the redirect table
		$res = $dbr->query(
			"SELECT page_id ".
			"FROM page ".
			"LEFT JOIN redirect ON page_id=rd_from ".
			"WHERE page_is_redirect=1 AND rd_from IS NULL AND ".
			($end == 0 ? "page_id >= $start"
			           : "page_id BETWEEN $start AND $end"),
			$fname
		);
		$num = $dbr->numRows( $res );
		print "Refreshing $num old redirects from $start...\n";

		# The counter must start at zero before the first ++$i
		$i = 0;
		while( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % $reportingInterval ) ) {
				print "$i\n";
				wfWaitForSlaves( $maxLag );
			}
			fixRedirect( $row->page_id );
		}
	} elseif( $newOnly ) {
		print "Refreshing $what from ";
		$res = $dbr->select( 'page',
			array( 'page_id' ),
			array(
				'page_is_new' => 1,
				"page_id >= $start" ),
			$fname
		);
		$num = $dbr->numRows( $res );
		print "$num new articles...\n";

		$i = 0;
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % $reportingInterval ) ) {
				print "$i\n";
				wfWaitForSlaves( $maxLag );
			}
			if ( $redirectsOnly ) {
				fixRedirect( $row->page_id );
			} else {
				fixLinksFromArticle( $row->page_id );
			}
		}
	} else {
		print "Refreshing $what table.\n";
		if ( !$end ) {
			# No explicit upper bound: run up to the highest existing page_id
			$end = $dbr->selectField( 'page', 'max(page_id)', false );
		}
		print("Starting from page_id $start of $end.\n");

		# Every id in the range is tried; the fix* helpers return early for
		# ids that do not resolve to a title
		for ( $id = $start; $id <= $end; $id++ ) {
			if ( !( $id % $reportingInterval ) ) {
				print "$id\n";
				wfWaitForSlaves( $maxLag );
			}
			if ( $redirectsOnly ) {
				fixRedirect( $id );
			} else {
				fixLinksFromArticle( $id );
			}
		}
	}
}
00093 
/**
 * Re-save the redirect target of a single page.
 *
 * Sets the globals $wgTitle and $wgArticle to the page being processed.
 * Nothing is written when the id does not resolve to a title, or when
 * followRedirect() yields anything other than an object.
 *
 * @param $id Integer: page_id of the page to process.
 */
function fixRedirect( $id ){
	global $wgTitle, $wgArticle;

	$wgTitle = Title::newFromID( $id );
	$dbw = wfGetDB( DB_MASTER );

	if ( $wgTitle === null ) {
		# No page with this id
		return;
	}

	$wgArticle = new Article( $wgTitle );
	$target = $wgArticle->followRedirect();

	# Non-object results (false included) mean there is nothing to store
	if ( is_object( $target ) ) {
		$wgArticle->updateRedirectOn( $dbw, $target );
	}
}
00112 
/**
 * Re-parse one page and rewrite its link-table entries.
 *
 * Sets the global $wgTitle to the page being processed and clears the
 * LinkCache singleton on every call. Returns without touching the database
 * when the id does not resolve to a title or the title has no revision.
 *
 * @param $id Integer: page_id of the page to process.
 */
function fixLinksFromArticle( $id ) {
	global $wgTitle, $wgParser;

	$wgTitle = Title::newFromID( $id );
	$dbw = wfGetDB( DB_MASTER );

	LinkCache::singleton()->clear();

	if ( is_null( $wgTitle ) ) {
		return;
	}

	$revision = Revision::newFromTitle( $wgTitle );
	if ( !$revision ) {
		return;
	}

	# Open the transaction only once we know there is work to do, so the
	# early returns above can never leave a transaction without its commit
	$dbw->begin();

	$options = new ParserOptions;
	$parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
	$update = new LinksUpdate( $wgTitle, $parserOutput, false );
	$update->doUpdate();
	$dbw->immediateCommit();
}
00138 
00139 
00140 
00141 
00142 
00143 
00144 
00145 
00146 
00147 
/**
 * Delete link-table rows whose source page no longer exists.
 *
 * For each of pagelinks, imagelinks, categorylinks, templatelinks and
 * externallinks, selects the distinct *_from ids that have no matching
 * page.page_id and deletes them from the master in batches, printing a
 * running count as it goes.
 *
 * @param $maxLag Integer: value handed to wfWaitForSlaves() once, before
 *   any work starts.
 * @param $batchSize Integer: number of ids deleted per statement; values
 *   below 1 are treated as 1.
 */
function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
	wfWaitForSlaves( $maxLag );

	# A batch size below 1 would make the modulo in the loop divide by zero
	$batchSize = max( 1, intval( $batchSize ) );

	$dbw = wfGetDB( DB_MASTER );

	# Reads go through a connection from a freshly created load balancer,
	# with result buffering switched off on that connection
	$lb = wfGetLBFactory()->newMainLB();
	$dbr = $lb->getConnection( DB_SLAVE );
	$dbr->bufferResults( false );

	# Link table => column holding the id of the page the link comes from
	$linksTables = array(
		'pagelinks' => 'pl_from',
		'imagelinks' => 'il_from',
		'categorylinks' => 'cl_from',
		'templatelinks' => 'tl_from',
		'externallinks' => 'el_from',
	);

	foreach ( $linksTables as $table => $field ) {
		print "Retrieving illegal entries from $table... ";

		# LEFT JOIN against page and keep only rows with no matching page_id
		$results = $dbr->select( array( $table, 'page' ),
			$field,
			array( 'page_id' => null ),
			__METHOD__,
			'DISTINCT',
			array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
		);

		$counter = 0;
		$list = array();
		print "0..";

		foreach ( $results as $row ) {
			$counter++;
			$list[] = $row->$field;
			if ( ( $counter % $batchSize ) == 0 ) {
				# NOTE(review): the wait here uses a fixed 5 rather than
				# $maxLag -- confirm whether that is intentional
				wfWaitForSlaves(5);
				$dbw->delete( $table, array( $field => $list ), __METHOD__ );

				print $counter . "..";
				$list = array();
			}
		}

		print $counter;
		# Flush the final, partially filled batch
		if ( count( $list ) > 0 ) {
			$dbw->delete( $table, array( $field => $list ), __METHOD__ );
		}

		print "\n";
	}

	$lb->closeAll();
}