00001 <?php
00002 # Copyright (C) 2005 Brion Vibber <brion@pobox.com>
00003 # http://www.mediawiki.org/
00004 #
00005 # This program is free software; you can redistribute it and/or modify
00006 # it under the terms of the GNU General Public License as published by
00007 # the Free Software Foundation; either version 2 of the License, or
00008 # (at your option) any later version.
00009 #
00010 # This program is distributed in the hope that it will be useful,
00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00013 # GNU General Public License for more details.
00014 #
00015 # You should have received a copy of the GNU General Public License along
00016 # with this program; if not, write to the Free Software Foundation, Inc.,
00017 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00018 # http://www.gnu.org/copyleft/gpl.html
00019
# Declared before commandLine.inc is loaded; presumably that include reads
# this list while parsing the command line -- confirm against commandLine.inc.
$options = array( 'fix' );

require_once( 'commandLine.inc' );
$wgTitle = Title::newFromText( 'Orphan revision cleanup script' );

# The checks only report problems unless 'fix' is present in $options.
$fix = isset( $options['fix'] );

checkOrphans( $fix );
checkSeparation( $fix );
# The childless-page check is left switched off:
#checkWidows( isset( $options['fix'] ) );
00040
00041 # ------
00042
/**
 * List revision rows whose rev_page has no matching page_id in the page
 * table, and optionally delete them.
 *
 * @param $fix Boolean: when true, the page and revision tables are
 *        write-locked for the duration of the check and every listed
 *        revision row is deleted; when false, rows are only printed.
 */
function checkOrphans( $fix ) {
	global $wgContLang;

	$dbw = wfGetDB( DB_MASTER );
	$pageTable = $dbw->tableName( 'page' );
	$revTable = $dbw->tableName( 'revision' );

	if( $fix ) {
		$dbw->query( "LOCK TABLES $pageTable WRITE, $revTable WRITE" );
	}

	echo "Checking for orphan revision table entries... (this may take a while on a large wiki)\n";

	# Revisions for which the outer join finds no page row.
	$sql = "\n" .
		"SELECT *\n" .
		"FROM $revTable LEFT OUTER JOIN $pageTable ON rev_page=page_id\n" .
		"WHERE page_id IS NULL\n";
	$res = $dbw->query( $sql );
	$orphanCount = $dbw->numRows( $res );

	if( $orphanCount > 0 ) {
		$headerFormat = "%10s %10s %14s %20s %s\n";
		$rowFormat = "%10d %10d %14s %20s %s\n";

		echo $orphanCount . " orphan revisions...\n";
		echo sprintf( $headerFormat,
			'rev_id', 'rev_page', 'rev_timestamp', 'rev_user_text', 'rev_comment' );

		while( $rev = $dbw->fetchObject( $res ) ) {
			# Non-empty edit summaries are shortened and shown in parentheses.
			$summary = '';
			if( $rev->rev_comment != '' ) {
				$summary = '(' . $wgContLang->truncate( $rev->rev_comment, 40 ) . ')';
			}
			$userName = $wgContLang->truncate( $rev->rev_user_text, 17 );

			echo sprintf( $rowFormat,
				$rev->rev_id,
				$rev->rev_page,
				$rev->rev_timestamp,
				$userName,
				$summary );

			if( $fix ) {
				$dbw->delete( 'revision', array( 'rev_id' => $rev->rev_id ) );
			}
		}

		if( !$fix ) {
			echo "Run again with --fix to remove these entries automatically.\n";
		}
	} else {
		echo "No orphans! Yay!\n";
	}

	if( $fix ) {
		$dbw->query( "UNLOCK TABLES" );
	}
}
00088
/**
 * List page rows whose page_latest matches no rev_id in the revision
 * table, and optionally delete them.
 *
 * @param $fix Boolean: when true, the page and revision tables are
 *        write-locked for the duration of the check and every listed
 *        page row is deleted; when false, rows are only printed.
 */
function checkWidows( $fix ) {
	$dbw = wfGetDB( DB_MASTER );
	$pageTable = $dbw->tableName( 'page' );
	$revTable = $dbw->tableName( 'revision' );

	if( $fix ) {
		$dbw->query( "LOCK TABLES $pageTable WRITE, $revTable WRITE" );
	}

	echo "\nChecking for childless page table entries... (this may take a while on a large wiki)\n";

	# Pages for which the outer join finds no revision row.
	$sql = "\n" .
		"SELECT *\n" .
		"FROM $pageTable LEFT OUTER JOIN $revTable ON page_latest=rev_id\n" .
		"WHERE rev_id IS NULL\n";
	$res = $dbw->query( $sql );
	$widowCount = $dbw->numRows( $res );

	if( $widowCount > 0 ) {
		echo $widowCount . " childless pages...\n";
		echo sprintf( "%10s %11s %2s %s\n",
			'page_id', 'page_latest', 'ns', 'page_title' );

		while( $pageRow = $dbw->fetchObject( $res ) ) {
			echo sprintf( "%10d %11d %2d %s\n",
				$pageRow->page_id,
				$pageRow->page_latest,
				$pageRow->page_namespace,
				$pageRow->page_title );

			if( $fix ) {
				$dbw->delete( 'page', array( 'page_id' => $pageRow->page_id ) );
			}
		}

		if( !$fix ) {
			echo "Run again with --fix to remove these entries automatically.\n";
		}
	} else {
		echo "No childless pages! Yay!\n";
	}

	if( $fix ) {
		$dbw->query( "UNLOCK TABLES" );
	}
}
00135
00136
/**
 * Find pages whose page_latest does not point at their newest revision,
 * and optionally repoint them.
 *
 * Every page row is joined to the revision named by page_latest, and that
 * revision's rev_timestamp is compared with the greatest rev_timestamp among
 * the revisions whose rev_page is the page. Any mismatch is printed.
 *
 * @param $fix Boolean: when true, the page, revision and text tables are
 *        write-locked for the duration of the check, and each mismatching
 *        page is passed to Article::updateRevisionOn() together with the
 *        revision that carries the greatest timestamp; when false, the
 *        mismatches are only printed.
 */
function checkSeparation( $fix ) {
	$dbw = wfGetDB( DB_MASTER );
	$page = $dbw->tableName( 'page' );
	$revision = $dbw->tableName( 'revision' );
	$text = $dbw->tableName( 'text' );

	if( $fix ) {
		# NOTE(review): text is locked as well, presumably because the code
		# run during the fix touches it -- confirm.
		$dbw->query( "LOCK TABLES $page WRITE, $revision WRITE, $text WRITE" );
	}

	echo "\nChecking for pages whose page_latest links are incorrect... (this may take a while on a large wiki)\n";
	$result = $dbw->query( "
		SELECT *
		FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
	");
	$found = 0;
	while( $row = $dbw->fetchObject( $result ) ) {
		$result2 = $dbw->query( "
			SELECT MAX(rev_timestamp) as max_timestamp
			FROM $revision
			WHERE rev_page=$row->page_id
		" );
		$row2 = $dbw->fetchObject( $result2 );
		$dbw->freeResult( $result2 );
		if( $row2 ) {
			if( $row->rev_timestamp != $row2->max_timestamp ) {
				if( $found == 0 ) {
					# Print the column header once, before the first mismatch.
					printf( "%10s %10s %14s %14s\n",
						'page_id', 'rev_id', 'timestamp', 'max timestamp' );
				}
				++$found;
				printf( "%10d %10d %14s %14s\n",
					$row->page_id,
					$row->page_latest,
					$row->rev_timestamp,
					$row2->max_timestamp );
				if( $fix ) {
					$maxId = $dbw->selectField(
						'revision',
						'rev_id',
						array(
							'rev_page' => $row->page_id,
							'rev_timestamp' => $row2->max_timestamp ) );
					# The join above is on page_latest=rev_id only, so
					# page_latest may name a revision of some other page while
					# this page has no revisions of its own.  The lookup then
					# finds nothing, and handing a missing revision to
					# updateRevisionOn() would abort the whole run.
					$maxRev = $maxId ? Revision::newFromId( $maxId ) : null;
					if( $maxRev ) {
						echo "... updating to revision $maxId\n";
						$title = Title::makeTitle( $row->page_namespace, $row->page_title );
						$article = new Article( $title );
						$article->updateRevisionOn( $dbw, $maxRev );
					} else {
						echo "... no usable revision found for page $row->page_id, skipping\n";
					}
				}
			}
		} else {
			echo "wtf\n";
		}
	}
	$dbw->freeResult( $result );

	if( $found ) {
		echo "Found $found pages with incorrect latest revision.\n";
	} else {
		echo "No pages with incorrect latest revision. Yay!\n";
	}
	if( !$fix && $found > 0 ) {
		# The fix repoints page_latest; it does not delete anything.
		echo "Run again with --fix to update page_latest for these pages automatically.\n";
	}

	if( $fix ) {
		$dbw->query( "UNLOCK TABLES" );
	}
}
00206