<?php
# Copyright (C) 2005 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Looks for user table rows that share the same user_name and, optionally,
 * merges them: edits made under a duplicate user_id are reassigned to one
 * canonical user_id and the duplicate user row is deleted.
 *
 * The purpose is to make it possible to apply a unique index on user_name.
 * Progress and results are reported through wfOut().
 */
class UserDupes {
	/** Database wrapper object used for every query issued by this class. */
	public $db;

	/** Count of duplicate records found with edits (reassigned, or needing reassignment). */
	public $reassigned;

	/** Count of duplicate records deleted (fix mode) or deletable (check mode). */
	public $trimmed;

	/** Count of duplicate records that still had edits after reassignment. */
	public $failed;

	/**
	 * Store the database object to work on.
	 *
	 * Declared as __construct() because PHP 8 no longer treats a method named
	 * after the class as a constructor. The parameter is still taken by
	 * reference, exactly as before, so existing callers are unaffected.
	 *
	 * @param object $database Database wrapper; must provide indexInfo(),
	 *        tableName(), query(), select(), selectField(), fetchObject(),
	 *        freeResult(), update() and delete().
	 */
	public function __construct( &$database ) {
		$this->db =& $database;
	}

	/**
	 * Check whether the user table already has a unique index on user_name.
	 *
	 * @return bool True if the first index row reports Non_unique == 0;
	 *         false if the index is non-unique or no index info was returned
	 *         (a warning is printed in the latter case).
	 */
	public function hasUniqueIndex() {
		$fname = 'UserDupes::hasUniqueIndex';
		$info = $this->db->indexInfo( 'user', 'user_name', $fname );
		if ( !$info ) {
			wfOut( "WARNING: doesn't seem to have user_name index at all!\n" );
			return false;
		}

		# Confusingly, 'Non_unique' is 0 for *unique* indexes,
		# and 1 for *non-unique* indexes.
		return ( $info[0]->Non_unique == 0 );
	}

	/**
	 * Check for duplicates and resolve them: reassign edits and delete the
	 * duplicate user rows. Shorthand for checkDupes( true ).
	 *
	 * @return bool Same as checkDupes( true ).
	 */
	public function clearDupes() {
		return $this->checkDupes( true );
	}

	/**
	 * Check the user table for duplicate user_name entries and report them;
	 * when $doDelete is true, also reassign their edits and delete them.
	 *
	 * The relevant tables are locked for the duration of the scan. The
	 * counters $reassigned, $trimmed and $failed are reset on every run that
	 * gets past the unique-index check.
	 *
	 * @param bool $doDelete True to actually modify the database; false
	 *        (default) to only report what would be done.
	 * @return bool True if a unique index already exists or it is now safe to
	 *         add one; false if duplicates remain (check mode) or some
	 *         duplicates could not be cleaned (fix mode).
	 */
	public function checkDupes( $doDelete = false ) {
		if ( $this->hasUniqueIndex() ) {
			# Routed through wfOut() like every other message in this class.
			wfOut( wfWikiID() . " already has a unique index on its user table.\n" );
			return true;
		}

		$this->lock();

		wfOut( "Checking for duplicate accounts...\n" );
		$dupes = $this->getDupes();
		$count = count( $dupes );

		wfOut( "Found $count accounts with duplicate records on " . wfWikiID() . ".\n" );
		$this->trimmed = 0;
		$this->reassigned = 0;
		$this->failed = 0;
		foreach ( $dupes as $name ) {
			$this->examine( $name, $doDelete );
		}

		$this->unlock();

		wfOut( "\n" );

		if ( $this->reassigned > 0 ) {
			if ( $doDelete ) {
				wfOut( "$this->reassigned duplicate accounts had edits reassigned to a canonical record id.\n" );
			} else {
				wfOut( "$this->reassigned duplicate accounts need to have edits reassigned.\n" );
			}
		}

		if ( $this->trimmed > 0 ) {
			if ( $doDelete ) {
				wfOut( "$this->trimmed duplicate user records were deleted from " . wfWikiID() . ".\n" );
			} else {
				wfOut( "$this->trimmed duplicate user accounts were found on " . wfWikiID() . " which can be removed safely.\n" );
			}
		}

		if ( $this->failed > 0 ) {
			wfOut( "Something terribly awry; $this->failed duplicate accounts were not removed.\n" );
			return false;
		}

		# In check mode $trimmed counts every removable duplicate, so a
		# non-zero value means work is still pending.
		if ( $this->trimmed == 0 || $doDelete ) {
			wfOut( "It is now safe to apply the unique index on user_name.\n" );
			return true;
		} else {
			wfOut( "Run this script again with the --fix option to automatically delete them.\n" );
			return false;
		}
	}

	/**
	 * Issue a LOCK TABLES ... WRITE statement for the user table and the
	 * edit tables: 'revision' on the new schema, 'cur' and 'old' otherwise.
	 */
	public function lock() {
		$fname = 'UserDupes::lock';
		if ( $this->newSchema() ) {
			$set = array( 'user', 'revision' );
		} else {
			$set = array( 'user', 'cur', 'old' );
		}
		$names = array_map( array( $this, 'lockTable' ), $set );
		$tables = implode( ',', $names );

		$this->db->query( "LOCK TABLES $tables", $fname );
	}

	/**
	 * Build one entry of the LOCK TABLES list.
	 *
	 * @param string $table Unqualified table name.
	 * @return string The name as returned by the database object's
	 *         tableName(), followed by ' WRITE'.
	 */
	public function lockTable( $table ) {
		return $this->db->tableName( $table ) . ' WRITE';
	}

	/**
	 * Decide which table layout to operate on.
	 *
	 * @return bool True if a class named 'Revision' exists, in which case the
	 *         'revision' table is used; false to use 'cur' and 'old'.
	 */
	public function newSchema() {
		return class_exists( 'Revision' );
	}

	/**
	 * Issue UNLOCK TABLES, releasing the locks taken by lock().
	 */
	public function unlock() {
		$fname = 'UserDupes::unlock';
		$this->db->query( "UNLOCK TABLES", $fname );
	}

	/**
	 * Collect every user_name that occurs on more than one user row.
	 *
	 * @return array List of user_name values (one entry per duplicated name).
	 */
	public function getDupes() {
		# Caller name passed to the query; previously mislabelled 'listDupes'.
		$fname = 'UserDupes::getDupes';
		$user = $this->db->tableName( 'user' );
		$result = $this->db->query(
			"SELECT user_name,COUNT(*) AS n
			FROM $user
			GROUP BY user_name
			HAVING n > 1", $fname );

		$list = array();
		while ( $row = $this->db->fetchObject( $result ) ) {
			$list[] = $row->user_name;
		}
		$this->db->freeResult( $result );

		return $list;
	}

	/**
	 * Examine all user rows for one name. The first row is kept as the
	 * canonical record; every further row is treated as a duplicate whose
	 * edits are counted and, in fix mode, reassigned before the row is
	 * deleted. Updates $reassigned, $trimmed and $failed.
	 *
	 * @param string $name The duplicated user_name.
	 * @param bool $doDelete True to reassign edits and delete duplicate
	 *        rows; false to only report.
	 */
	public function examine( $name, $doDelete ) {
		# Caller name passed to the query; previously mislabelled 'listDupes'.
		$fname = 'UserDupes::examine';
		# Order by user_id so the record kept is always the lowest id, and is
		# the same one in the check run and the later fix run. The fifth
		# argument is the options array of the database wrapper's select().
		$result = $this->db->select( 'user',
			array( 'user_id' ),
			array( 'user_name' => $name ),
			$fname,
			array( 'ORDER BY' => 'user_id' ) );

		$firstRow = $this->db->fetchObject( $result );
		if ( !$firstRow ) {
			# Nothing to do if the name has no rows at all; avoids
			# dereferencing a non-object below.
			wfOut( "No user records found for '$name'; skipping.\n" );
			$this->db->freeResult( $result );
			return;
		}
		$firstId = $firstRow->user_id;
		wfOut( "Record that will be used for '$name' is user_id=$firstId\n" );

		while ( $row = $this->db->fetchObject( $result ) ) {
			$dupeId = $row->user_id;
			wfOut( "... dupe id $dupeId: " );
			$edits = $this->editCount( $dupeId );
			if ( $edits > 0 ) {
				$this->reassigned++;
				wfOut( "has $edits edits! " );
				if ( $doDelete ) {
					$this->reassignEdits( $dupeId, $firstId );
					# Re-count to confirm nothing is left before deleting.
					$newEdits = $this->editCount( $dupeId );
					if ( $newEdits == 0 ) {
						wfOut( "confirmed cleaned. " );
					} else {
						$this->failed++;
						wfOut( "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n" );
						continue;
					}
				} else {
					wfOut( "(will need to reassign edits on fix)" );
				}
			} else {
				wfOut( "ok, no edits. " );
			}
			$this->trimmed++;
			if ( $doDelete ) {
				$this->trimAccount( $dupeId );
			}
			wfOut( "\n" );
		}
		$this->db->freeResult( $result );
	}

	/**
	 * Count the edits recorded for a user id: rows in 'revision' on the new
	 * schema, or the sum of rows in 'cur' and 'old' otherwise.
	 *
	 * @param int $userid
	 * @return int
	 */
	public function editCount( $userid ) {
		if ( $this->newSchema() ) {
			return $this->editCountOn( 'revision', 'rev_user', $userid );
		} else {
			return $this->editCountOn( 'cur', 'cur_user', $userid ) +
				$this->editCountOn( 'old', 'old_user', $userid );
		}
	}

	/**
	 * Count the rows of one table whose user field equals the given id.
	 *
	 * @param string $table Table name.
	 * @param string $field Name of the user id column in that table.
	 * @param int $userid
	 * @return int COUNT(*) result converted with intval().
	 */
	public function editCountOn( $table, $field, $userid ) {
		$fname = 'UserDupes::editCountOn';
		return intval( $this->db->selectField(
			$table,
			'COUNT(*)',
			array( $field => $userid ),
			$fname ) );
	}

	/**
	 * Reassign all edits from one user id to another, on every edit table of
	 * the active schema.
	 *
	 * @param int $from Duplicate user id.
	 * @param int $to Canonical user id.
	 */
	public function reassignEdits( $from, $to ) {
		$set = $this->newSchema()
			? array( 'revision' => 'rev_user' )
			: array( 'cur' => 'cur_user', 'old' => 'old_user' );
		foreach ( $set as $table => $field ) {
			$this->reassignEditsOn( $table, $field, $from, $to );
		}
	}

	/**
	 * Update one table, setting its user field to $to wherever it is $from.
	 *
	 * @param string $table Table name.
	 * @param string $field Name of the user id column in that table.
	 * @param int $from Duplicate user id.
	 * @param int $to Canonical user id.
	 */
	public function reassignEditsOn( $table, $field, $from, $to ) {
		$fname = 'UserDupes::reassignEditsOn';
		wfOut( "reassigning on $table... " );
		$this->db->update( $table,
			array( $field => $to ),
			array( $field => $from ),
			$fname );
		wfOut( "ok. " );
	}

	/**
	 * Delete the user row with the given id.
	 *
	 * @param int $userid
	 */
	public function trimAccount( $userid ) {
		$fname = 'UserDupes::trimAccount';
		wfOut( "deleting..." );
		$this->db->delete( 'user', array( 'user_id' => $userid ), $fname );
		wfOut( " ok" );
	}

}