00001 <?php
00002
# Number of consecutive rev_id values covered by each batch that
# populate_rev_parent_id() processes before waiting for the slaves.
# Guarded so that including this file after another script that has
# already defined BATCH_SIZE does not raise a redefinition notice.
if ( !defined( 'BATCH_SIZE' ) ) {
	define( 'BATCH_SIZE', 200 );
}
00004
/**
 * Fill in revision.rev_parent_id for every row of the revision table.
 *
 * The rev_id range MIN(rev_id)..MAX(rev_id) is walked in consecutive,
 * non-overlapping blocks of BATCH_SIZE IDs. For each revision the parent is
 * chosen among revisions of the same page:
 *  - the highest rev_id that is smaller than this one and has the same
 *    timestamp; otherwise
 *  - the highest rev_id carrying the most recent older timestamp; otherwise
 *  - 0 (no parent found).
 * Every visited row is written back, whether or not its value changed.
 * When done, a 'populate rev_parent_id' row is inserted into updatelog.
 *
 * @param $db Database handle; presumably a MediaWiki Database object. This
 *   function only calls selectField(), select(), addQuotes(), update() and
 *   insert() on it.
 * @return bool True if the revision table is empty or the updatelog insert
 *   reported success, false if that insert reported failure.
 */
function populate_rev_parent_id( $db ) {
	wfOut( "Populating rev_parent_id column\n" );
	$start = $db->selectField( 'revision', 'MIN(rev_id)', false, __FUNCTION__ );
	$end = $db->selectField( 'revision', 'MAX(rev_id)', false, __FUNCTION__ );
	if( is_null( $start ) || is_null( $end ) ) {
		wfOut( "...revision table seems to be empty.\n" );
		$db->insert( 'updatelog',
			array( 'ul_key' => 'populate rev_parent_id' ),
			__FUNCTION__,
			'IGNORE' );
		# Nothing to populate; report success like the normal completion path
		return true;
	}
	# Extend the upper bound by one block so that the final, possibly
	# partial, chunk still satisfies the loop condition below.
	$end += BATCH_SIZE - 1;
	$blockStart = intval( $start );
	$blockEnd = $blockStart + BATCH_SIZE - 1;
	$count = 0;
	$changed = 0;
	while( $blockEnd <= $end ) {
		wfOut( "...doing rev_id from $blockStart to $blockEnd\n" );
		$cond = "rev_id BETWEEN $blockStart AND $blockEnd";
		$res = $db->select( 'revision',
			array( 'rev_id', 'rev_page', 'rev_timestamp', 'rev_parent_id' ),
			$cond, __FUNCTION__ );
		# Go through and update rev_parent_id from these rows.
		# Assume that the previous revision of the title was
		# the original previous revision of the title when the
		# edit was made...
		foreach( $res as $row ) {
			# First, check rows with the same timestamp other than this one
			# with a smaller rev ID. The highest ID "wins". This avoids loops,
			# because following parents the timestamp can only decrease and,
			# within one timestamp, the ID can only decrease.
			$previousID = $db->selectField( 'revision', 'rev_id',
				array(
					'rev_page' => $row->rev_page,
					'rev_timestamp' => $row->rev_timestamp,
					"rev_id < " . intval( $row->rev_id )
				),
				__FUNCTION__,
				array( 'ORDER BY' => 'rev_id DESC' ) );
			# If there are none, check the highest ID with a lower timestamp
			if( !$previousID ) {
				# Get the highest older timestamp
				$lastTimestamp = $db->selectField( 'revision', 'rev_timestamp',
					array(
						'rev_page' => $row->rev_page,
						"rev_timestamp < " . $db->addQuotes( $row->rev_timestamp )
					),
					__FUNCTION__,
					array( 'ORDER BY' => 'rev_timestamp DESC' ) );
				# If there is one, let the highest rev ID win
				if( $lastTimestamp ) {
					$previousID = $db->selectField( 'revision', 'rev_id',
						array(
							'rev_page' => $row->rev_page,
							'rev_timestamp' => $lastTimestamp
						),
						__FUNCTION__,
						array( 'ORDER BY' => 'rev_id DESC' ) );
				}
			}
			# A failed lookup (false/null) becomes 0, meaning "no parent"
			$previousID = intval( $previousID );
			if( $previousID != $row->rev_parent_id ) {
				$changed++;
			}
			# Update the row...
			$db->update( 'revision',
				array( 'rev_parent_id' => $previousID ),
				array( 'rev_id' => $row->rev_id ),
				__FUNCTION__ );
			$count++;
		}
		# Advance by a full batch. Each block spans BATCH_SIZE IDs inclusive,
		# so stepping by BATCH_SIZE - 1 would revisit the boundary revision
		# and count it twice.
		$blockStart += BATCH_SIZE;
		$blockEnd += BATCH_SIZE;
		wfWaitForSlaves( 5 );
	}
	$logged = $db->insert( 'updatelog',
		array( 'ul_key' => 'populate rev_parent_id' ),
		__FUNCTION__,
		'IGNORE' );
	if( $logged ) {
		wfOut( "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n" );
		return true;
	} else {
		wfOut( "Could not insert rev_parent_id population row.\n" );
		return false;
	}
}
00083