<?php

# Number of consecutive rev_id values handled per batch.
define( 'BATCH_SIZE', 200 );

/**
 * Fill in revision.rev_parent_id for every row of the revision table.
 *
 * The table is walked in rev_id order, BATCH_SIZE ids at a time. For each
 * revision the parent is chosen as:
 *  1. the highest rev_id on the same page with the same timestamp and a
 *     smaller rev_id; otherwise
 *  2. the highest rev_id on the same page at the most recent older timestamp;
 *     otherwise
 *  3. 0 (no earlier revision was found).
 *
 * When the run is over, a 'populate rev_parent_id' row is inserted into the
 * updatelog table (with the IGNORE option).
 *
 * @param $db Database object; must provide selectField(), select(),
 *   update(), insert() and addQuotes() as used below.
 * @return bool True if the revision table was empty or the updatelog insert
 *   reported success, false if that insert reported failure.
 */
function populate_rev_parent_id( $db ) {
	wfOut( "Populating rev_parent_id column\n" );
	$start = $db->selectField( 'revision', 'MIN(rev_id)', false, __FUNCTION__ );
	$end = $db->selectField( 'revision', 'MAX(rev_id)', false, __FUNCTION__ );
	if( is_null( $start ) || is_null( $end ) ){
		wfOut( "...revision table seems to be empty.\n" );
		$db->insert( 'updatelog',
			array( 'ul_key' => 'populate rev_parent_id' ),
			__FUNCTION__,
			'IGNORE' );
		# Nothing to populate; report success like the normal path does
		return true;
	}
	# Work through the table in blocks of BATCH_SIZE consecutive IDs.
	# BETWEEN is inclusive, so a block is [blockStart, blockStart + BATCH_SIZE - 1]
	# and the next block has to start exactly BATCH_SIZE further on, otherwise
	# the boundary row would be selected (and counted) twice.
	$end = intval( $end );
	$blockStart = intval( $start );
	$blockEnd = $blockStart + BATCH_SIZE - 1;
	$count = 0;
	$changed = 0;
	# Keep going while the block still starts at or before the highest ID, so
	# the final, possibly partial, chunk is covered too
	while( $blockStart <= $end ) {
		wfOut( "...doing rev_id from $blockStart to $blockEnd\n" );
		$cond = "rev_id BETWEEN $blockStart AND $blockEnd";
		$res = $db->select( 'revision',
			array('rev_id','rev_page','rev_timestamp','rev_parent_id'),
			$cond, __FUNCTION__ );
		# Go through and update rev_parent_id from these rows.
		# Assume that the previous revision of the title was
		# the original previous revision of the title when the
		# edit was made...
		foreach( $res as $row ) {
			# First, check rows with the same timestamp other than this one
			# with a smaller rev ID. The highest ID "wins". This avoids loops
			# as timestamp can only decrease and never loops with IDs (from parent to parent)
			$previousID = $db->selectField( 'revision', 'rev_id',
				array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $row->rev_timestamp,
					"rev_id < " . intval( $row->rev_id ) ),
				__FUNCTION__,
				array( 'ORDER BY' => 'rev_id DESC' ) );
			# If there are none, check the highest ID with a lower timestamp
			if( !$previousID ) {
				# Get the highest older timestamp
				$lastTimestamp = $db->selectField( 'revision', 'rev_timestamp',
					array( 'rev_page' => $row->rev_page, "rev_timestamp < " . $db->addQuotes( $row->rev_timestamp ) ),
					__FUNCTION__,
					array( 'ORDER BY' => 'rev_timestamp DESC' ) );
				# If there is one, let the highest rev ID win
				if( $lastTimestamp ) {
					$previousID = $db->selectField( 'revision', 'rev_id',
						array( 'rev_page' => $row->rev_page, 'rev_timestamp' => $lastTimestamp ),
						__FUNCTION__,
						array( 'ORDER BY' => 'rev_id DESC' ) );
				}
			}
			# No parent found at all ends up as 0
			$previousID = intval($previousID);
			# Loose comparison: the stored value comes straight from the result
			# row, while $previousID has just been cast to int
			if( $previousID != $row->rev_parent_id ) {
				$changed++;
			}
			# Update the row...
			$db->update( 'revision',
				array( 'rev_parent_id' => $previousID ),
				array( 'rev_id' => $row->rev_id ),
				__FUNCTION__ );
			$count++;
		}
		$blockStart += BATCH_SIZE;
		$blockEnd += BATCH_SIZE;
		wfWaitForSlaves( 5 );
	}
	# Record that the population has been done
	$logged = $db->insert( 'updatelog',
		array( 'ul_key' => 'populate rev_parent_id' ),
		__FUNCTION__,
		'IGNORE' );
	if( $logged ) {
		wfOut( "rev_parent_id population complete ... {$count} rows [{$changed} changed]\n" );
		return true;
	} else {
		wfOut( "Could not insert rev_parent_id population row.\n" );
		return false;
	}
}