00001 <?php
/**
 * Output sink that pipes the dump through the external "dbzip2" command.
 *
 * All of the work is delegated to DumpPipeOutput; this class only fixes
 * the command name that is handed to the parent constructor.
 */
class DumpDBZip2Output extends DumpPipeOutput {
	/**
	 * @param $file Value forwarded unchanged as the second argument of the
	 *              DumpPipeOutput constructor (presumably the destination
	 *              file name -- confirm against DumpPipeOutput).
	 */
	function DumpDBZip2Output( $file ) {
		parent::DumpPipeOutput( "dbzip2", $file );
	}
}
00033
/**
 * Command-line driver for XML dumps.
 *
 * Parses "--option[=value[:param]]" style arguments into a chain of output
 * sinks and filters, runs a WikiExporter against a dedicated database
 * connection, and writes periodic progress reports to stderr.
 */
class BackupDumper {
	var $reportingInterval = 100; // Number of revisions between progress reports; 0 disables periodic reports
	var $reporting = true;        // When false, showReport() prints nothing
	var $pageCount = 0;           // Pages written so far, bumped by reportPage()
	var $revCount = 0;            // Revisions written so far, bumped by revCount()
	var $server = null;           // Database server override; null falls back to $wgDBserver
	var $pages = null;            // Passed to WikiExporter::pagesByName(); null means "no explicit page list"
	var $skipHeader = false;      // When true, dump() does not call openStream()
	var $skipFooter = false;      // When true, dump() does not call closeStream()
	var $startId = 0;             // Lower bound handed to pagesByRange()/logsByRange()
	var $endId = 0;               // Upper bound handed to pagesByRange()/logsByRange()
	var $sink = null;             // Output sink (or DumpMultiWriter) built by processArgs()
	var $stubText = false;        // Not read in this class; presumably consumed by callers/subclasses
	var $dumpUploads = false;     // Copied onto the exporter's dumpUploads member in dump()

	// Members that were previously created implicitly on first assignment.
	var $stderr = null;           // Stream handle for php://stderr, opened by the constructor
	var $outputTypes = array();   // Map of --output type name => sink class name
	var $filterTypes = array();   // Map of --filter type name => filter class name
	var $maxCount = 0;            // MAX(page_id) or MAX(rev_id), set by initProgress()
	var $startTime = 0;           // wfTime() value recorded by initProgress()

	/**
	 * Opens stderr for progress output, registers the built-in sink and
	 * filter types, then builds the sink chain from the arguments.
	 *
	 * @param $args array of command-line argument strings
	 */
	function BackupDumper( $args ) {
		$this->stderr = fopen( "php://stderr", "wt" );

		// Built-in output sink types, selectable with --output=<name>:<param>
		$this->registerOutput( 'file', 'DumpFileOutput' );
		$this->registerOutput( 'gzip', 'DumpGZipOutput' );
		$this->registerOutput( 'bzip2', 'DumpBZip2Output' );
		$this->registerOutput( 'dbzip2', 'DumpDBZip2Output' );
		$this->registerOutput( '7zip', 'Dump7ZipOutput' );

		// Built-in filter types, selectable with --filter=<name>:<param>
		$this->registerFilter( 'latest', 'DumpLatestFilter' );
		$this->registerFilter( 'notalk', 'DumpNotalkFilter' );
		$this->registerFilter( 'namespace', 'DumpNamespaceFilter' );

		$this->sink = $this->processArgs( $args );
	}

	/**
	 * Makes a sink class available to the --output option.
	 *
	 * @param $name  type name used on the command line
	 * @param $class name of the class instantiated for that type
	 */
	function registerOutput( $name, $class ) {
		$this->outputTypes[$name] = $class;
	}

	/**
	 * Makes a filter class available to the --filter option.
	 *
	 * @param $name  type name used on the command line
	 * @param $class name of the class instantiated for that type
	 */
	function registerFilter( $name, $class ) {
		$this->filterTypes[$name] = $class;
	}

	/**
	 * Loads a plugin and lets it register itself with this dumper.
	 *
	 * The file, when given, is included once; afterwards the static
	 * method $class::register is invoked with this dumper as its only
	 * argument.
	 *
	 * @param $class name of the plugin class providing register()
	 * @param $file  path of the file to include; '' or null to skip the include
	 */
	function loadPlugin( $class, $file ) {
		if( $file != '' ) {
			require_once( $file );
		}
		$register = array( $class, 'register' );
		call_user_func_array( $register, array( &$this ) );
	}

	/**
	 * Turns the argument list into an output sink.
	 *
	 * Handles --plugin, --output, --filter, --report, --server and
	 * --force-normal directly; every other "--" option is forwarded to
	 * processOption(). Arguments not starting with "--" are ignored.
	 *
	 * @param $args array of argument strings
	 * @return the single configured sink, or a DumpMultiWriter wrapping all
	 *         of them when more than one --output was given; a plain
	 *         DumpOutput when no sink was configured at all
	 */
	function processArgs( $args ) {
		$sink = null;
		$sinks = array();
		foreach( $args as $arg ) {
			$matches = array();
			if( !preg_match( '/^--(.+?)(?:=(.+?)(?::(.+?))?)?$/', $arg, $matches ) ) {
				continue;
			}

			// preg_match() drops trailing groups that did not take part in
			// the match, so pad the result instead of silencing the
			// undefined-offset notices with "@".
			list( , $opt, $val, $param ) = array_pad( $matches, 4, null );

			switch( $opt ) {
			case "plugin":
				$this->loadPlugin( $val, $param );
				break;
			case "output":
				// A new --output finishes the chain built so far.
				if( !is_null( $sink ) ) {
					$sinks[] = $sink;
				}
				if( !isset( $this->outputTypes[$val] ) ) {
					wfDie( "Unrecognized output sink type '$val'\n" );
				}
				$type = $this->outputTypes[$val];
				$sink = new $type( $param );
				break;
			case "filter":
				if( is_null( $sink ) ) {
					$this->progress( "Warning: assuming stdout for filter output\n" );
					$sink = new DumpOutput();
				}
				if( !isset( $this->filterTypes[$val] ) ) {
					wfDie( "Unrecognized filter type '$val'\n" );
				}
				$type = $this->filterTypes[$val];
				$filter = new $type( $sink, $param );

				// Keep the unset(): if the filter constructor binds its sink
				// argument by reference (as ExportProgressFilter's signature
				// suggests filters do), assigning to $sink directly would
				// overwrite the sink the filter has just wrapped.
				unset( $sink );
				$sink = $filter;

				break;
			case "report":
				$this->reportingInterval = intval( $val );
				break;
			case "server":
				$this->server = $val;
				break;
			case "force-normal":
				if( !function_exists( 'utf8_normalize' ) ) {
					dl( "php_utfnormal.so" );
					if( !function_exists( 'utf8_normalize' ) ) {
						wfDie( "Failed to load UTF-8 normalization extension. " .
							"Install or remove --force-normal parameter to use slower code.\n" );
					}
				}
				break;
			default:
				$this->processOption( $opt, $val, $param );
			}
		}

		if( is_null( $sink ) ) {
			$sink = new DumpOutput();
		}
		$sinks[] = $sink;

		if( count( $sinks ) > 1 ) {
			return new DumpMultiWriter( $sinks );
		} else {
			return $sink;
		}
	}

	/**
	 * Hook for options that processArgs() does not know about.
	 * Does nothing here; intended to be overridden.
	 *
	 * @param $opt   option name without the leading "--"
	 * @param $val   text after "=", or null
	 * @param $param text after the first ":" of the value, or null
	 */
	function processOption( $opt, $val, $param ) {
		// extension point
	}

	/**
	 * Runs the export and writes it to the configured sink.
	 *
	 * @param $history history mode handed to WikiExporter; when its
	 *                 WikiExporter::LOGS bit is set, log items are dumped
	 *                 instead of pages
	 * @param $text    text mode handed to WikiExporter
	 */
	function dump( $history, $text = WikiExporter::TEXT ) {
		# Notice messages will foul up your XML output even if they're
		# relatively harmless.
		if( ini_get( 'display_errors' ) ) {
			ini_set( 'display_errors', 'stderr' );
		}

		$this->initProgress( $history );

		$db = $this->backupDb();
		$exporter = new WikiExporter( $db, $history, WikiExporter::STREAM, $text );
		$exporter->dumpUploads = $this->dumpUploads;

		// Wrap the sink so that every page/revision written is counted.
		$wrapper = new ExportProgressFilter( $this->sink, $this );
		$exporter->setOutputSink( $wrapper );

		if( !$this->skipHeader ) {
			$exporter->openStream();
		}

		$ranged = ( $this->startId || $this->endId );
		if( $history & WikiExporter::LOGS ) {
			# Log item dumps: all or by range
			if( $ranged ) {
				$exporter->logsByRange( $this->startId, $this->endId );
			} else {
				$exporter->allLogs();
			}
		} else if( is_null( $this->pages ) ) {
			# Page dumps: all or by page ID range
			if( $ranged ) {
				$exporter->pagesByRange( $this->startId, $this->endId );
			} else {
				$exporter->allPages();
			}
		} else {
			# Dump of specific pages
			$exporter->pagesByName( $this->pages );
		}

		if( !$this->skipFooter ) {
			$exporter->closeStream();
		}

		$this->report( true );
	}

	/**
	 * Records the start time and the highest ID used for the ETA estimate.
	 *
	 * Reads MAX(page_id) from "page" for WikiExporter::CURRENT dumps and
	 * MAX(rev_id) from "revision" for every other mode.
	 *
	 * @param $history history mode, as passed to dump()
	 */
	function initProgress( $history = WikiExporter::FULL ) {
		$table = ($history == WikiExporter::CURRENT) ? 'page' : 'revision';
		$field = ($history == WikiExporter::CURRENT) ? 'page_id' : 'rev_id';

		$dbr = wfGetDB( DB_SLAVE );
		$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', 'BackupDumper::dump' );
		$this->startTime = wfTime();
	}

	/**
	 * Opens a separate database connection for the dump, using the admin
	 * credentials and the server returned by backupServer().
	 *
	 * @return database object of class "Database" . ucfirst( $wgDBtype )
	 */
	function backupDb() {
		global $wgDBadminuser, $wgDBadminpassword;
		global $wgDBname, $wgDebugDumpSql, $wgDBtype;
		$flags = ($wgDebugDumpSql ? DBO_DEBUG : 0) | DBO_DEFAULT;

		$class = 'Database' . ucfirst($wgDBtype);
		$db = new $class( $this->backupServer(), $wgDBadminuser, $wgDBadminpassword, $wgDBname, false, $flags );

		// 3600 * 24: presumably seconds, i.e. one day -- confirm against setTimeout()
		$db->setTimeout( 3600 * 24 );

		return $db;
	}

	/**
	 * @return the server set through --server, or $wgDBserver when none was given
	 */
	function backupServer() {
		global $wgDBserver;
		return $this->server
			? $this->server
			: $wgDBserver;
	}

	/**
	 * Counts one finished page.
	 */
	function reportPage() {
		$this->pageCount++;
	}

	/**
	 * Counts one written revision and prints a progress line when due.
	 */
	function revCount() {
		$this->revCount++;
		$this->report();
	}

	/**
	 * Prints a progress line when one is due.
	 *
	 * Periodic calls ($final false) report whenever the revision count is
	 * a multiple of the reporting interval. The final call reports only
	 * when the count is NOT such a multiple, because in that case the
	 * last periodic call has already printed the same numbers.
	 *
	 * An interval of 0 (e.g. "--report=0" or a non-numeric value) used to
	 * cause a modulo by zero; it now simply disables the periodic reports
	 * while the final report is still printed.
	 *
	 * @param $final true for the closing report at the end of dump()
	 */
	function report( $final = false ) {
		$interval = intval( $this->reportingInterval );
		$onBoundary = ( $interval != 0 ) && ( $this->revCount % $interval == 0 );
		if( $final xor $onBoundary ) {
			$this->showReport();
		}
	}

	/**
	 * Writes one progress line (counts, rates, ETA) to stderr, unless
	 * reporting is switched off.
	 *
	 * Rates are shown as '-' placeholders when no time has elapsed. The
	 * ETA is additionally left as '-' when either the maximum ID or the
	 * revision count is zero/empty, since both would otherwise end up as
	 * divisors.
	 */
	function showReport() {
		if( !$this->reporting ) {
			return;
		}

		$delta = wfTime() - $this->startTime;
		$now = wfTimestamp( TS_DB );

		$rate = '-';
		$revrate = '-';
		$etats = '-';
		if( $delta ) {
			$rate = $this->pageCount / $delta;
			$revrate = $this->revCount / $delta;
			if( $this->maxCount && $this->revCount ) {
				// Extrapolate the total run time from the fraction done so far.
				$portion = $this->revCount / $this->maxCount;
				$eta = $this->startTime + $delta / $portion;
				$etats = wfTimestamp( TS_DB, intval( $eta ) );
			}
		}

		$this->progress( sprintf( "%s: %s %d pages (%0.3f/sec), %d revs (%0.3f/sec), ETA %s [max %d]",
			$now, wfWikiID(), $this->pageCount, $rate, $this->revCount, $revrate, $etats, $this->maxCount ) );
	}

	/**
	 * Writes a line to stderr; a newline is appended to the given text.
	 *
	 * @param $string text to print
	 */
	function progress( $string ) {
		fwrite( $this->stderr, $string . "\n" );
	}
}
00296
/**
 * Pass-through dump filter that counts what flows past it.
 *
 * Every closed page and every revision is first handed to the parent
 * DumpFilter and then announced to the progress tracker, so the tracker's
 * counters only advance after the parent's write call has returned.
 */
class ExportProgressFilter extends DumpFilter {
	/**
	 * @param $sink     sink passed on to the DumpFilter constructor
	 * @param $progress tracker object; its reportPage() and revCount()
	 *                  methods are called (BackupDumper::dump() passes the
	 *                  dumper itself)
	 */
	function ExportProgressFilter( &$sink, &$progress ) {
		parent::DumpFilter( $sink );
		$this->progress = $progress;
	}

	/**
	 * Forwards the page-closing text, then counts the page.
	 *
	 * @param $string text handed unchanged to the parent filter
	 */
	function writeClosePage( $string ) {
		parent::writeClosePage( $string );
		$this->progress->reportPage();
	}

	/**
	 * Forwards a revision, then counts it.
	 *
	 * @param $rev    revision argument handed unchanged to the parent filter
	 * @param $string text handed unchanged to the parent filter
	 */
	function writeRevision( $rev, $string ) {
		parent::writeRevision( $rev, $string );
		$this->progress->revCount();
	}
}