00001 <?php
00002
00006 class Preprocessor_DOM implements Preprocessor {
00007 var $parser, $memoryLimit;
00008
00009 const CACHE_VERSION = 1;
00010
/**
 * Remember the parser and work out PHP's memory limit in bytes.
 *
 * $this->memoryLimit is left as false when memory_limit is unset, empty,
 * -1 (unlimited) or in a form that is not understood; memCheck() treats
 * false as "no limit to enforce".
 *
 * The ini value may be a plain byte count or use PHP's shorthand suffixes
 * K, M and G (case-insensitive), all of which are recognised here.
 *
 * @param $parser Parser object that owns this preprocessor
 */
function __construct( $parser ) {
    $this->parser = $parser;
    $this->memoryLimit = false;

    $mem = ini_get( 'memory_limit' );
    if ( strval( $mem ) !== '' && $mem != -1 ) {
        if ( preg_match( '/^\d+$/', $mem ) ) {
            # Plain number of bytes
            $this->memoryLimit = $mem;
        } elseif ( preg_match( '/^(\d+)([KMG])$/i', $mem, $m ) ) {
            # Shorthand notation: multiply by the unit named by the suffix
            $multipliers = array(
                'K' => 1024,
                'M' => 1048576,
                'G' => 1073741824,
            );
            $this->memoryLimit = $m[1] * $multipliers[ strtoupper( $m[2] ) ];
        }
    }
}
00023
/**
 * Create a fresh top-level expansion frame bound to this preprocessor.
 *
 * @return PPFrame_DOM
 */
function newFrame() {
    $frame = new PPFrame_DOM( $this );
    return $frame;
}
00027
/**
 * Create a custom frame bound to this preprocessor, passing $args through
 * to the PPCustomFrame_DOM constructor unchanged.
 *
 * @param $args Arguments handed to the new frame
 * @return PPCustomFrame_DOM
 */
function newCustomFrame( $args ) {
    $frame = new PPCustomFrame_DOM( $this, $args );
    return $frame;
}
00031
/**
 * Compare current memory usage with the limit found by the constructor.
 *
 * @return null when no limit is known; otherwise true while usage is at or
 *         below 80% of the limit and false between 80% and 90%
 * @throws MWException once usage exceeds 90% of the limit
 */
function memCheck() {
    $configured = $this->memoryLimit;
    if ( $configured === false ) {
        # No limit known, nothing to enforce
        return;
    }

    $usage = memory_get_usage();
    $hardThreshold = $configured * 0.9;
    if ( $usage > $hardThreshold ) {
        # Report the threshold rounded to the nearest megabyte
        $limit = intval( $hardThreshold / 1048576 + 0.5 );
        throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" );
    }

    $softThreshold = $configured * 0.8;
    return $usage <= $softThreshold;
}
00043
/**
 * Preprocess some wikitext and return the document tree.
 *
 * The text is converted to XML by preprocessToXml() and then loaded into a
 * DOMDocument. When the text is longer than $wgPreprocessorCacheThreshold,
 * the generated XML is stored in $wgMemc for 86400 seconds, prefixed with
 * an 8-digit zero-padded CACHE_VERSION so that entries written by another
 * version are ignored.
 *
 * If the XML fails to load it is run through UtfNormal::cleanUp() and
 * loaded once more before giving up.
 *
 * @param $text  String: the wikitext to parse
 * @param $flags Integer: bit field passed on to preprocessToXml(); it is
 *               also part of the cache key
 * @return PPNode_DOM wrapping the document element
 * @throws MWException if the generated XML cannot be loaded even after
 *         clean-up
 */
function preprocessToObj( $text, $flags = 0 ) {
    wfProfileIn( __METHOD__ );
    global $wgMemc, $wgPreprocessorCacheThreshold;

    $xml = false;
    $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
    if ( $cacheable ) {
        # This section stays open until just before the function returns
        wfProfileIn( __METHOD__.'-cacheable' );

        $cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags );
        $cacheValue = $wgMemc->get( $cacheKey );
        if ( $cacheValue ) {
            # The first 8 bytes hold the cache version, the rest is the XML
            $version = substr( $cacheValue, 0, 8 );
            if ( intval( $version ) == self::CACHE_VERSION ) {
                $xml = substr( $cacheValue, 8 );

                wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
            }
        }
    }
    if ( $xml === false ) {
        if ( $cacheable ) {
            wfProfileIn( __METHOD__.'-cache-miss' );
            $xml = $this->preprocessToXml( $text, $flags );
            $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml;
            $wgMemc->set( $cacheKey, $cacheValue, 86400 );
            wfProfileOut( __METHOD__.'-cache-miss' );
            wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
        } else {
            $xml = $this->preprocessToXml( $text, $flags );
        }

    }
    wfProfileIn( __METHOD__.'-loadXML' );
    $dom = new DOMDocument;
    wfSuppressWarnings();
    $result = $dom->loadXML( $xml );
    wfRestoreWarnings();
    if ( !$result ) {
        # Second attempt after normalising the text
        $xml = UtfNormal::cleanUp( $xml );
        $result = $dom->loadXML( $xml );
        if ( !$result ) {
            # Close every profiling section opened above before bailing out,
            # so the profiler is left balanced for whoever catches this
            wfProfileOut( __METHOD__.'-loadXML' );
            if ( $cacheable ) {
                wfProfileOut( __METHOD__.'-cacheable' );
            }
            wfProfileOut( __METHOD__ );
            throw new MWException( __METHOD__.' generated invalid XML' );
        }
    }
    $obj = new PPNode_DOM( $dom->documentElement );
    wfProfileOut( __METHOD__.'-loadXML' );
    if ( $cacheable ) {
        wfProfileOut( __METHOD__.'-cacheable' );
    }
    wfProfileOut( __METHOD__ );
    return $obj;
}
00120
/**
 * Convert wikitext into an XML string describing its preprocessor structure.
 *
 * The input is scanned once from left to right. Literal text is passed
 * through htmlspecialchars(); recognised constructs are wrapped in elements:
 *   <root>      the whole document
 *   <ignore>    text hidden by noinclude/includeonly/onlyinclude handling
 *   <comment>   an HTML-style comment
 *   <ext>       an extension-style tag, with <name>, <attr>, optional
 *               <inner> and optional <close> children
 *   <h>         a heading, with "level" and "i" (running index) attributes
 *   <template>  a double-brace construct, and
 *   <tplarg>    a triple-brace construct, both with a <title> child followed
 *               by <part> children holding <name> and <value>
 *
 * Unfinished constructs live on a PPDStack while they are being read; any
 * still open at the end of the text are written back out literally.
 *
 * @param $text  String: the wikitext to convert
 * @param $flags Integer: bit field; Parser::PTD_FOR_INCLUSION selects the
 *               rules used when the text is being included in another page
 * @return String: the generated XML
 */
function preprocessToXml( $text, $flags = 0 ) {
    wfProfileIn( __METHOD__ );
    # Bracket rules, keyed by opening character. 'names' maps a run length to
    # the element name to emit; a null name means the run is only tracked for
    # matching and is written back out as literal text when closed.
    $rules = array(
        '{' => array(
            'end' => '}',
            'names' => array(
                2 => 'template',
                3 => 'tplarg',
            ),
            'min' => 2,
            'max' => 3,
        ),
        '[' => array(
            'end' => ']',
            'names' => array( 2 => null ),
            'min' => 2,
            'max' => 2,
        )
    );

    $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;

    $xmlishElements = $this->parser->getStripList();
    $enableOnlyinclude = false;
    if ( $forInclusion ) {
        # Inclusion mode: <includeonly> tags themselves are hidden (their
        # content stays), whole <noinclude> elements are hidden
        $ignoredTags = array( 'includeonly', '/includeonly' );
        $ignoredElements = array( 'noinclude' );
        $xmlishElements[] = 'noinclude';
        # <onlyinclude> handling is switched on only if both tags occur
        if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) {
            $enableOnlyinclude = true;
        }
    } else {
        # Normal mode: the mirror image of the above
        $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' );
        $ignoredElements = array( 'includeonly' );
        $xmlishElements[] = 'includeonly';
    }
    $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );

    # Anchored match for what follows a "<": either a known tag name followed
    # by whitespace, "/>" or ">" (capture 1), or a comment opener (capture 2)
    $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";

    $stack = new PPDStack;

    $searchBase = "[{<\n"; #}
    # Reversed copy of the text, so strspn() can be used to look backwards
    $revText = strrev( $text );

    $i = 0;                     # Input pointer, starts out pointing to a pseudo-newline before the start
    $accum =& $stack->getAccum();   # Current accumulator
    $accum = '<root>';
    $findEquals = false;            # True to find equals signs in arguments
    $findPipe = false;              # True to take notice of pipe characters
    $headingIndex = 1;
    $inHeading = false;        # True if $i is inside a possible heading
    $noMoreGT = false;         # True if there are no more greater-than (>) signs right of $i
    $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
    $fakeLineStart = true;     # Do a line-start run without outputting an LF character

    while ( true ) {
        # Each pass handles at most one event; $found names the event

        if ( $findOnlyinclude ) {
            # Ignore all input up to and including the next <onlyinclude>
            $startPos = strpos( $text, '<onlyinclude>', $i );
            if ( $startPos === false ) {
                # No further <onlyinclude>: ignore the rest of the text and finish
                $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
                break;
            }
            $tagEndPos = $startPos + strlen( '<onlyinclude>' );
            $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
            $i = $tagEndPos;
            $findOnlyinclude = false;
        }

        if ( $fakeLineStart ) {
            $found = 'line-start';
            $curChar = '';
        } else {
            # Find next opening brace, closing brace or pipe
            $search = $searchBase;
            if ( $stack->top === false ) {
                $currentClosing = '';
            } else {
                $currentClosing = $stack->top->close;
                $search .= $currentClosing;
            }
            if ( $findPipe ) {
                $search .= '|';
            }
            if ( $findEquals ) {
                # Looking for the "=" that separates an argument name from its value
                $search .= '=';
            }
            $rule = null;
            # Output literal section, advance input counter
            $literalLength = strcspn( $text, $search, $i );
            if ( $literalLength > 0 ) {
                $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
                $i += $literalLength;
            }
            if ( $i >= strlen( $text ) ) {
                if ( $currentClosing == "\n" ) {
                    # The text ends inside a possible heading: treat the end
                    # of input as the end of the line
                    $curChar = '';
                    $found = 'line-end';
                } else {
                    # All done
                    break;
                }
            } else {
                $curChar = $text[$i];
                if ( $curChar == '|' ) {
                    $found = 'pipe';
                } elseif ( $curChar == '=' ) {
                    $found = 'equals';
                } elseif ( $curChar == '<' ) {
                    $found = 'angle';
                } elseif ( $curChar == "\n" ) {
                    if ( $inHeading ) {
                        $found = 'line-end';
                    } else {
                        $found = 'line-start';
                    }
                } elseif ( $curChar == $currentClosing ) {
                    $found = 'close';
                } elseif ( isset( $rules[$curChar] ) ) {
                    $found = 'open';
                    $rule = $rules[$curChar];
                } else {
                    # Some versions of PHP have a strcspn which stops on null characters
                    # Ignore and continue
                    ++$i;
                    continue;
                }
            }
        }

        if ( $found == 'angle' ) {
            $matches = false;
            # A closing </onlyinclude> switches back to ignoring input; the
            # tag itself is consumed by the $findOnlyinclude block above
            if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) {
                $findOnlyinclude = true;
                continue;
            }

            # Is the "<" followed by a known tag name or a comment opener?
            if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
                # No: output the "<" as an entity and move on
                $accum .= '&lt;';
                ++$i;
                continue;
            }
            # Comment handling
            if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
                # The search for "-->" starts at $i + 4, i.e. after "<!--",
                # so the dashes of the opener are not reused by the terminator
                $endPos = strpos( $text, '-->', $i + 4 );
                if ( $endPos === false ) {
                    # Unclosed comment: it runs to the end of the input
                    $inner = substr( $text, $i );
                    $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
                    $i = strlen( $text );
                } else {
                    # Look backwards for spaces directly before the comment
                    $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
                    # Look forwards for spaces directly after it; $wsEnd is the
                    # index of the last such space, or of the ">" if there are none
                    $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
                    # If the comment, with its surrounding spaces, sits on a
                    # line of its own, swallow the whole line including the
                    # trailing newline
                    if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
                        && substr( $text, $wsEnd + 1, 1 ) == "\n" )
                    {
                        $startPos = $wsStart;
                        $endPos = $wsEnd + 1;
                        # Take the leading spaces back out of the accumulator;
                        # they become part of the comment instead
                        $wsLength = $i - $wsStart;
                        if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) {
                            $accum = substr( $accum, 0, -$wsLength );
                        }
                        # The newline was consumed, so run line-start handling
                        # on the next pass without emitting one
                        $fakeLineStart = true;
                    } else {
                        # Inline comment: cover just "<!--" through "-->"
                        $startPos = $i;
                        $endPos += 2;
                    }

                    if ( $stack->top ) {
                        # Record where the comment sits within the current
                        # part; the line-end handler reads these to look past
                        # trailing comments
                        $part = $stack->top->getCurrentPart();
                        if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) {
                            # Directly follows the previous comment: extend it
                            $part->commentEnd = $wsEnd;
                        } else {
                            $part->visualEnd = $wsStart;
                            $part->commentEnd = $endPos;
                        }
                    }
                    $i = $endPos + 1;
                    $inner = substr( $text, $startPos, $endPos - $startPos + 1 );
                    $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
                }
                continue;
            }
            $name = $matches[1];
            $lowerName = strtolower( $name );
            $attrStart = $i + strlen( $name ) + 1;

            # Find the end of the opening tag
            $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
            if ( $tagEndPos === false ) {
                # There is no ">" anywhere to the right. Remember that so the
                # search is not repeated, and output the "<" as an entity
                $noMoreGT = true;
                $accum .= '&lt;';
                ++$i;
                continue;
            }

            # Ignored tags: hide the tag itself, keep processing what follows
            if ( in_array( $lowerName, $ignoredTags ) ) {
                $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>';
                $i = $tagEndPos + 1;
                continue;
            }

            $tagStartPos = $i;
            if ( $text[$tagEndPos-1] == '/' ) {
                # Self-closing form: no inner text and no closing tag
                $attrEnd = $tagEndPos - 1;
                $inner = null;
                $i = $tagEndPos + 1;
                $close = '';
            } else {
                $attrEnd = $tagEndPos;
                # Find the matching closing tag (case-insensitive)
                if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
                        $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
                {
                    $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
                    $i = $matches[0][1] + strlen( $matches[0][0] );
                    $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
                } else {
                    # No closing tag: the element runs to the end of the input
                    $inner = substr( $text, $tagEndPos + 1 );
                    $i = strlen( $text );
                    $close = '';
                }
            }
            # Ignored elements: hide the whole element including its content
            if ( in_array( $lowerName, $ignoredElements ) ) {
                $accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
                    . '</ignore>';
                continue;
            }

            $accum .= '<ext>';
            if ( $attrEnd <= $attrStart ) {
                $attr = '';
            } else {
                $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
            }
            $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
                # The attribute text is kept as one escaped string here; it
                # is not split into separate attributes
                '<attr>' . htmlspecialchars( $attr ) . '</attr>';
            if ( $inner !== null ) {
                $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
            }
            $accum .= $close . '</ext>';
        }

        elseif ( $found == 'line-start' ) {
            # Either a real newline, which is output and stepped over, or the
            # fake one, which only clears the flag
            if ( $fakeLineStart ) {
                $fakeLineStart = false;
            } else {
                $accum .= $curChar;
                $i++;
            }

            # Count up to six equals signs at the start of the line
            $count = strspn( $text, '=', $i, 6 );
            if ( $count == 1 && $findEquals ) {
                # A single "=" while an argument's "=" is being looked for:
                # nothing is pushed and $i is left alone, so the next pass
                # sees the character as an 'equals' event
            } elseif ( $count > 0 ) {
                # Possible heading: push it; it is resolved at line end
                $piece = array(
                    'open' => "\n",
                    'close' => "\n",
                    'parts' => array( new PPDPart( str_repeat( '=', $count ) ) ),
                    'startPos' => $i,
                    'count' => $count );
                $stack->push( $piece );
                $accum =& $stack->getAccum();
                # Refreshes $findEquals, $findPipe and $inHeading for the new top
                extract( $stack->getFlags() );
                $i += $count;
            }
        }

        elseif ( $found == 'line-end' ) {
            $piece = $stack->top;
            # A heading must be open, otherwise \n wouldn't have been in the search list
            assert( $piece->open == "\n" );
            $part = $piece->getCurrentPart();
            # Look backwards from the end of the line, skipping trailing
            # spaces and tabs, for the closing run of equals signs
            $wsLength = strspn( $revText, " \t", strlen( $text ) - $i );
            $searchStart = $i - $wsLength;
            if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
                # The line ends in a comment: continue the search from where
                # the comment started, again skipping spaces and tabs
                $searchStart = $part->visualEnd;
                $searchStart -= strspn( $revText, " \t", strlen( $text ) - $searchStart );
            }
            $count = $piece->count;
            $equalsLength = strspn( $revText, '=', strlen( $text ) - $searchStart );
            if ( $equalsLength > 0 ) {
                if ( $i - $equalsLength == $piece->startPos ) {
                    # The line is nothing but a run of equals signs. Fewer
                    # than three make no heading; otherwise the level is
                    # (count - 1) / 2 rounded down, capped at 6
                    $count = $equalsLength;
                    if ( $count < 3 ) {
                        $count = 0;
                    } else {
                        $count = min( 6, intval( ( $count - 1 ) / 2 ) );
                    }
                } else {
                    # Level is the shorter of the opening and closing runs
                    $count = min( $equalsLength, $count );
                }
                if ( $count > 0 ) {
                    # Normal match, output <h>
                    $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
                    $headingIndex++;
                } else {
                    # Single equals sign on its own line, count=0
                    $element = $accum;
                }
            } else {
                # No closing equals signs: not a heading, output as-is
                $element = $accum;
            }
            # Unwind the stack
            $stack->pop();
            $accum =& $stack->getAccum();
            extract( $stack->getFlags() );

            # Append the result to the enclosing accumulator
            $accum .= $element;
            # Note that $i is not advanced here: the newline itself is
            # examined again on the next pass. With the heading popped,
            # $inHeading is no longer set, so a newline at $i is then handled
            # as a line start.
        }

        elseif ( $found == 'open' ) {
            # count opening brace characters
            $count = strspn( $text, $curChar, $i );

            # we need to add to stack only if opening brace count is enough for one of the rules
            if ( $count >= $rule['min'] ) {
                # Add it to the stack
                $piece = array(
                    'open' => $curChar,
                    'close' => $rule['end'],
                    'count' => $count,
                    'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
                );

                $stack->push( $piece );
                $accum =& $stack->getAccum();
                extract( $stack->getFlags() );
            } else {
                # Add literal brace(s)
                $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
            }
            $i += $count;
        }

        elseif ( $found == 'close' ) {
            $piece = $stack->top;
            # lets check if there are enough characters for closing brace
            $maxCount = $piece->count;
            $count = strspn( $text, $curChar, $i, $maxCount );

            # check for maximum matching characters (if there are 5 closing
            # characters, we will probably need only 3 - depending on the rules)
            $matchingCount = 0;
            $rule = $rules[$piece->open];
            if ( $count > $rule['max'] ) {
                # The specified maximum exists in the callback array, unless the caller
                # has made an error
                $matchingCount = $rule['max'];
            } else {
                # Count is less than the maximum
                # Skip any gaps in the callback array to find the true largest match
                # Need to use array_key_exists not isset because the callback can be null
                $matchingCount = $count;
                while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
                    --$matchingCount;
                }
            }

            if ($matchingCount <= 0) {
                # No matching element found in callback array
                # Output a literal closing brace and continue
                $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
                $i += $count;
                continue;
            }
            $name = $rule['names'][$matchingCount];
            if ( $name === null ) {
                # No element for this rule: write the brackets back literally
                $element = $piece->breakSyntax( $matchingCount ) . str_repeat( $rule['end'], $matchingCount );
            } else {
                # Create XML element
                # Note: $parts is already XML, does not need to be encoded further
                $parts = $piece->parts;
                $title = $parts[0]->out;
                unset( $parts[0] );

                # The invocation is at the start of the line if lineStart is set in
                # the stack, and all opening brackets are used up.
                if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) {
                    $attr = ' lineStart="1"';
                } else {
                    $attr = '';
                }

                $element = "<$name$attr>";
                $element .= "<title>$title</title>";
                $argIndex = 1;
                foreach ( $parts as $partIndex => $part ) {
                    if ( isset( $part->eqpos ) ) {
                        # Named argument: split the part at its recorded "="
                        $argName = substr( $part->out, 0, $part->eqpos );
                        $argValue = substr( $part->out, $part->eqpos + 1 );
                        $element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
                    } else {
                        # Positional argument: number it
                        $element .= "<part><name index=\"$argIndex\" /><value>{$part->out}</value></part>";
                        $argIndex++;
                    }
                }
                $element .= "</$name>";
            }

            # Advance input pointer
            $i += $matchingCount;

            # Unwind the stack
            $stack->pop();
            $accum =& $stack->getAccum();

            # Re-add the old stack element if it still has unmatched opening characters remaining
            if ($matchingCount < $piece->count) {
                $piece->parts = array( new PPDPart );
                $piece->count -= $matchingCount;
                # do we still qualify for any callback with remaining count?
                $names = $rules[$piece->open]['names'];
                $skippedBraces = 0;
                $enclosingAccum =& $accum;
                while ( $piece->count ) {
                    if ( array_key_exists( $piece->count, $names ) ) {
                        $stack->push( $piece );
                        $accum =& $stack->getAccum();
                        break;
                    }
                    --$piece->count;
                    $skippedBraces ++;
                }
                # Opening characters that fit no rule go out as literal text
                # in the enclosing accumulator
                $enclosingAccum .= str_repeat( $piece->open, $skippedBraces );
            }

            extract( $stack->getFlags() );

            # Add XML element to the enclosing accumulator
            $accum .= $element;
        }

        elseif ( $found == 'pipe' ) {
            # Start a new part; it may be a named argument
            $findEquals = true; // shortcut for getFlags()
            $stack->addPart();
            $accum =& $stack->getAccum();
            ++$i;
        }

        elseif ( $found == 'equals' ) {
            # Record where the "=" falls within the current part's output
            $findEquals = false; // shortcut for getFlags()
            $stack->getCurrentPart()->eqpos = strlen( $accum );
            $accum .= '=';
            ++$i;
        }
    }

    # Output any remaining unclosed brackets
    foreach ( $stack->stack as $piece ) {
        $stack->rootAccum .= $piece->breakSyntax();
    }
    $stack->rootAccum .= '</root>';
    $xml = $stack->rootAccum;

    wfProfileOut( __METHOD__ );

    return $xml;
}
00633 }
00634
/**
 * Stack of unfinished constructs, used by the preprocessor while it scans
 * wikitext.
 *
 * Besides the stack itself, the object tracks an "accumulator": a reference
 * to the string that output should currently be appended to. That is the
 * output of the current part of the top element or, when the stack is
 * empty, $rootAccum.
 */
class PPDStack {
    var $stack, $rootAccum, $top;
    var $out;
    # Reference to the current output string. Declared explicitly so that it
    # is a real member of the class rather than a property created on the fly
    # by the constructor.
    var $accum;
    var $elementClass = 'PPDStackElement';

    static $false = false;

    /**
     * Start with an empty stack whose accumulator is the root output.
     */
    function __construct() {
        $this->stack = array();
        $this->top = false;
        $this->rootAccum = '';
        $this->accum =& $this->rootAccum;
    }

    /**
     * @return Integer: number of elements on the stack
     */
    function count() {
        return count( $this->stack );
    }

    /**
     * @return String reference: the current accumulator
     */
    function &getAccum() {
        return $this->accum;
    }

    /**
     * @return The current part of the top element, or false when the stack
     *         is empty
     */
    function getCurrentPart() {
        if ( $this->top === false ) {
            return false;
        } else {
            return $this->top->getCurrentPart();
        }
    }

    /**
     * Push an element and make its current part the accumulator.
     *
     * @param $data Either an object of class $elementClass, or an array of
     *              member values from which one is constructed
     */
    function push( $data ) {
        if ( $data instanceof $this->elementClass ) {
            $this->stack[] = $data;
        } else {
            $class = $this->elementClass;
            $this->stack[] = new $class( $data );
        }
        $this->top = $this->stack[ count( $this->stack ) - 1 ];
        $this->accum =& $this->top->getAccum();
    }

    /**
     * Remove the top element and point the accumulator at the new top, or
     * at the root output if nothing is left.
     *
     * @return The element that was removed
     * @throws MWException if the stack is already empty
     */
    function pop() {
        if ( !count( $this->stack ) ) {
            throw new MWException( __METHOD__.': no elements remaining' );
        }
        $temp = array_pop( $this->stack );

        if ( count( $this->stack ) ) {
            $this->top = $this->stack[ count( $this->stack ) - 1 ];
            $this->accum =& $this->top->getAccum();
        } else {
            $this->top = self::$false;
            $this->accum =& $this->rootAccum;
        }
        return $temp;
    }

    /**
     * Append a new part to the top element and make it the accumulator.
     * The stack must not be empty.
     *
     * @param $s String: initial content of the new part
     */
    function addPart( $s = '' ) {
        $this->top->addPart( $s );
        $this->accum =& $this->top->getAccum();
    }

    /**
     * @return Array with the keys findEquals, findPipe and inHeading: all
     *         false for an empty stack, otherwise as reported by the top
     *         element
     */
    function getFlags() {
        if ( !count( $this->stack ) ) {
            return array(
                'findEquals' => false,
                'findPipe' => false,
                'inHeading' => false,
            );
        } else {
            return $this->top->getFlags();
        }
    }
}
00713
/**
 * One unfinished construct on the preprocessor stack: the character that
 * opened it, the character that closes it, how many opening characters
 * were seen, and the parts collected so far.
 */
class PPDStackElement {
    var $open,
        $close,
        $count,
        $parts,
        $lineStart;

    var $partClass = 'PPDPart';

    /**
     * Start with a single empty part, then copy every entry of $data onto
     * the object as a member of the same name. Entries in $data may
     * therefore replace the initial part list.
     *
     * @param $data Array of member name => value
     */
    function __construct( $data = array() ) {
        $partClass = $this->partClass;
        $this->parts = array( new $partClass );

        foreach ( $data as $member => $memberValue ) {
            $this->$member = $memberValue;
        }
    }

    /**
     * @return String reference: the output of the last part
     */
    function &getAccum() {
        $lastIndex = count($this->parts) - 1;
        return $this->parts[$lastIndex]->out;
    }

    /**
     * Append a new part.
     *
     * @param $s String: initial output of the part
     */
    function addPart( $s = '' ) {
        $partClass = $this->partClass;
        $this->parts[] = new $partClass( $s );
    }

    /**
     * @return The last part
     */
    function getCurrentPart() {
        $lastIndex = count($this->parts) - 1;
        return $this->parts[$lastIndex];
    }

    /**
     * Work out what the scanner should look for while this element is on
     * top of the stack.
     *
     * @return Array with the keys findPipe, findEquals and inHeading
     */
    function getFlags() {
        $isHeading = ( $this->open == "\n" );
        # Pipes matter in everything except headings and square brackets
        $wantPipe = !$isHeading && $this->open != '[';
        # An equals sign matters only after the first part, and only until
        # one has been recorded for the current part
        $lastIndex = count( $this->parts ) - 1;
        $wantEquals = $wantPipe && $lastIndex > 0 && !isset( $this->parts[$lastIndex]->eqpos );

        return array(
            'findPipe' => $wantPipe,
            'findEquals' => $wantEquals,
            'inHeading' => $isHeading,
        );
    }

    /**
     * Get the output for a construct that turned out not to be one.
     *
     * A heading yields the output of its first part. Anything else yields
     * the opening character repeated, followed by all parts joined with
     * pipes.
     *
     * @param $openingCount Number of opening characters to emit, or false
     *                      to use this element's own count
     * @return String
     */
    function breakSyntax( $openingCount = false ) {
        if ( $this->open == "\n" ) {
            return $this->parts[0]->out;
        }

        if ( $openingCount === false ) {
            $openingCount = $this->count;
        }
        $outputs = array();
        foreach ( $this->parts as $part ) {
            $outputs[] = $part->out;
        }
        return str_repeat( $this->open, $openingCount ) . implode( '|', $outputs );
    }
}
00782
/**
 * One part of an unfinished construct on the preprocessor stack.
 *
 * Other code in this file also sets and reads the optional members eqpos,
 * commentEnd and visualEnd on these objects. They are deliberately left
 * undeclared because their presence is tested with isset().
 */
class PPDPart {
    # Output accumulator string
    var $out;

    /**
     * @param $out String: initial output
     */
    function __construct( $out = '' ) {
        $this->out = $out;
    }
}
00798
00803 class PPFrame_DOM implements PPFrame {
00804 var $preprocessor, $parser, $title;
00805 var $titleCache;
00806
00811 var $loopCheckHash;
00812
00817 var $depth;
00818
00819
/**
 * Construct a new top-level frame.
 *
 * The parser is taken from the preprocessor and the title from the
 * parser's mTitle member.
 *
 * @param $preprocessor The preprocessor that owns this frame
 */
function __construct( $preprocessor ) {
    $parser = $preprocessor->parser;
    $title = $parser->mTitle;

    $this->preprocessor = $preprocessor;
    $this->parser = $parser;
    $this->title = $title;

    # Entry 0 of the title cache is this frame's own prefixed DB key, or
    # false when there is no title
    if ( $title ) {
        $this->titleCache = array( $title->getPrefixedDBkey() );
    } else {
        $this->titleCache = array( false );
    }

    $this->loopCheckHash = array();
    $this->depth = 0;
}
00832
/**
 * Create a child template frame, splitting the given <part> nodes into
 * numbered and named arguments.
 *
 * A part whose <name> element has attributes is numbered, keyed by its
 * "index" attribute. Any other part is named, keyed by the expanded and
 * trimmed text of its <name> element. When a name and a number collide,
 * the later argument wins and the earlier entry is removed.
 *
 * @param $args  A PPNode, or an iterable of <part> DOM nodes, or false
 *               for no arguments
 * @param $title Title for the new frame, or false to reuse this frame's
 * @return PPTemplateFrame_DOM
 */
function newChild( $args = false, $title = false ) {
    $named = array();
    $numbered = array();
    if ( $title === false ) {
        $title = $this->title;
    }

    if ( $args !== false ) {
        if ( $args instanceof PPNode ) {
            $args = $args->node;
        }
        $xpath = false;
        foreach ( $args as $partNode ) {
            # One XPath object, created from the first part, serves them all
            if ( !$xpath ) {
                $xpath = new DOMXPath( $partNode->ownerDocument );
            }

            $nameNode = $xpath->query( 'name', $partNode )->item( 0 );
            $valueList = $xpath->query( 'value', $partNode );

            if ( $nameNode->hasAttributes() ) {
                # Numbered argument
                $key = $nameNode->attributes->getNamedItem( 'index' )->textContent;
                $numbered[$key] = $valueList->item( 0 );
                unset( $named[$key] );
            } else {
                # Named argument
                $key = trim( $this->expand( $nameNode, PPFrame::STRIP_COMMENTS ) );
                $named[$key] = $valueList->item( 0 );
                unset( $numbered[$key] );
            }
        }
    }

    return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numbered, $named, $title );
}
00870
/**
 * Expand a node, or a collection of nodes, into a string.
 *
 * The tree is walked iteratively using three parallel stacks (output,
 * iterator and index) instead of recursing for every child. Only headings
 * recurse directly, though the parser callbacks used for templates,
 * arguments and extension tags may call back into this method.
 *
 * @param $root  A string (returned unchanged), PPNode_DOM, DOMDocument,
 *               DOMNode, DOMNodeList, or array of any of these
 * @param $flags Bit field of NO_TEMPLATES, NO_ARGS, STRIP_COMMENTS,
 *               RECOVER_COMMENTS and NO_IGNORE
 * @return String: the expanded text, or an error <span> when the node-count
 *         or expansion-depth limit is exceeded
 * @throws MWException if something that is not a string, array, node list
 *         or DOM node is encountered
 */
function expand( $root, $flags = 0 ) {
    # Depth of nested expand() calls, shared by all frames
    static $expansionDepth = 0;
    if ( is_string( $root ) ) {
        return $root;
    }

    if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
    {
        return '<span class="error">Node-count limit exceeded</span>';
    }

    if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) {
        return '<span class="error">Expansion depth limit exceeded</span>';
    }
    wfProfileIn( __METHOD__ );
    ++$expansionDepth;

    if ( $root instanceof PPNode_DOM ) {
        $root = $root->node;
    }
    if ( $root instanceof DOMDocument ) {
        $root = $root->documentElement;
    }

    # Level 0 is a sentinel that collects the final output; the real work
    # starts at level 1 with $root
    $outStack = array( '', '' );
    $iteratorStack = array( false, $root );
    $indexStack = array( 0, 0 );

    while ( count( $iteratorStack ) > 1 ) {
        $level = count( $outStack ) - 1;
        $iteratorNode =& $iteratorStack[ $level ];
        $out =& $outStack[$level];
        $index =& $indexStack[$level];

        if ( $iteratorNode instanceof PPNode_DOM ) $iteratorNode = $iteratorNode->node;

        if ( is_array( $iteratorNode ) ) {
            if ( $index >= count( $iteratorNode ) ) {
                # All done with this iterator
                $iteratorStack[$level] = false;
                $contextNode = false;
            } else {
                $contextNode = $iteratorNode[$index];
                $index++;
            }
        } elseif ( $iteratorNode instanceof DOMNodeList ) {
            if ( $index >= $iteratorNode->length ) {
                # All done with this iterator
                $iteratorStack[$level] = false;
                $contextNode = false;
            } else {
                $contextNode = $iteratorNode->item( $index );
                $index++;
            }
        } else {
            # Not a collection: process the single item itself, once
            $contextNode = $iteratorStack[$level];
            $iteratorStack[$level] = false;
        }

        if ( $contextNode instanceof PPNode_DOM ) $contextNode = $contextNode->node;

        $newIterator = false;

        if ( $contextNode === false ) {
            # nothing to do
        } elseif ( is_string( $contextNode ) ) {
            $out .= $contextNode;
        } elseif ( is_array( $contextNode ) || $contextNode instanceof DOMNodeList ) {
            $newIterator = $contextNode;
        } elseif ( $contextNode instanceof DOMNode ) {
            if ( $contextNode->nodeType == XML_TEXT_NODE ) {
                $out .= $contextNode->nodeValue;
            } elseif ( $contextNode->nodeName == 'template' ) {
                # Double-brace expansion
                $xpath = new DOMXPath( $contextNode->ownerDocument );
                $titles = $xpath->query( 'title', $contextNode );
                $title = $titles->item( 0 );
                $parts = $xpath->query( 'part', $contextNode );
                if ( $flags & self::NO_TEMPLATES ) {
                    $newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
                } else {
                    $lineStart = $contextNode->getAttribute( 'lineStart' );
                    $params = array(
                        'title' => new PPNode_DOM( $title ),
                        'parts' => new PPNode_DOM( $parts ),
                        'lineStart' => $lineStart );
                    $ret = $this->parser->braceSubstitution( $params, $this );
                    if ( isset( $ret['object'] ) ) {
                        $newIterator = $ret['object'];
                    } else {
                        $out .= $ret['text'];
                    }
                }
            } elseif ( $contextNode->nodeName == 'tplarg' ) {
                # Triple-brace expansion
                $xpath = new DOMXPath( $contextNode->ownerDocument );
                $titles = $xpath->query( 'title', $contextNode );
                $title = $titles->item( 0 );
                $parts = $xpath->query( 'part', $contextNode );
                if ( $flags & self::NO_ARGS ) {
                    $newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
                } else {
                    $params = array(
                        'title' => new PPNode_DOM( $title ),
                        'parts' => new PPNode_DOM( $parts ) );
                    $ret = $this->parser->argSubstitution( $params, $this );
                    if ( isset( $ret['object'] ) ) {
                        $newIterator = $ret['object'];
                    } else {
                        $out .= $ret['text'];
                    }
                }
            } elseif ( $contextNode->nodeName == 'comment' ) {
                # HTML-style comment
                # Remove it in HTML, pre+remove and STRIP_COMMENTS modes
                if ( $this->parser->ot['html']
                    || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
                    || ( $flags & self::STRIP_COMMENTS ) )
                {
                    $out .= '';
                }
                # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
                # Not in RECOVER_COMMENTS mode (extractSections) though
                elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
                    $out .= $this->parser->insertStripItem( $contextNode->textContent );
                }
                # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
                else {
                    $out .= $contextNode->textContent;
                }
            } elseif ( $contextNode->nodeName == 'ignore' ) {
                # Output suppression used by <includeonly> etc.
                # OT_WIKI will only respect <ignore> in substed templates.
                # The other output types respect it unless NO_IGNORE is set.
                # extractSections() sets NO_IGNORE and so never respects it.
                if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
                    $out .= $contextNode->textContent;
                } else {
                    $out .= '';
                }
            } elseif ( $contextNode->nodeName == 'ext' ) {
                # Extension tag
                $xpath = new DOMXPath( $contextNode->ownerDocument );
                $names = $xpath->query( 'name', $contextNode );
                $attrs = $xpath->query( 'attr', $contextNode );
                $inners = $xpath->query( 'inner', $contextNode );
                $closes = $xpath->query( 'close', $contextNode );
                $params = array(
                    'name' => new PPNode_DOM( $names->item( 0 ) ),
                    'attr' => $attrs->length > 0 ? new PPNode_DOM( $attrs->item( 0 ) ) : null,
                    'inner' => $inners->length > 0 ? new PPNode_DOM( $inners->item( 0 ) ) : null,
                    'close' => $closes->length > 0 ? new PPNode_DOM( $closes->item( 0 ) ) : null,
                );
                $out .= $this->parser->extensionSubstitution( $params, $this );
            } elseif ( $contextNode->nodeName == 'h' ) {
                # Heading
                $s = $this->expand( $contextNode->childNodes, $flags );

                # Insert a heading marker only for <h> children of <root>
                # This is to stop extractSections from going over multiple tree levels
                if ( $contextNode->parentNode->nodeName == 'root'
                    && $this->parser->ot['html'] )
                {
                    # Insert heading index marker
                    $headingIndex = $contextNode->getAttribute( 'i' );
                    $titleText = $this->title->getPrefixedDBkey();
                    $this->parser->mHeadings[] = array( $titleText, $headingIndex );
                    $serial = count( $this->parser->mHeadings ) - 1;
                    $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
                    # The marker goes directly after the opening run of
                    # equals signs, whose length is the heading level
                    $count = $contextNode->getAttribute( 'level' );
                    $s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
                    $this->parser->mStripState->general->setPair( $marker, '' );
                }
                $out .= $s;
            } else {
                # Generic recursive expansion
                $newIterator = $contextNode->childNodes;
            }
        } else {
            # Undo the depth increment made on entry. The counter is static,
            # so leaving it raised would make every later expansion look
            # deeper than it really is.
            --$expansionDepth;
            wfProfileOut( __METHOD__ );
            throw new MWException( __METHOD__.': Invalid parameter type' );
        }

        if ( $newIterator !== false ) {
            if ( $newIterator instanceof PPNode_DOM ) {
                $newIterator = $newIterator->node;
            }
            # Descend: open a new level for the child collection
            $outStack[] = '';
            $iteratorStack[] = $newIterator;
            $indexStack[] = 0;
        } elseif ( $iteratorStack[$level] === false ) {
            # Return accumulated value to parent
            # With tail recursion
            while ( $iteratorStack[$level] === false && $level > 0 ) {
                $outStack[$level - 1] .= $out;
                array_pop( $outStack );
                array_pop( $iteratorStack );
                array_pop( $indexStack );
                $level--;
            }
        }
    }
    --$expansionDepth;
    wfProfileOut( __METHOD__ );
    return $outStack[0];
}
01079
/**
 * Expand every node in the extra arguments with the given flags and join
 * the results with a separator.
 *
 * Each extra argument may be a single node, an array of nodes, a
 * DOMNodeList, or a PPNode_DOM wrapping any of these.
 *
 * @param $sep   String: separator placed between expanded nodes
 * @param $flags Integer: flags passed to expand()
 * @return String
 */
function implodeWithFlags( $sep, $flags ) {
    $expanded = array();
    foreach ( array_slice( func_get_args(), 2 ) as $root ) {
        if ( $root instanceof PPNode_DOM ) {
            $root = $root->node;
        }
        $isList = is_array( $root ) || ( $root instanceof DOMNodeList );
        $nodes = $isList ? $root : array( $root );
        foreach ( $nodes as $node ) {
            $expanded[] = $this->expand( $node, $flags );
        }
    }
    return implode( $sep, $expanded );
}
01101
/**
 * Expand every node in the extra arguments with default flags and join the
 * results with a separator.
 *
 * Each extra argument may be a single node, an array of nodes, a
 * DOMNodeList, or a PPNode_DOM wrapping any of these.
 *
 * @param $sep String: separator placed between expanded nodes
 * @return String
 */
function implode( $sep ) {
    $expanded = array();
    foreach ( array_slice( func_get_args(), 1 ) as $root ) {
        if ( $root instanceof PPNode_DOM ) {
            $root = $root->node;
        }
        $isList = is_array( $root ) || ( $root instanceof DOMNodeList );
        $nodes = $isList ? $root : array( $root );
        foreach ( $nodes as $node ) {
            $expanded[] = $this->expand( $node );
        }
    }
    return implode( $sep, $expanded );
}
01127
/**
 * Build a flat array of nodes and separators without expanding anything.
 *
 * Each extra argument may be a single node, an array of nodes, a
 * DOMNodeList, or a PPNode_DOM wrapping any of these. The separator is
 * inserted between consecutive nodes, never before the first or after the
 * last.
 *
 * @param $sep Separator value inserted between nodes
 * @return Array of nodes interleaved with $sep
 */
function virtualImplode( $sep ) {
    $args = array_slice( func_get_args(), 1 );
    $out = array();
    $first = true;

    foreach ( $args as $root ) {
        # Unwrap each argument here, inside the loop. Doing it before the
        # loop would test a variable that does not exist yet and leave
        # wrapped arguments untouched.
        if ( $root instanceof PPNode_DOM ) $root = $root->node;
        if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
            $root = array( $root );
        }
        foreach ( $root as $node ) {
            if ( $first ) {
                $first = false;
            } else {
                $out[] = $sep;
            }
            $out[] = $node;
        }
    }
    return $out;
}
01153
/**
 * Collect the nodes of a series of roots into one flat array, like
 * virtualImplode(), but with $start as the first entry and $end as the last.
 *
 * Every argument after $end is a root. A root may be a PPNode_DOM (its
 * wrapped node is used), an array or DOMNodeList of nodes, or a single node.
 *
 * @param mixed $start Entry placed before all nodes
 * @param mixed $sep Separator entry inserted between consecutive nodes
 * @param mixed $end Entry placed after all nodes
 * @return array
 */
function virtualBracketedImplode( $start, $sep, $end ) {
	$roots = array_slice( func_get_args(), 3 );
	$list = array( $start );
	$needSep = false;

	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		$nodes = ( is_array( $root ) || $root instanceof DOMNodeList ) ? $root : array( $root );
		foreach ( $nodes as $node ) {
			if ( $needSep ) {
				$list[] = $sep;
			}
			$list[] = $node;
			$needSep = true;
		}
	}

	$list[] = $end;
	return $list;
}
01179
/**
 * String form of a plain frame; the braces are always empty here.
 *
 * @return string
 */
function __toString() {
	return 'frame{}';
}
01183
/**
 * Get a prefixed DB key: this frame's own title by default, or the entry
 * stored in the title cache at the given level.
 *
 * @param int|bool $level Index into the title cache, or false for this frame's title
 * @return string|bool The cached entry, or false when nothing is cached at $level
 */
function getPDBK( $level = false ) {
	if ( $level !== false ) {
		if ( isset( $this->titleCache[$level] ) ) {
			return $this->titleCache[$level];
		}
		return false;
	}
	return $this->title->getPrefixedDBkey();
}
01191
/**
 * Whether this frame has no arguments. Always true for this frame type.
 *
 * @return bool
 */
function isEmpty() {
	return true;
}
01198
/**
 * Look up an argument by name. This frame type never finds one.
 *
 * @param string|int $name Argument name or index (ignored)
 * @return bool Always false
 */
function getArgument( $name ) {
	return false;
}
01202
/**
 * Check a title against this frame's loop-check hash.
 *
 * @param object $title Object providing getPrefixedDBkey()
 * @return bool True when the title's prefixed DB key is NOT in the hash,
 *   false when it is already present
 */
function loopCheck( $title ) {
	$pdbk = $title->getPrefixedDBkey();
	return !isset( $this->loopCheckHash[$pdbk] );
}
01209
/**
 * Whether this frame is a template frame. Always false for this frame type.
 *
 * @return bool
 */
function isTemplate() {
	return false;
}
01216 }
01217
/**
 * Frame holding the numbered and named argument nodes of a template call.
 * Arguments are expanded on demand in the parent frame and the resulting
 * text is cached per argument.
 */
class PPTemplateFrame_DOM extends PPFrame_DOM {
	var $numberedArgs, $namedArgs, $parent;
	var $numberedExpansionCache, $namedExpansionCache;

	/**
	 * @param object $preprocessor Preprocessor; its parser is copied onto the frame
	 * @param object|bool $parent Parent frame; its titleCache, loopCheckHash and depth are read
	 * @param array $numberedArgs Argument nodes keyed by index
	 * @param array $namedArgs Argument nodes keyed by name
	 * @param object|bool $title Title of this frame, or false for none
	 */
	function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
		$this->preprocessor = $preprocessor;
		$this->parser = $preprocessor->parser;
		$this->parent = $parent;
		$this->numberedArgs = $numberedArgs;
		$this->namedArgs = $namedArgs;
		$this->title = $title;

		$pdbk = false;
		if ( $title ) {
			$pdbk = $title->getPrefixedDBkey();
		}

		# Start from the parent's title cache and append this frame's key
		$this->titleCache = $parent->titleCache;
		$this->titleCache[] = $pdbk;

		# Start from the parent's loop-check hash and add this frame's key
		$this->loopCheckHash = $parent->loopCheckHash;
		if ( $pdbk !== false ) {
			$this->loopCheckHash[$pdbk] = true;
		}

		$this->depth = $parent->depth + 1;
		$this->numberedExpansionCache = array();
		$this->namedExpansionCache = array();
	}

	/**
	 * String form listing every argument with the XML of its node.
	 *
	 * @return string
	 */
	function __toString() {
		$s = 'tplframe{';
		# Empty before the first argument, ', ' from then on
		$glue = '';
		foreach ( $this->numberedArgs + $this->namedArgs as $name => $value ) {
			$xml = $value->ownerDocument->saveXML( $value );
			$s .= $glue . "\"$name\":\"" . str_replace( '"', '\\"', $xml ) . '"';
			$glue = ', ';
		}
		return $s . '}';
	}

	/**
	 * Whether the frame has neither numbered nor named arguments.
	 *
	 * @return bool
	 */
	function isEmpty() {
		return !( count( $this->numberedArgs ) || count( $this->namedArgs ) );
	}

	/**
	 * Get all arguments, numbered then named, as expanded text.
	 *
	 * @return array Argument key => getArgument() result
	 */
	function getArguments() {
		$keys = array_merge(
			array_keys( $this->numberedArgs ),
			array_keys( $this->namedArgs )
		);
		$arguments = array();
		foreach ( $keys as $key ) {
			$arguments[$key] = $this->getArgument( $key );
		}
		return $arguments;
	}

	/**
	 * Get the arguments whose keys appear in the numbered set.
	 *
	 * @return array Argument key => getArgument() result
	 */
	function getNumberedArguments() {
		$keys = array_keys( $this->numberedArgs );
		$arguments = array();
		foreach ( $keys as $key ) {
			$arguments[$key] = $this->getArgument( $key );
		}
		return $arguments;
	}

	/**
	 * Get the arguments whose keys appear in the named set.
	 *
	 * @return array Argument key => getArgument() result
	 */
	function getNamedArguments() {
		$keys = array_keys( $this->namedArgs );
		$arguments = array();
		foreach ( $keys as $key ) {
			$arguments[$key] = $this->getArgument( $key );
		}
		return $arguments;
	}

	/**
	 * Get the expanded text of a numbered argument, expanding it in the
	 * parent frame on first use.
	 *
	 * @param int|string $index
	 * @return string|bool Expanded text, or false if there is no such argument
	 */
	function getNumberedArgument( $index ) {
		if ( isset( $this->numberedArgs[$index] ) ) {
			if ( !isset( $this->numberedExpansionCache[$index] ) ) {
				# No trimming for unnamed arguments
				$this->numberedExpansionCache[$index] = $this->parent->expand(
					$this->numberedArgs[$index], self::STRIP_COMMENTS );
			}
			return $this->numberedExpansionCache[$index];
		}
		return false;
	}

	/**
	 * Get the expanded, trimmed text of a named argument, expanding it in
	 * the parent frame on first use.
	 *
	 * @param string $name
	 * @return string|bool Expanded text, or false if there is no such argument
	 */
	function getNamedArgument( $name ) {
		if ( isset( $this->namedArgs[$name] ) ) {
			if ( !isset( $this->namedExpansionCache[$name] ) ) {
				# Trim named arguments post-expand, for backwards compatibility
				$expanded = $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS );
				$this->namedExpansionCache[$name] = trim( $expanded );
			}
			return $this->namedExpansionCache[$name];
		}
		return false;
	}

	/**
	 * Get an argument's expanded text, trying the numbered arguments first
	 * and the named arguments second.
	 *
	 * @param int|string $name
	 * @return string|bool Expanded text, or false if neither set has the key
	 */
	function getArgument( $name ) {
		$text = $this->getNumberedArgument( $name );
		return ( $text === false ) ? $this->getNamedArgument( $name ) : $text;
	}

	/**
	 * Whether this frame is a template frame. Always true for this class.
	 *
	 * @return bool
	 */
	function isTemplate() {
		return true;
	}
}
01331
/**
 * Frame whose arguments are supplied directly by the caller as an array
 * and returned as-is by getArgument().
 */
class PPCustomFrame_DOM extends PPFrame_DOM {
	var $args;

	/**
	 * @param object $preprocessor Preprocessor; its parser is copied onto the frame
	 * @param array $args Arguments keyed by name or index
	 */
	function __construct( $preprocessor, $args ) {
		$this->preprocessor = $preprocessor;
		$this->parser = $preprocessor->parser;
		$this->args = $args;
	}

	/**
	 * String form listing every argument. Each value's own __toString()
	 * method is called, so values are expected to be objects providing one.
	 *
	 * @return string
	 */
	function __toString() {
		$s = 'cstmframe{';
		# Empty before the first argument, ', ' from then on
		$glue = '';
		foreach ( $this->args as $name => $value ) {
			$text = $value->__toString();
			$s .= $glue . "\"$name\":\"" . str_replace( '"', '\\"', $text ) . '"';
			$glue = ', ';
		}
		return $s . '}';
	}

	/**
	 * Whether the frame has no arguments.
	 *
	 * @return bool
	 */
	function isEmpty() {
		return count( $this->args ) === 0;
	}

	/**
	 * Get an argument exactly as it was supplied to the constructor.
	 *
	 * @param int|string $index
	 * @return mixed The stored value, or false when it is not set
	 */
	function getArgument( $index ) {
		return isset( $this->args[$index] ) ? $this->args[$index] : false;
	}
}
01372
/**
 * Wrapper around a DOM node or a DOMNodeList. Methods that return nodes
 * wrap them in a new instance of this class.
 */
class PPNode_DOM implements PPNode {
	var $node;

	/**
	 * @param DOMNode|DOMNodeList $node Node or node list to wrap
	 * @param mixed $xpath Unused; kept so existing two-argument calls still work
	 */
	function __construct( $node, $xpath = false ) {
		$this->node = $node;
	}

	/**
	 * Lazily create the 'xpath' property on first access.
	 *
	 * Only 'xpath' is handled. Any other undefined property raises a notice
	 * and yields null, as an ordinary undefined property would. Returning
	 * the DOMXPath object for every name would hide misspelled properties.
	 *
	 * @param string $name Name of the inaccessible property
	 * @return DOMXPath|null
	 */
	function __get( $name ) {
		if ( $name == 'xpath' ) {
			# Stored as a real property, so later reads bypass __get()
			$this->xpath = new DOMXPath( $this->node->ownerDocument );
			return $this->xpath;
		}
		trigger_error( 'Undefined property: ' . __CLASS__ . '::$' . $name, E_USER_NOTICE );
		return null;
	}

	/**
	 * Serialise the wrapped node, or every node of the wrapped list, to XML.
	 *
	 * @return string
	 */
	function __toString() {
		if ( $this->node instanceof DOMNodeList ) {
			$s = '';
			foreach ( $this->node as $node ) {
				$s .= $node->ownerDocument->saveXML( $node );
			}
		} else {
			$s = $this->node->ownerDocument->saveXML( $this->node );
		}
		return $s;
	}

	/**
	 * @return PPNode_DOM|bool Wrapped child node list, or false if there is none
	 */
	function getChildren() {
		return $this->node->childNodes ? new self( $this->node->childNodes ) : false;
	}

	/**
	 * @return PPNode_DOM|bool Wrapped first child, or false if there is none
	 */
	function getFirstChild() {
		return $this->node->firstChild ? new self( $this->node->firstChild ) : false;
	}

	/**
	 * @return PPNode_DOM|bool Wrapped next sibling, or false if there is none
	 */
	function getNextSibling() {
		return $this->node->nextSibling ? new self( $this->node->nextSibling ) : false;
	}

	/**
	 * Run an XPath query relative to the wrapped node.
	 *
	 * @param string $type XPath expression, evaluated with the wrapped node as context
	 * @return PPNode_DOM Wrapped query result
	 */
	function getChildrenOfType( $type ) {
		return new self( $this->xpath->query( $type, $this->node ) );
	}

	/**
	 * @return int|bool Number of nodes when wrapping a DOMNodeList, false otherwise
	 */
	function getLength() {
		if ( $this->node instanceof DOMNodeList ) {
			return $this->node->length;
		} else {
			return false;
		}
	}

	/**
	 * Fetch one entry of the wrapped node list.
	 *
	 * @param int $i Index passed to the wrapped object's item() method
	 * @return PPNode_DOM|bool Wrapped entry, or false if there is none at $i
	 */
	function item( $i ) {
		$item = $this->node->item( $i );
		return $item ? new self( $item ) : false;
	}

	/**
	 * @return string '#nodelist' when wrapping a DOMNodeList, otherwise the node name
	 */
	function getName() {
		if ( $this->node instanceof DOMNodeList ) {
			return '#nodelist';
		} else {
			return $this->node->nodeName;
		}
	}

	/**
	 * Split the wrapped node into its first <name> and <value> children.
	 *
	 * @return array With keys 'name' (PPNode_DOM), 'index' (the 'index'
	 *   attribute of the name element) and 'value' (PPNode_DOM)
	 * @throws MWException When either child is missing
	 */
	function splitArg() {
		$names = $this->xpath->query( 'name', $this->node );
		$values = $this->xpath->query( 'value', $this->node );
		if ( !$names->length || !$values->length ) {
			throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
		}
		$name = $names->item( 0 );
		$index = $name->getAttribute( 'index' );
		return array(
			'name' => new self( $name ),
			'index' => $index,
			'value' => new self( $values->item( 0 ) ) );
	}

	/**
	 * Split the wrapped node into its <name>, <attr>, <inner> and <close>
	 * children. 'inner' and 'close' are included only when present.
	 *
	 * @return array Of PPNode_DOM, keyed by 'name', 'attr', 'inner', 'close'
	 * @throws MWException When the name or attr child is missing
	 */
	function splitExt() {
		$names = $this->xpath->query( 'name', $this->node );
		$attrs = $this->xpath->query( 'attr', $this->node );
		$inners = $this->xpath->query( 'inner', $this->node );
		$closes = $this->xpath->query( 'close', $this->node );
		if ( !$names->length || !$attrs->length ) {
			throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
		}
		$parts = array(
			'name' => new self( $names->item( 0 ) ),
			'attr' => new self( $attrs->item( 0 ) ) );
		if ( $inners->length ) {
			$parts['inner'] = new self( $inners->item( 0 ) );
		}
		if ( $closes->length ) {
			$parts['close'] = new self( $closes->item( 0 ) );
		}
		return $parts;
	}

	/**
	 * Split an <h> node into its 'i' and 'level' attributes and its children.
	 *
	 * @return array With keys 'i', 'level' and 'contents'
	 * @throws MWException When the wrapped node is not named 'h'
	 */
	function splitHeading() {
		# Compare the name explicitly: "!$x == 'h'" negates $x first, and
		# the wrapper itself has no nodeName property.
		if ( $this->getName() !== 'h' ) {
			throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
		}
		return array(
			'i' => $this->node->getAttribute( 'i' ),
			'level' => $this->node->getAttribute( 'level' ),
			'contents' => $this->getChildren()
		);
	}
}