<?php

/**
 * Command-line front end that runs a configurable set of localisation checks
 * against one language (or all of them) and prints the findings either as
 * plain text or as wikitext.
 *
 * The actual checks are methods of the object stored in $this->L; this class
 * only selects which ones to run, collects their results and formats them.
 */
class CheckLanguageCLI {
	/** Language code to check when not checking all languages. */
	protected $code = null;
	/** Display level (0-3), see help(). Stored exactly as given in the options. */
	protected $level = 2;
	/** Whether message keys are rendered as [[MediaWiki:...]] links. */
	protected $doLinks = false;
	/** Content language of the wiki the links are meant for. */
	protected $wikiCode = 'en';
	/** Whether every language returned by $this->L->getLanguages() is checked. */
	protected $checkAll = false;
	/** Output format: 'plain' or 'wiki'. */
	protected $output = 'plain';
	/** List of check codes to run. */
	protected $checks = array();
	/** Object providing the check methods and message accessors. */
	protected $L = null;

	/** Results of the last run: array( language code => array( check => messages ) ). */
	protected $results = array();

	/** Passed to the languages constructor; false when the 'noexif' option is set. */
	private $includeExif = false;

	/**
	 * Read the command line options and create the languages helper.
	 * Prints the help text and exits when the 'help' option is present.
	 *
	 * @param $options array Options, keyed by option name (see help()).
	 */
	public function __construct( array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			$this->level = $options['level'];
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# 'whitelist' wins over 'blacklist'; 'easy' only narrows the base list.
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Get the checks that are run when no other selection is made.
	 * @return array List of check codes.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'magic', 'magic-old', 'magic-over', 'magic-case',
			'special', 'special-old',
		);
	}

	/**
	 * Get the checks whose items are not messages. Their keys are never turned
	 * into links, their totals come from getTotalCount(), and outputWiki()
	 * leaves them out.
	 * @return array List of check codes.
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'magic', 'magic-old', 'magic-over',
			'magic-case', 'special', 'special-old',
		);
	}

	/**
	 * Get the checks selected by the 'easy' option.
	 * @return array List of check codes.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars', 'magic-old',
			'magic-over', 'magic-case', 'special-old',
		);
	}

	/**
	 * Get all known checks.
	 * @return array Check code => name of the method of $this->L implementing it.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithMismatchVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
			'namespace'    => 'getUntranslatedNamespaces',
			'projecttalk'  => 'getProblematicProjectTalks',
			'magic'        => 'getUntranslatedMagicWords',
			'magic-old'    => 'getObsoleteMagicWords',
			'magic-over'   => 'getOverridingMagicWords',
			'magic-case'   => 'getCaseMismatchMagicWords',
			# NOTE(review): "Untraslated" looks like a typo, but this must match the
			# method name on the languages object, which is defined elsewhere.
			'special'      => 'getUntraslatedSpecialPages',
			'special-old'  => 'getObsoleteSpecialPages',
		);
	}

	/**
	 * Get the way to compute the total ($2 in the descriptions) for the
	 * non-message checks.
	 * @return array Check code => array( method of $this->L, language code to
	 *   pass to it, or null to pass the checked language ), or null when the
	 *   check has no total.
	 */
	protected function getTotalCount() {
		return array(
			'namespace'   => array( 'getNamespaceNames', 'en' ),
			'projecttalk' => null,
			'magic'       => array( 'getMagicWords', 'en' ),
			'magic-old'   => array( 'getMagicWords', null ),
			'magic-over'  => array( 'getMagicWords', null ),
			'magic-case'  => array( 'getMagicWords', null ),
			'special'     => array( 'getSpecialPageAliases', 'en' ),
			'special-old' => array( 'getSpecialPageAliases', null ),
		);
	}

	/**
	 * Get the header line printed for each check by outputText().
	 * $1 is the number of findings, $2 the total and $3 the language code.
	 * @return array Check code => description.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables'    => '$1 message(s) of $2 in $3 don\'t match the variables used in en:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace'    => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk'  => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'magic'        => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-old'    => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
			'magic-over'   => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case'   => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special'      => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
			'special-old'  => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
		);
	}

	/**
	 * Get the help text shown for the 'help' option.
	 * @return string
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
	* lang: Language code (default: the installation default language).
	* all: Check all customized languages.
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Don't do the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
	* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable or do not exist, but are translated.
	* variables: Messages without variables which should be used, or with variables which shouldn't be used.
	* empty: Empty messages and messages that contain only -.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
	* namespace: Namespace names that were not translated.
	* projecttalk: Namespace names and aliases where the project talk does not contain $1.
	* magic: Magic words that were not translated.
	* magic-old: Magic words which do not exist.
	* magic-over: Magic words that override the original English word.
	* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
	* special: Special page names that were not translated.
	* special-old: Special page names which do not exist.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks and, unless the display level is 0, print the results.
	 * @throws MWException When the output type is neither 'plain' nor 'wiki'.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			$this->outputResults();
		}
	}

	/**
	 * Print $this->results in the format selected by the 'output' option.
	 * @throws MWException When the output type is neither 'plain' nor 'wiki'.
	 */
	protected function outputResults() {
		switch ( $this->output ) {
			case 'plain':
				$this->outputText();
				break;
			case 'wiki':
				$this->outputWiki();
				break;
			default:
				throw new MWException( "Invalid output type $this->output" );
		}
	}

	/**
	 * Run the checks for the selected language, or for every language except
	 * the ignored codes when 'all' was given, and store them in $this->results.
	 * @throws MWException When a single, ignored language code was requested.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			} else {
				$this->results[$this->code] = $this->checkLanguage( $this->code );
			}
		}
	}

	/**
	 * Get the per-language check blacklist from the global $checkBlacklist.
	 * @return array Language code => list of check codes to skip.
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return $checkBlacklist;
	}

	/**
	 * Run the selected checks on one language.
	 *
	 * @param $code string The language code.
	 * @return array Check code => findings of that check. Checks blacklisted
	 *   for the language are present with an empty list. Empty at level 0.
	 * @throws MWException When a selected check is unknown or not callable.
	 */
	protected function checkLanguage( $code ) {
		# Syntax check only: just load the messages. The level is stored uncast
		# from the options, so it may be the string '0'; normalise before comparing.
		if ( (int)$this->level === 0 ) {
			$this->L->getMessages( $code );
			# Return an array so that callers can always iterate the result.
			return array();
		}

		$results = array();
		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		foreach ( $this->checks as $check ) {
			if ( isset( $checkBlacklist[$code] ) &&
				in_array( $check, $checkBlacklist[$code] ) ) {
				# Keep the check in the results so the output columns stay aligned.
				$results[$check] = array();
				continue;
			}

			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key for output.
	 *
	 * @param $key string The message key.
	 * @param $code string The language code.
	 * @return string The key itself, or a wiki link to the message page when
	 *   links are enabled (with a /$code subpage unless $code is the wiki language).
	 */
	protected function formatKey( $key, $code ) {
		if ( $this->doLinks ) {
			$displayKey = ucfirst( $key );
			if ( $code == $this->wikiCode ) {
				return "[[MediaWiki:$displayKey|$key]]";
			} else {
				return "[[MediaWiki:$displayKey/$code|$key]]";
			}
		} else {
			return $key;
		}
	}

	/**
	 * Print the results as plain text: one header per check that has findings,
	 * followed by the findings according to the display level.
	 */
	protected function outputText() {
		$descriptions = $this->getDescriptions();
		$nonMessageChecks = $this->nonMessageChecks();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				$isMessageCheck = !in_array( $check, $nonMessageChecks );
				if ( $check == 'untranslated' ) {
					$translatable = $this->L->getGeneralMessages();
					$total = count( $translatable['translatable'] );
				} elseif ( !$isMessageCheck ) {
					$totalCounts = $this->getTotalCount();
					$totalCount = isset( $totalCounts[$check] ) ? $totalCounts[$check] : null;
					if ( $totalCount === null ) {
						# No total is defined for this check (e.g. 'projecttalk',
						# whose description does not use $2), so nothing to call.
						$total = 0;
					} else {
						$callback = array( $this->L, $totalCount[0] );
						$callCode = $totalCount[1] ? $totalCount[1] : $code;
						$total = count( call_user_func( $callback, $callCode ) );
					}
				} else {
					$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				foreach ( $messages as $key => $value ) {
					if ( $isMessageCheck ) {
						$key = $this->formatKey( $key, $code );
					}
					if ( $this->level == 2 || empty( $value ) ) {
						echo "* $key\n";
					} else {
						echo "* $key: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Print the results as wikitext: a sortable table with one row per
	 * language that has problems, followed by a section per language listing
	 * the affected message keys. Non-message checks are left out.
	 */
	public function outputWiki() {
		global $wgContLang, $IP;

		$nonMessageChecks = $this->nonMessageChecks();
		$detailText = '';
		# The rows skip the non-message checks, so the header must skip them too.
		$rows = array(
			'! Language !! Code !! Total !! ' .
				implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) ),
		);

		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks ) ) {
					continue;
				}
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( array_keys( $messages ) as $key ) {
						$messageDetails[] = $this->formatKey( $key, $code );
					}
					$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( $IP );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Check whether the last run produced no findings at all.
	 * @return bool True when every check of every language is empty.
	 */
	protected function isEmpty() {
		foreach ( $this->results as $code => $results ) {
			foreach ( $results as $check => $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}
		return true;
	}
}

/**
 * Variant of CheckLanguageCLI that runs the message checks against the
 * message groups of one or more extensions instead of the core messages.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** List of extensionLanguages objects, one per extension to check. */
	private $extensions;

	/**
	 * Read the command line options and build the list of extensions.
	 * Prints the help text and exits when the 'help' option is present.
	 * The parent constructor is deliberately not called.
	 *
	 * @param $options array Options, keyed by option name (see help()).
	 * @param $extension string Extension name, comma-separated names, or one
	 *   of the keywords 'all', 'wikimedia' and 'flaggedrevs'.
	 */
	public function __construct( array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			$this->level = $options['level'];
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		$extensions = new PremadeMediawikiExtensionGroups();
		$extensions->addAll();
		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $extension ) {
				$group = MessageGroups::getGroup( $extension );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} elseif ( $extension == 'flaggedrevs' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-flaggedrevs-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} else {
			$extensions = explode( ',', $extension );
			foreach ( $extensions as $extension ) {
				$group = MessageGroups::getGroup( 'ext-' . $extension );
				if ( $group ) {
					$extension = new extensionLanguages( $group );
					$this->extensions[] = $extension;
				} else {
					print "No such extension $extension.\n";
				}
			}
		}
	}

	/**
	 * Get the checks that are run when no other selection is made.
	 * @return array List of check codes.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Get the checks whose items are not messages; there are none here.
	 * @return array Empty list.
	 */
	protected function nonMessageChecks() {
		return array();
	}

	/**
	 * Get the checks selected by the 'easy' option.
	 * @return array List of check codes.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars',
		);
	}

	/**
	 * Get the help text shown for the 'help' option.
	 * @return string
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
	* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions, "wikimedia" for extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs extension messages.
	* lang: Language code (default: the installation default language).
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Do not perform the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable, but translated.
	* variables: Messages without variables which should be used, or with variables which shouldn't be used.
	* empty: Empty messages.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks. The output is printed per extension from within
	 * checkLanguage(), so nothing else is printed here.
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check one language in every selected extension and print the findings
	 * of each extension that has any, preceded by the extension name.
	 *
	 * $this->L and $this->results are overwritten for each extension. Nothing
	 * is returned, so doChecks() stores null for the language afterwards.
	 *
	 * @param $code string The language code.
	 * @throws MWException When the output type is neither 'plain' nor 'wiki'.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			$this->L = $extension;
			$this->results = array();
			$this->results[$code] = parent::checkLanguage( $code );

			if ( !$this->isEmpty() ) {
				echo $extension->name() . ":\n";

				if ( $this->level > 0 ) {
					$this->outputResults();
				}

				echo "\n";
			}
		}
	}
}

# Blacklist some checks for some languages
$checkBlacklist = array(
#'code'        => array( 'check1', 'check2' ... )
'gan'          => array( 'plural' ),
'gn'           => array( 'plural' ),
'hak'          => array( 'plural' ),
'hu'           => array( 'plural' ),
'ja'           => array( 'plural' ), // Does not use plural
'ka'           => array( 'plural' ),
'kk-arab'      => array( 'plural' ),
'kk-cyrl'      => array( 'plural' ),
'kk-latn'      => array( 'plural' ),
'ko'           => array( 'plural' ),
'mn'           => array( 'plural' ),
'ms'           => array( 'plural' ),
'my'           => array( 'chars' ),  // Uses a lot zwnj
'sah'          => array( 'plural' ),
'sq'           => array( 'plural' ),
'tet'          => array( 'plural' ),
'th'           => array( 'plural' ),
'wuu'          => array( 'plural' ),
'xmf'          => array( 'plural' ),
'yue'          => array( 'plural' ),
'zh'           => array( 'plural' ),
'zh-classical' => array( 'plural' ),
'zh-cn'        => array( 'plural' ),
'zh-hans'      => array( 'plural' ),
'zh-hant'      => array( 'plural' ),
'zh-hk'        => array( 'plural' ),
'zh-sg'        => array( 'plural' ),
'zh-tw'        => array( 'plural' ),
'zh-yue'       => array( 'plural' ),
);