00001 <?php
00006 class CheckLanguageCLI {
00007 protected $code = null;
00008 protected $level = 2;
00009 protected $doLinks = false;
00010 protected $wikiCode = 'en';
00011 protected $checkAll = false;
00012 protected $output = 'plain';
00013 protected $checks = array();
00014 protected $L = null;
00015
00016 protected $results = array();
00017
00018 private $includeExif = false;
00019
/**
 * Read the command line options and prepare the checker.
 *
 * When the 'help' option is present, the help text is printed and the
 * script terminates immediately.
 *
 * @param $options Array: option name => value, as given on the command line.
 */
public function __construct( Array $options ) {
	if ( isset( $options['help'] ) ) {
		echo $this->help();
		exit();
	}

	# Plain switches: only their presence matters
	$this->checkAll = isset( $options['all'] );
	$this->doLinks = isset( $options['links'] );
	$this->includeExif = !isset( $options['noexif'] );

	# Language to check; falls back to the configured site language
	if ( !isset( $options['lang'] ) ) {
		global $wgLanguageCode;
		$this->code = $wgLanguageCode;
	} else {
		$this->code = $options['lang'];
	}

	# Options carrying a value override the property defaults as-is
	$valuedOptions = array(
		'level' => 'level',
		'wikilang' => 'wikiCode',
		'output' => 'output',
	);
	foreach ( $valuedOptions as $option => $property ) {
		if ( isset( $options[$option] ) ) {
			$this->$property = $options[$option];
		}
	}

	# Which checks to run: an explicit whitelist wins; otherwise start from
	# the easy or the default set and subtract the blacklist, if any
	if ( isset( $options['whitelist'] ) ) {
		$this->checks = explode( ',', $options['whitelist'] );
	} else {
		$candidates = isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks();
		$this->checks = isset( $options['blacklist'] )
			? array_diff( $candidates, explode( ',', $options['blacklist'] ) )
			: $candidates;
	}

	$this->L = new languages( $this->includeExif );
}
00068
/**
 * Get the checks which are run when neither a whitelist nor the 'easy'
 * option is given.
 *
 * @return Array of check codes.
 */
protected function defaultChecks() {
	$checks = array(
		# Checks on messages
		'untranslated',
		'duplicate',
		'obsolete',
		'variables',
		'empty',
		'plural',
		'whitespace',
		'xhtml',
		'chars',
		'links',
		'unbalanced',
		# Checks on namespaces, magic words and special page aliases
		'namespace',
		'projecttalk',
		'magic',
		'magic-old',
		'magic-over',
		'magic-case',
		'special',
		'special-old',
	);

	return $checks;
}
00081
/**
 * Get the checks whose results are not interface messages (namespace
 * names, magic words, special page aliases). The output methods do not
 * turn the keys of these results into message links.
 *
 * @return Array of check codes.
 */
protected function nonMessageChecks() {
	$checks = array(
		'namespace',
		'projecttalk',
		'magic',
		'magic-old',
		'magic-over',
		'magic-case',
		'special',
		'special-old',
	);

	return $checks;
}
00092
/**
 * Get the checks selected by the 'easy' option, described in the help as
 * those which can be treated by non-speakers of the language.
 *
 * @return Array of check codes.
 */
protected function easyChecks() {
	$checks = array(
		'duplicate',
		'obsolete',
		'empty',
		'whitespace',
		'xhtml',
		'chars',
		'magic-old',
		'magic-over',
		'magic-case',
		'special-old',
	);

	return $checks;
}
00103
/**
 * Get the mapping from check code to the name of the method that
 * performs the check. The method is invoked on the languages object
 * ($this->L) with the language code as its only argument.
 *
 * @return Array: check code => method name.
 */
protected function getChecks() {
	$methods = array();

	# Message checks
	$methods['untranslated'] = 'getUntranslatedMessages';
	$methods['duplicate'] = 'getDuplicateMessages';
	$methods['obsolete'] = 'getObsoleteMessages';
	$methods['variables'] = 'getMessagesWithMismatchVariables';
	$methods['plural'] = 'getMessagesWithoutPlural';
	$methods['empty'] = 'getEmptyMessages';
	$methods['whitespace'] = 'getMessagesWithWhitespace';
	$methods['xhtml'] = 'getNonXHTMLMessages';
	$methods['chars'] = 'getMessagesWithWrongChars';
	$methods['links'] = 'getMessagesWithDubiousLinks';
	$methods['unbalanced'] = 'getMessagesWithUnbalanced';

	# Non-message checks
	$methods['namespace'] = 'getUntranslatedNamespaces';
	$methods['projecttalk'] = 'getProblematicProjectTalks';
	$methods['magic'] = 'getUntranslatedMagicWords';
	$methods['magic-old'] = 'getObsoleteMagicWords';
	$methods['magic-over'] = 'getOverridingMagicWords';
	$methods['magic-case'] = 'getCaseMismatchMagicWords';
	$methods['special'] = 'getUntraslatedSpecialPages';
	$methods['special-old'] = 'getObsoleteSpecialPages';

	return $methods;
}
00131
/**
 * Get, for each non-message check, how the total number of items is
 * obtained for the "of $2" part of the description.
 *
 * Each entry is either null (no total available) or a pair of
 * ( method name on the languages object, language code ). A null
 * language code means the language being checked is used instead.
 *
 * @return Array: check code => array( method, code ) or null.
 */
protected function getTotalCount() {
	# Totals counted on the English definitions
	$namespaces = array( 'getNamespaceNames', 'en' );
	$magicWordsEn = array( 'getMagicWords', 'en' );
	$specialPagesEn = array( 'getSpecialPageAliases', 'en' );

	# Totals counted on the language being checked
	$magicWordsOwn = array( 'getMagicWords', null );
	$specialPagesOwn = array( 'getSpecialPageAliases', null );

	return array(
		'namespace' => $namespaces,
		'projecttalk' => null,
		'magic' => $magicWordsEn,
		'magic-old' => $magicWordsOwn,
		'magic-over' => $magicWordsOwn,
		'magic-case' => $magicWordsOwn,
		'special' => $specialPagesEn,
		'special-old' => $specialPagesOwn,
	);
}
00150
/**
 * Get the header line printed above the results of each check.
 *
 * Placeholders: $1 is the number of problems found, $2 the total number
 * of items and $3 the language code.
 *
 * @return Array: check code => description template.
 */
protected function getDescriptions() {
	$descriptions = array();

	# Message checks
	$descriptions['untranslated'] = '$1 message(s) of $2 are not translated to $3, but exist in en:';
	$descriptions['duplicate'] = '$1 message(s) of $2 are translated the same in en and $3:';
	$descriptions['obsolete'] = '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:';
	$descriptions['variables'] = '$1 message(s) of $2 in $3 don\'t match the variables used in en:';
	$descriptions['plural'] = '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:';
	$descriptions['empty'] = '$1 message(s) of $2 in $3 are empty or -:';
	$descriptions['whitespace'] = '$1 message(s) of $2 in $3 have trailing whitespace:';
	$descriptions['xhtml'] = '$1 message(s) of $2 in $3 contain illegal XHTML:';
	$descriptions['chars'] = '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:';
	$descriptions['links'] = '$1 message(s) of $2 in $3 have problematic link(s):';
	$descriptions['unbalanced'] = '$1 message(s) of $2 in $3 have unbalanced {[]}:';

	# Non-message checks
	$descriptions['namespace'] = '$1 namespace name(s) of $2 are not translated to $3, but exist in en:';
	$descriptions['projecttalk'] = '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:';
	$descriptions['magic'] = '$1 magic word(s) of $2 are not translated to $3, but exist in en:';
	$descriptions['magic-old'] = '$1 magic word(s) of $2 do not exist in en, but exist in $3:';
	$descriptions['magic-over'] = '$1 magic word(s) of $2 in $3 do not contain the original en word(s):';
	$descriptions['magic-case'] = '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:';
	$descriptions['special'] = '$1 special page alias(es) of $2 are not translated to $3, but exist in en:';
	$descriptions['special-old'] = '$1 special page alias(es) of $2 do not exist in en, but exist in $3:';

	return $descriptions;
}
00178
/**
 * Get the help text printed for the 'help' option.
 *
 * Lists every option read by the constructor, including 'output', whose
 * accepted values ('plain' and 'wiki') are the ones handled by execute().
 *
 * @return String: the help text.
 */
protected function help() {
	return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
* lang: Language code (default: the installation default language).
* all: Check all customized languages.
* help: Show this help.
* level: Show the following display level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* easy: Do only the easy checks, which can be treated by non-speakers of the language.
* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
* output: Select the output format: 'plain' for plain text (default) or 'wiki' for wiki markup.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable or do not exist, but are translated.
* variables: Messages without variables which should be used, or with variables which shouldn't be used.
* empty: Empty messages and messages that contain only -.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
* namespace: Namespace names that were not translated.
* projecttalk: Namespace names and aliases where the project talk does not contain $1.
* magic: Magic words that were not translated.
* magic-old: Magic words which do not exist.
* magic-over: Magic words that override the original English word.
* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
* special: Special page names that were not translated.
* special-old: Special page names which do not exist.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
}
00225
/**
 * Run the checks and print the results in the selected output format.
 * Nothing is printed unless the display level is above 0.
 *
 * @throws MWException when the output format is neither 'plain' nor 'wiki'.
 */
public function execute() {
	$this->doChecks();

	if ( !( $this->level > 0 ) ) {
		return;
	}

	if ( $this->output == 'plain' ) {
		$this->outputText();
	} elseif ( $this->output == 'wiki' ) {
		$this->outputWiki();
	} else {
		throw new MWException( "Invalid output type $this->output" );
	}
}
00244
/**
 * Run the selected checks and store what they return in $this->results,
 * keyed by language code. Either the single requested language is
 * checked, or (with the 'all' option) every language reported by the
 * languages object. 'en' and 'enRTL' are never checked.
 *
 * @throws MWException when the single requested language is 'en' or 'enRTL'.
 */
protected function doChecks() {
	$skipped = array( 'en', 'enRTL' );
	$this->results = array();

	if ( !$this->checkAll ) {
		# Single language
		if ( in_array( $this->code, $skipped ) ) {
			throw new MWException( "Cannot check code $this->code." );
		}
		$this->results[$this->code] = $this->checkLanguage( $this->code );
		return;
	}

	# All languages
	foreach ( $this->L->getLanguages() as $language ) {
		if ( in_array( $language, $skipped ) ) {
			continue;
		}
		$this->results[$language] = $this->checkLanguage( $language );
	}
}
00267
/**
 * Get the per-language check blacklist, read from the global
 * $checkBlacklist variable (language code => list of check codes that
 * are skipped for that language).
 *
 * @return Array, or null when the global is not defined.
 */
protected function getCheckBlacklist() {
	global $checkBlacklist;
	return $checkBlacklist;
}
00276
/**
 * Run the selected checks on one language.
 *
 * At display level 0 only the messages are loaded (syntax check) and no
 * check is run. A check that is blacklisted for the language is not run
 * and is reported with an empty result.
 *
 * @param $code String: the language code.
 * @return Array: check code => whatever the check method returned; empty
 *   at display level 0.
 * @throws MWException when a selected check is unknown.
 */
protected function checkLanguage( $code ) {
	# Syntax check only. The level is stored as given in the options, so it
	# may be the string '0' rather than the integer 0.
	if ( is_numeric( $this->level ) && intval( $this->level ) === 0 ) {
		$this->L->getMessages( $code );
		# Callers iterate over the result, so hand back an empty list
		return array();
	}

	$results = array();
	$checkFunctions = $this->getChecks();
	$checkBlacklist = $this->getCheckBlacklist();
	foreach ( $this->checks as $check ) {
		if ( isset( $checkBlacklist[$code] ) &&
			in_array( $check, $checkBlacklist[$code] ) ) {
			# Keep the check in the results so that every language
			# reports the same set of checks
			$results[$check] = array();
			continue;
		}

		# Reject unknown codes before touching the method map, so that a
		# mistyped whitelist entry gives the exception and not a notice
		if ( !isset( $checkFunctions[$check] ) ) {
			throw new MWException( "Unkown check $check." );
		}
		$callback = array( $this->L, $checkFunctions[$check] );
		if ( !is_callable( $callback ) ) {
			throw new MWException( "Unkown check $check." );
		}
		$results[$check] = call_user_func( $callback, $code );
	}

	return $results;
}
00308
/**
 * Format a message key for display.
 *
 * Without the 'links' option the key is returned unchanged. Otherwise it
 * becomes a wiki link to the message page in the MediaWiki namespace; the
 * language code is appended as a subpage unless it equals the wiki
 * content language.
 *
 * @param $key String: the message key.
 * @param $code String: the language code.
 * @return String: the key, or a wiki link labelled with the key.
 */
protected function formatKey( $key, $code ) {
	if ( !$this->doLinks ) {
		return $key;
	}

	$page = 'MediaWiki:' . ucfirst( $key );
	if ( $code != $this->wikiCode ) {
		$page .= "/$code";
	}

	return "[[$page|$key]]";
}
00327
/**
 * Print the results as plain text.
 *
 * For every check that found something, a description line is printed,
 * followed (above display level 1) by one line per problematic item: the
 * key only at level 2 or when the value is empty, key and value otherwise.
 */
protected function outputText() {
	# None of these depend on the language or the check
	$descriptions = $this->getDescriptions();
	$nonMessageChecks = $this->nonMessageChecks();
	$totalCounts = $this->getTotalCount();
	$search = array( '$1', '$2', '$3' );

	foreach ( $this->results as $code => $results ) {
		$allMessages = $this->L->getMessages( $code );
		$translated = count( $allMessages['translated'] );
		foreach ( $results as $check => $messages ) {
			$count = count( $messages );
			if ( !$count ) {
				continue;
			}
			$isNonMessageCheck = in_array( $check, $nonMessageChecks );

			# Work out the total used for $2 in the description
			if ( $check == 'untranslated' ) {
				$translatable = $this->L->getGeneralMessages();
				$total = count( $translatable['translatable'] );
			} elseif ( $isNonMessageCheck ) {
				if ( isset( $totalCounts[$check] ) ) {
					list( $method, $fixedCode ) = $totalCounts[$check];
					# A null code means: count on the language being checked
					$callCode = $fixedCode ? $fixedCode : $code;
					$total = count( call_user_func( array( $this->L, $method ), $callCode ) );
				} else {
					# No total is defined for this check (e.g. 'projecttalk',
					# whose description has no $2), so there is nothing to call
					$total = 0;
				}
			} else {
				$total = $translated;
			}

			$replace = array( $count, $total, $code );
			echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";

			if ( $this->level == 1 ) {
				echo "[messages are hidden]\n";
				continue;
			}

			foreach ( $messages as $key => $value ) {
				# Only message keys can be turned into message links
				if ( !$isNonMessageCheck ) {
					$key = $this->formatKey( $key, $code );
				}
				if ( $this->level == 2 || empty( $value ) ) {
					echo "* $key\n";
				} else {
					echo "* $key: '$value'\n";
				}
			}
		}
	}
}
00373
/**
 * Print the results as wiki text: a sortable table with one row per
 * language that has problems and one column per message check, followed
 * by the list of problematic message keys for each language and check.
 *
 * Non-message checks are left out of the table entirely. The header and
 * the cells of every row are built from the same list of checks, so the
 * columns always line up.
 */
public function outputWiki() {
	global $wgContLang, $IP;

	# Only message checks get a column
	$columns = array_diff( $this->checks, $this->nonMessageChecks() );

	$detailText = '';
	$rows = array();
	$rows[] = '! Language !! Code !! Total !! ' . implode( ' !! ', $columns );
	foreach ( $this->results as $code => $results ) {
		$detailTextForLang = "==$code==\n";
		$numbers = array();
		$problems = 0;
		$detailTextForLangChecks = array();
		foreach ( $columns as $check ) {
			# A check without a stored result counts as "no problems"
			$messages = isset( $results[$check] ) ? $results[$check] : array();
			$count = count( $messages );
			if ( $count ) {
				$problems += $count;
				$messageDetails = array();
				foreach ( $messages as $key => $unusedValue ) {
					$messageDetails[] = $this->formatKey( $key, $code );
				}
				$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
				# Link the number to the section listing the keys
				$numbers[] = "'''[[#$code-$check|$count]]'''";
			} else {
				$numbers[] = $count;
			}
		}

		if ( count( $detailTextForLangChecks ) ) {
			$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
		}

		if ( !$problems ) {
			# Don't list languages without problems
			continue;
		}
		$language = $wgContLang->getLanguageName( $code );
		$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
	}

	$tableRows = implode( "\n|-\n", $rows );

	$version = SpecialVersion::getVersion( $IP );
	echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
}
00434
/**
 * Tell whether the stored results contain no problem at all.
 *
 * @return Boolean: false as soon as any check of any language has a
 *   non-empty result, true otherwise.
 */
protected function isEmpty() {
	foreach ( $this->results as $resultsForLanguage ) {
		foreach ( $resultsForLanguage as $found ) {
			if ( empty( $found ) ) {
				continue;
			}
			return false;
		}
	}

	return true;
}
00449 }
00450
00451 class CheckExtensionsCLI extends CheckLanguageCLI {
00452 private $extensions;
00453
/**
 * Read the command line options and collect the extensions to check.
 *
 * When the 'help' option is present, the help text is printed and the
 * script terminates immediately. The parent constructor is not called:
 * the languages object is set per extension in checkLanguage().
 *
 * @param $options Array: option name => value, as given on the command line.
 * @param $extension String: 'all', 'wikimedia', 'flaggedrevs', or one or
 *   more extension names separated by commas.
 */
public function __construct( Array $options, $extension ) {
	if ( isset( $options['help'] ) ) {
		echo $this->help();
		exit();
	}

	if ( isset( $options['lang'] ) ) {
		$this->code = $options['lang'];
	} else {
		global $wgLanguageCode;
		$this->code = $wgLanguageCode;
	}

	if ( isset( $options['level'] ) ) {
		$this->level = $options['level'];
	}

	$this->doLinks = isset( $options['links'] );

	if ( isset( $options['wikilang'] ) ) {
		$this->wikiCode = $options['wikilang'];
	}

	if ( isset( $options['whitelist'] ) ) {
		$this->checks = explode( ',', $options['whitelist'] );
	} elseif ( isset( $options['blacklist'] ) ) {
		$this->checks = array_diff(
			isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
			explode( ',', $options['blacklist'] )
		);
	} elseif ( isset( $options['easy'] ) ) {
		$this->checks = $this->easyChecks();
	} else {
		$this->checks = $this->defaultChecks();
	}

	if ( isset( $options['output'] ) ) {
		$this->output = $options['output'];
	}

	# Force the 'duplicate' check when asked to, but do not list it twice:
	# it would then be run twice for every extension
	if ( isset( $options['duplicate'] ) && !in_array( 'duplicate', $this->checks ) ) {
		$this->checks[] = 'duplicate';
	}

	$this->extensions = array();
	$premadeGroups = new PremadeMediawikiExtensionGroups();
	$premadeGroups->addAll();

	if ( $extension == 'all' ) {
		$this->addExtensionGroupsByPrefix( 'ext-' );
	} elseif ( $extension == 'wikimedia' ) {
		$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
		foreach ( $wikimedia->wmfextensions() as $groupId ) {
			$group = MessageGroups::getGroup( $groupId );
			# Same guard as for explicitly named extensions below
			if ( $group ) {
				$this->extensions[] = new extensionLanguages( $group );
			} else {
				print "No such extension $groupId.\n";
			}
		}
	} elseif ( $extension == 'flaggedrevs' ) {
		$this->addExtensionGroupsByPrefix( 'ext-flaggedrevs-' );
	} else {
		foreach ( explode( ',', $extension ) as $name ) {
			$group = MessageGroups::getGroup( 'ext-' . $name );
			if ( $group ) {
				$this->extensions[] = new extensionLanguages( $group );
			} else {
				print "No such extension $name.\n";
			}
		}
	}
}

/**
 * Add every non-meta message group whose id starts with the given prefix
 * to the list of extensions to check.
 *
 * @param $prefix String: the group id prefix.
 */
private function addExtensionGroupsByPrefix( $prefix ) {
	foreach ( MessageGroups::singleton()->getGroups() as $group ) {
		if ( strpos( $group->getId(), $prefix ) === 0 && !$group->isMeta() ) {
			$this->extensions[] = new extensionLanguages( $group );
		}
	}
}
00538
/**
 * Get the checks which are run on extensions when neither a whitelist
 * nor the 'easy' option is given. Only message checks are listed.
 *
 * @return Array of check codes.
 */
protected function defaultChecks() {
	$checks = array(
		'untranslated',
		'duplicate',
		'obsolete',
		'variables',
		'empty',
		'plural',
		'whitespace',
		'xhtml',
		'chars',
		'links',
		'unbalanced',
	);

	return $checks;
}
00549
/**
 * Get the checks whose results are not interface messages. None of the
 * extension checks is of that kind, so the list is empty.
 *
 * @return Array: always empty.
 */
protected function nonMessageChecks() {
	return array();
}
00557
/**
 * Get the checks selected by the 'easy' option, described in the help as
 * those which can be treated by non-speakers of the language.
 *
 * @return Array of check codes.
 */
protected function easyChecks() {
	$checks = array(
		'duplicate',
		'obsolete',
		'empty',
		'whitespace',
		'xhtml',
		'chars',
	);

	return $checks;
}
00567
/**
 * Get the help text printed for the 'help' option.
 *
 * Lists the options read by the constructor, including 'output', whose
 * accepted values ('plain' and 'wiki') are the ones handled by
 * checkLanguage().
 *
 * @return String: the help text.
 */
protected function help() {
	return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions, "wikimedia" for extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs extension messages.
* lang: Language code (default: the installation default language).
* help: Show this help.
* level: Show the following display level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Do not perform the following checks (form: code,code).
* easy: Do only the easy checks, which can be treated by non-speakers of the language.
* output: Select the output format: 'plain' for plain text (default) or 'wiki' for wiki markup.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used, or with variables which shouldn't be used.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
}
00605
/**
 * Run the checks. Unlike the parent implementation, nothing is printed
 * here: checkLanguage() prints the results itself, extension by extension.
 */
public function execute() {
	$this->doChecks();
}
00612
/**
 * Check one language in every selected extension and print the results
 * right away. Extensions without any problem print nothing; the others
 * print their name, then (above display level 0) the results in the
 * selected output format, then an empty line.
 *
 * Nothing is returned: $this->results is reused for each extension.
 *
 * @param $code String: the language code.
 * @throws MWException when the output format is neither 'plain' nor 'wiki'.
 */
protected function checkLanguage( $code ) {
	foreach ( $this->extensions as $currentExtension ) {
		# The parent implementation runs the checks on $this->L
		$this->L = $currentExtension;
		$this->results = array();
		$this->results[$code] = parent::checkLanguage( $code );

		if ( $this->isEmpty() ) {
			continue;
		}

		echo $currentExtension->name() . ":\n";

		if ( $this->level > 0 ) {
			if ( $this->output == 'plain' ) {
				$this->outputText();
			} elseif ( $this->output == 'wiki' ) {
				$this->outputWiki();
			} else {
				throw new MWException( "Invalid output type $this->output" );
			}
		}

		echo "\n";
	}
}
00643 }
00644
# Blacklist some checks for some languages.
# The result has the form 'code' => array( 'check1', 'check2' ... ).
# Every language listed here skips the 'plural' check, unless it is
# overridden further down.
$checkBlacklist = array_fill_keys(
	array(
		'gan',
		'gn',
		'hak',
		'hu',
		'ja', // Does not use plural
		'ka',
		'kk-arab',
		'kk-cyrl',
		'kk-latn',
		'ko',
		'mn',
		'ms',
		'my', // Overridden below
		'sah',
		'sq',
		'tet',
		'th',
		'wuu',
		'xmf',
		'yue',
		'zh',
		'zh-classical',
		'zh-cn',
		'zh-hans',
		'zh-hant',
		'zh-hk',
		'zh-sg',
		'zh-tw',
		'zh-yue',
	),
	array( 'plural' )
);
# Languages skipping something other than 'plural'
$checkBlacklist['my'] = array( 'chars' ); // Uses a lot zwnj