00001 <?php
00002 # Copyright (C) 2004,2008 Brion Vibber <brion@pobox.com>
00003 # http://www.mediawiki.org/
00004 #
00005 # This program is free software; you can redistribute it and/or modify
00006 # it under the terms of the GNU General Public License as published by
00007 # the Free Software Foundation; either version 2 of the License, or
00008 # (at your option) any later version.
00009 #
00010 # This program is distributed in the hope that it will be useful,
00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00013 # GNU General Public License for more details.
00014 #
00015 # You should have received a copy of the GNU General Public License along
00016 # with this program; if not, write to the Free Software Foundation, Inc.,
00017 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00018 # http://www.gnu.org/copyleft/gpl.html
00019
// This generator is a maintenance tool: bail out immediately when it is
// started through anything other than the command-line SAPI.
if ( 'cli' !== php_sapi_name() ) {
	exit( "Run me from the command line please.\n" );
}
00033
00034 require_once 'UtfNormalUtil.php';
00035
// Open the Unicode character database from the current working directory.
$in = fopen( "UnicodeData.txt", "rt" );
if ( !$in ) {
	print "Can't open UnicodeData.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
	exit( -1 );
}

// Translation tables, keyed by the UTF-8 encoded source character and
// holding the UTF-8 encoded upper/lower case counterpart.
$wikiUpperChars = array();
$wikiLowerChars = array();

print "Reading character definitions...\n";
while ( false !== ( $line = fgets( $in ) ) ) {
	// Each record is a ';'-separated list of fields. explode() is used
	// because the POSIX-regex split() no longer exists as of PHP 7.
	$columns = explode( ';', $line );
	if ( !isset( $columns[13] ) ) {
		// Blank or truncated line: it has no case-mapping fields to read.
		continue;
	}

	$codepoint = $columns[0];
	$simpleUpper = $columns[12];
	$simpleLower = $columns[13];

	// Code points are written in hexadecimal; an empty mapping field
	// means the character has no mapping of that kind.
	$source = codepointToUtf8( hexdec( $codepoint ) );
	if ( $simpleUpper !== '' ) {
		$wikiUpperChars[$source] = codepointToUtf8( hexdec( $simpleUpper ) );
	}
	if ( $simpleLower !== '' ) {
		$wikiLowerChars[$source] = codepointToUtf8( hexdec( $simpleLower ) );
	}
}
fclose( $in );
00063
// Write both translation tables out as a PHP source file.
$out = fopen( "Utf8Case.php", "wt" );
if ( !$out ) {
	print "Can't create file Utf8Case.php\n";
	exit( -1 );
}

// escapeArray() yields the PHP source text for each table.
$outUpperChars = escapeArray( $wikiUpperChars );
$outLowerChars = escapeArray( $wikiLowerChars );

// The opening tag is split in two so it is never seen as a real PHP tag.
$outdata = '<' . "?php\n";
$outdata .= "/*\n * Translation array to get upper case character\n */\n\n";
$outdata .= '$wikiUpperChars = ' . $outUpperChars . ";\n\n";
$outdata .= "/*\n * Translation array to get lower case character\n */\n";
$outdata .= '$wikiLowerChars = ' . $outLowerChars . ";\n";

fputs( $out, $outdata );
fclose( $out );
print "Wrote out Utf8Case.php\n";
00097
00098
/**
 * Render an associative array as the source text of a PHP array() literal.
 *
 * Every key/value pair is formatted by escapeLine(); the resulting lines
 * are joined with ",\n" and wrapped in "array(\n" ... "\n)".
 *
 * @param array $arr Pairs to serialize
 * @return string PHP source code for the array
 */
function escapeArray( $arr ) {
	$entries = array();
	foreach ( $arr as $from => $to ) {
		$entries[] = escapeLine( $from, $to );
	}
	return "array(\n" . implode( ",\n", $entries ) . "\n)";
}
00107
/**
 * Format one key/value pair as a line of a PHP array literal.
 *
 * Both sides are passed through escapeSingleString() before being placed
 * inside single quotes; the line is prefixed with a tab.
 *
 * @param mixed $key Array key
 * @param mixed $val Array value
 * @return string Text of the form <tab>'key' => 'value'
 */
function escapeLine( $key, $val ) {
	$quotedKey = "'" . escapeSingleString( $key ) . "'";
	$quotedVal = "'" . escapeSingleString( $val ) . "'";
	return "\t" . $quotedKey . ' => ' . $quotedVal;
}