00001 <?php
00002 # Copyright (C) 2004,2008 Brion Vibber <brion@pobox.com>
00003 # http://www.mediawiki.org/
00004 #
00005 # This program is free software; you can redistribute it and/or modify
00006 # it under the terms of the GNU General Public License as published by
00007 # the Free Software Foundation; either version 2 of the License, or
00008 # (at your option) any later version.
00009 #
00010 # This program is distributed in the hope that it will be useful,
00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00013 # GNU General Public License for more details.
00014 #
00015 # You should have received a copy of the GNU General Public License along
00016 # with this program; if not, write to the Free Software Foundation, Inc.,
00017 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00018 # http://www.gnu.org/copyleft/gpl.html
00019 
# Abort unless this script is being executed by the command-line SAPI.
if ( PHP_SAPI !== 'cli' ) {
        exit( "Run me from the command line please.\n" );
}
00033 
00034 require_once 'UtfNormalUtil.php';
00035 
# Open UnicodeData.txt (resolved relative to the current working directory)
# for reading; if that fails, tell the user where to get it and stop.
$in = fopen( 'UnicodeData.txt', 'rt' );
if ( $in === false ) {
        echo "Can't open UnicodeData.txt for reading.\n",
                "If necessary, fetch this file from the internet:\n",
                "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
        exit( -1 );
}
# Case-mapping tables built from UnicodeData.txt. Keys are the UTF-8 encoding
# of the source character, values the UTF-8 encoding of the character named
# in column 12 (upper) or column 13 (lower) of the same record.
$wikiUpperChars = array();
$wikiLowerChars = array();

print "Reading character definitions...\n";
while( false !== ( $line = fgets( $in ) ) ) {
        # split() was deprecated in PHP 5.3 and removed in PHP 7.0. The
        # separator is a literal ';', so explode() yields the same columns.
        $columns = explode( ';', rtrim( $line, "\r\n" ) );
        if( count( $columns ) < 14 ) {
                # Blank or truncated record: columns 12 and 13 do not exist.
                continue;
        }

        $codepoint = $columns[0];
        $simpleUpper = $columns[12];
        $simpleLower = $columns[13];

        $source = codepointToUtf8( hexdec( $codepoint ) );
        # An empty column means the record defines no mapping of that kind.
        if( $simpleUpper !== '' ) {
                $wikiUpperChars[$source] = codepointToUtf8( hexdec( $simpleUpper ) );
        }
        if( $simpleLower !== '' ) {
                $wikiLowerChars[$source] = codepointToUtf8( hexdec( $simpleLower ) );
        }
}
fclose( $in );
00063 
# Emit both tables as a PHP source file (Utf8Case.php, in the current working
# directory) that assigns $wikiUpperChars and $wikiLowerChars.
$out = fopen( "Utf8Case.php", "wt" );
if( !$out ) {
        print "Can't create file Utf8Case.php\n";
        exit(-1);
}

$outUpperChars = escapeArray( $wikiUpperChars );
$outLowerChars = escapeArray( $wikiLowerChars );
# The open tag is assembled from two pieces, presumably so that this script's
# own source never contains a literal PHP open tag inside a string.
$outdata = "<" . "?php
/*
 * Translation array to get upper case character
 */

\$wikiUpperChars = $outUpperChars;

/*
 * Translation array to get lower case character
 */
\$wikiLowerChars = $outLowerChars;\n";

# fwrite() can fail or write fewer bytes than requested (e.g. disk full);
# only report success when the whole payload was written and the file closed.
$written = fwrite( $out, $outdata );
$closed = fclose( $out );
if( $written !== strlen( $outdata ) || !$closed ) {
        print "Can't write file Utf8Case.php\n";
        exit(-1);
}
print "Wrote out Utf8Case.php\n";
00097 
00098 
/**
 * Render a map as the source text of a PHP array() literal.
 *
 * Every key/value pair is turned into one line by escapeLine(); the lines
 * are joined with ",\n" and wrapped between "array(\n" and "\n)".
 *
 * @param array $arr Map to render
 * @return string Source text of the array literal
 */
function escapeArray( $arr ) {
        $lines = array();
        foreach( $arr as $key => $val ) {
                $lines[] = escapeLine( $key, $val );
        }
        return "array(\n" . implode( ",\n", $lines ) . "\n)";
}
00107 
/**
 * Format one map entry as a tab-indented "'key' => 'value'" source line.
 *
 * Both sides are passed through escapeSingleString() before being wrapped
 * in single quotes. That helper is not defined in this file; presumably it
 * comes from the required UtfNormalUtil.php.
 *
 * @param mixed $key Entry key
 * @param mixed $val Entry value
 * @return string One line of array-literal source, without trailing comma
 */
function escapeLine( $key, $val ) {
        return sprintf(
                "\t'%s' => '%s'",
                escapeSingleString( $key ),
                escapeSingleString( $val )
        );
}