00001 <?php
00002 # Copyright (C) 2004 Brion Vibber <brion@pobox.com>
00003 # http://www.mediawiki.org/
00004 #
00005 # This program is free software; you can redistribute it and/or modify
00006 # it under the terms of the GNU General Public License as published by
00007 # the Free Software Foundation; either version 2 of the License, or
00008 # (at your option) any later version.
00009 #
00010 # This program is distributed in the hope that it will be useful,
00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00013 # GNU General Public License for more details.
00014 #
00015 # You should have received a copy of the GNU General Public License along
00016 # with this program; if not, write to the Free Software Foundation, Inc.,
00017 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00018 # http://www.gnu.org/copyleft/gpl.html
00019
00029 require_once dirname(__FILE__).'/UtfNormalDefines.php';
00030
/**
 * Return the UTF-8 byte sequence that encodes a single Unicode code point.
 *
 * Values outside 0 .. 0x10ffff are not encodable: a diagnostic is printed
 * and the script is terminated with exit status -1.
 *
 * @param int $codepoint Unicode code point number
 * @return string One to four bytes of UTF-8
 */
function codepointToUtf8( $codepoint ) {
    # Reject unencodable values before doing anything else. A negative number
    # would otherwise satisfy the "< 0x80" test below and be handed to chr(),
    # which wraps it modulo 256 and returns a stray high byte.
    if ( $codepoint < 0 || $codepoint >= 0x110000 ) {
        echo "Asked for code outside of range ($codepoint)\n";
        die( -1 );
    }

    # One byte: plain ASCII.
    if ( $codepoint < 0x80 ) {
        return chr( $codepoint );
    }

    # Two bytes: 110xxxxx 10xxxxxx
    if ( $codepoint < 0x800 ) {
        return chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0xc0 ) .
            chr( ( $codepoint & 0x3f ) | 0x80 );
    }

    # Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ( $codepoint < 0x10000 ) {
        return chr( ( ( $codepoint >> 12 ) & 0x0f ) | 0xe0 ) .
            chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0x80 ) .
            chr( ( $codepoint & 0x3f ) | 0x80 );
    }

    # Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr( ( ( $codepoint >> 18 ) & 0x07 ) | 0xf0 ) .
        chr( ( ( $codepoint >> 12 ) & 0x3f ) | 0x80 ) .
        chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0x80 ) .
        chr( ( $codepoint & 0x3f ) | 0x80 );
}
00054
/**
 * Convert a whitespace-separated list of hexadecimal code point numbers
 * into the UTF-8 string they spell out.
 *
 * Each token is parsed with hexdec() and encoded with codepointToUtf8(),
 * so a token that decodes to an out-of-range value terminates the script
 * there.
 *
 * @param string $sequence Hex code points, e.g. "0041 0301"
 * @return string UTF-8 text; the empty string when $sequence holds no tokens
 */
function hexSequenceToUtf8( $sequence ) {
    $utf = '';

    # Split on runs of whitespace and discard empty tokens. A plain
    # explode( ' ', ... ) turns an empty string, or any leading, trailing or
    # doubled space, into an empty token, and hexdec( '' ) is 0 -- which
    # would inject a spurious NUL character into the result.
    $tokens = preg_split( '/\s+/', $sequence, -1, PREG_SPLIT_NO_EMPTY );

    foreach ( $tokens as $hex ) {
        $utf .= codepointToUtf8( hexdec( $hex ) );
    }

    return $utf;
}
00072
/**
 * Render a UTF-8 string as a space-separated list of hexadecimal code point
 * numbers, each in lower case and zero-padded to at least four digits.
 *
 * @param string $str UTF-8 text
 * @return string e.g. "0041 0301"; the empty string when $str is empty or
 *   cannot be split as UTF-8
 */
function utf8ToHexSequence( $str ) {
    # With the /u flag the empty pattern matches between code points, so this
    # yields one array element per character -- newlines included. It avoids
    # the /e (eval) modifier of preg_replace(), which no longer exists in
    # PHP 7 and which slash-escaped quote characters before evaluation.
    $chars = preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY );
    if ( $chars === false ) {
        # preg_split() failed, e.g. because $str is not valid UTF-8.
        return '';
    }

    $hex = array();
    foreach ( $chars as $char ) {
        $hex[] = sprintf( '%04x', utf8ToCodepoint( $char ) );
    }

    return implode( ' ', $hex );
}
00086
/**
 * Decode a string holding exactly one UTF-8 encoded character into its
 * code point number.
 *
 * The expected byte length is read from the lead byte (the count of its
 * leading 1 bits, or 1 for a byte below 0x80). Trailing bytes are not
 * validated; only their low six bits are used.
 *
 * @param string $char A single UTF-8 character
 * @return int|false The code point, or false when $char is empty or its
 *   byte length does not match the length announced by the lead byte
 */
function utf8ToCodepoint( $char ) {
    # An empty string has no lead byte to inspect.
    if ( $char === '' ) {
        return false;
    }

    # Find the length: count the leading 1 bits of the first byte by
    # shifting them out through bit 7.
    $lead = ord( $char[0] );
    if ( $lead & 0x80 ) {
        $length = 0;
        while ( $lead & 0x80 ) {
            $length++;
            $lead <<= 1;
        }
    } else {
        $length = 1;
    }

    if ( $length != strlen( $char ) ) {
        return false;
    }
    if ( $length == 1 ) {
        return ord( $char );
    }

    # Mask off the length-determining bits (now shifted above bit 7) and
    # shift the payload bits back to their original location.
    $codepoint = ( $lead & 0xff ) >> $length;

    # Add in the six free bits from each subsequent byte.
    for ( $i = 1; $i < $length; $i++ ) {
        $codepoint = ( $codepoint << 6 ) | ( ord( $char[$i] ) & 0x3f );
    }

    return $codepoint;
}
00127
/**
 * Escape a string so it can be placed between single quotes in generated
 * PHP source: every backslash and every single quote gets a backslash
 * put in front of it.
 *
 * @param string $string Raw text
 * @return string Escaped text
 */
function escapeSingleString( $string ) {
    # strtr() with a map scans the input once, so the backslashes added
    # here are never themselves escaped a second time.
    $escapes = array();
    $escapes["\\"] = "\\\\";
    $escapes["'"] = "\\'";

    return strtr( $string, $escapes );
}