00001 <?php
00002 # Copyright (C) 2004 Brion Vibber <brion@pobox.com>
00003 # http://www.mediawiki.org/
00004 #
00005 # This program is free software; you can redistribute it and/or modify
00006 # it under the terms of the GNU General Public License as published by
00007 # the Free Software Foundation; either version 2 of the License, or
00008 # (at your option) any later version.
00009 #
00010 # This program is distributed in the hope that it will be useful,
00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00013 # GNU General Public License for more details.
00014 #
00015 # You should have received a copy of the GNU General Public License along
00016 # with this program; if not, write to the Free Software Foundation, Inc.,
00017 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00018 # http://www.gnu.org/copyleft/gpl.html
00019
# Refuse to run anywhere but the command line. This check comes first so that
# a non-CLI request is rejected before any extension or class file is loaded
# (under some configurations PHP fills argv from the query string, so '--icu'
# could otherwise be supplied by a web request).
if( php_sapi_name() !== 'cli' ) {
	die( "Run me from the command line please.\n" );
}

# With --icu on the command line, load the php_utfnormal.so extension before
# the PHP-side classes are included.
if( isset( $_SERVER['argv'] ) && in_array( '--icu', $_SERVER['argv'], true ) ) {
	dl( 'php_utfnormal.so' );
}

require_once 'UtfNormalUtil.php';
require_once 'UtfNormal.php';

# Number of timed repetitions per method; the fastest run is the one reported.
define( 'BENCH_CYCLES', 5 );

# Sample files to benchmark, mapped to the description printed with each one.
$testfiles = array(
	'testdata/washington.txt' => 'English text',
	'testdata/berlin.txt' => 'German text',
	'testdata/bulgakov.txt' => 'Russian text',
	'testdata/tokyo.txt' => 'Japanese text',
	'testdata/young.txt' => 'Korean text'
);

$normalizer = new UtfNormal;
# Load the data tables up front, before any timing starts.
UtfNormal::loadData();

foreach( $testfiles as $file => $desc ) {
	benchmarkTest( $normalizer, $file, $desc );
}
00053
00054 # -------
00055
# Benchmark a fixed set of normalizer methods against one data file.
#
# $u         Object whose methods are benchmarked (passed through to
#            benchmarkForm()).
# $filename  Path of the file whose contents are used as input.
# $desc      Human-readable description, printed next to the file name.
#
# Prints a header line for the file, then one result line per method via
# benchmarkForm(). If the file cannot be read, a message is printed and the
# file is skipped instead of benchmarking a bogus value.
function benchmarkTest( &$u, $filename, $desc ) {
	print "Testing $filename ($desc)...\n";

	$data = is_readable( $filename ) ? file_get_contents( $filename ) : false;
	if( $data === false ) {
		print "  Could not read $filename; skipping.\n";
		return;
	}

	# Entries are method names on $u. An array entry is a pipeline: each step
	# is timed separately and receives the previous step's output as input.
	# Commented-out names are further methods that can be switched on.
	$forms = array(
#		'placebo',
		'cleanUp',
		'toNFC',
#		'toNFKC',
#		'toNFD', 'toNFKD',
		'NFC',
#		'NFKC',
#		'NFD', 'NFKD',
		array( 'fastDecompose', 'fastCombiningSort', 'fastCompose' ),
#		'quickIsNFC', 'quickIsNFCVerify',
	);

	foreach( $forms as $form ) {
		if( is_array( $form ) ) {
			$str = $data;
			foreach( $form as $step ) {
				$str = benchmarkForm( $u, $str, $step );
			}
		} else {
			benchmarkForm( $u, $data, $form );
		}
	}
}
00082
# Return the current Unix timestamp as a float, in seconds with a
# fractional (microsecond) part.
function benchTime(){
	# microtime( true ) yields the float directly; no need to split and add
	# the "usec sec" string form.
	return microtime( true );
}
00087
# Time a single method of $u over $data and print a one-line report.
#
# $u     Object providing the method to benchmark.
# $data  Input string handed to the method.
# $form  Name of the method to call on $u.
#
# The method is called BENCH_CYCLES times (at least once) with $data and the
# global $utfCanonicalDecomp as arguments. The report shows the method name,
# the fastest run in milliseconds, the resulting throughput in bytes per
# second, and whether the output differs from the input.
#
# Returns the output of the last call.
function benchmarkForm( &$u, &$data, $form ) {
	global $utfCanonicalDecomp;

	$cycles = max( 1, BENCH_CYCLES );
	$deltas = array();
	$out = null;
	for( $i = 0; $i < $cycles; $i++ ) {
		$start = benchTime();
		$out = $u->$form( $data, $utfCanonicalDecomp );
		$deltas[] = benchTime() - $start;
	}

	# Report the shortest run rather than the mean.
	$delta = min( $deltas );

	# A run can measure as zero on a coarse timer or trivial input; report a
	# rate of 0 instead of dividing by zero.
	$rate = ( $delta > 0 ) ? intval( strlen( $data ) / $delta ) : 0;
	$same = ( strcmp( $data, $out ) === 0 );

	printf( " %20s %6.1fms %12s bytes/s (%s)\n",
		$form,
		$delta * 1000.0,
		number_format( $rate ),
		( $same ? 'no change' : 'changed' ) );
	return $out;
}