<?php
# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Command-line test driver for the UtfNormal normalization routines.
 *
 * Stage 1 reads NormalizationTest.txt from the current directory and checks
 * toNFC/toNFD/toNFKC/toNFKD/cleanUp against the five columns of every entry.
 * Stage 2 reads UnicodeData.txt and checks that every character that did not
 * appear as column 1 in stage 1 is left unchanged by all five operations.
 *
 * Pass --icu on the command line to load php_utfnormal.so before the
 * normalizer is included. Exits with status 0 when every stage passed and
 * with -1 otherwise.
 */

# When true, the individual comparisons of every failing test are printed.
$verbose = true;

# Uncomment to make pretty() dump raw bytes instead of code points.
#define( 'PRETTY_UTF8', true );

if( defined( 'PRETTY_UTF8' ) ) {
	/**
	 * Render a string as upper-case hex, two digits per byte.
	 *
	 * @param string $string
	 * @return string
	 */
	function pretty( $string ) {
		# Equivalent to sprintf( "%02X", ord( $byte ) ) for every byte, without
		# relying on the /e regex modifier (removed in PHP 7).
		return strtoupper( bin2hex( (string)$string ) );
	}
} else {
	/**
	 * Render a UTF-8 string as space-separated, zero-padded (4 digit minimum)
	 * upper-case hex code points.
	 *
	 * Uses utf8ToCodepoint(), which is not defined in this file (presumably it
	 * comes from UtfNormalUtil.php, included below).
	 *
	 * @param string $string
	 * @return string Empty string for empty input or input the UTF-8 regex
	 *  engine rejects.
	 */
	function pretty( $string ) {
		$chars = array();
		# preg_match_all() yields 0 for an empty string and false on invalid
		# UTF-8; both cases produce an empty dump.
		if( !preg_match_all( '/./us', (string)$string, $chars ) ) {
			return '';
		}
		$out = array();
		foreach( $chars[0] as $char ) {
			$out[] = sprintf( '%04X', utf8ToCodepoint( $char ) );
		}
		return implode( ' ', $out );
	}
}

if( isset( $_SERVER['argv'] ) && in_array( '--icu', $_SERVER['argv'], true ) ) {
	dl( 'php_utfnormal.so' );
}

require_once 'UtfNormalUtil.php';
require_once 'UtfNormal.php';

if( php_sapi_name() != 'cli' ) {
	die( "Run me from the command line please.\n" );
}

$in = fopen( "NormalizationTest.txt", "rt" );
if( !$in ) {
	print "Couldn't open NormalizationTest.txt -- can't run tests.\n";
	print "If necessary, manually download this file. It can be obtained at\n";
	print "http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";
	exit(-1);
}

$normalizer = new UtfNormal;

$total = 0;
$success = 0;
$failure = 0;
$ok = true;
# Set of characters (keyed by their UTF-8 form) seen in column 1 of the
# conformance file; the invariant stage skips these.
$testedChars = array();
while( false !== ( $line = fgets( $in ) ) ) {
	# Split into data and trailing comment. Limit to two pieces so a '#'
	# inside the comment does not truncate it, and tolerate lines that
	# have no comment at all.
	$pieces = explode( '#', $line, 2 );
	$data = $pieces[0];
	$comment = isset( $pieces[1] ) ? $pieces[1] : '';

	# Comment-only and blank lines carry no test data. fgets() keeps the
	# line terminator, so compare after trimming.
	if( trim( $data ) === '' ) continue;

	$matches = array();
	if( preg_match( '/@Part([\d])/', $data, $matches ) ) {
		# A new part starts: report (and reset) the counters of the previous
		# one. Nothing has been counted yet before part 0.
		if( $matches[1] > 0 ) {
			$ok = reportResults( $total, $success, $failure ) && $ok;
		}
		print "Part {$matches[1]}: $comment";
		continue;
	}

	$columns = array_map( "hexSequenceToUtf8", explode( ";", $data ) );
	# Shift the columns so that $columns[1]..$columns[5] correspond to
	# c1..c5 as they are named in the conformance file.
	array_unshift( $columns, '' );

	$testedChars[$columns[1]] = true;
	$total++;
	if( testNormals( $normalizer, $columns, $comment ) ) {
		$success++;
	} else {
		$failure++;
		# print "FAILED: $comment";
	}
	if( $total % 100 == 0 ) print "$total ";
}
fclose( $in );

$ok = reportResults( $total, $success, $failure ) && $ok;

$in = fopen( "UnicodeData.txt", "rt" );
if( !$in ) {
	print "Can't open UnicodeData.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
	exit(-1);
}
print "Now testing invariants...\n";
while( false !== ( $line = fgets( $in ) ) ) {
	$cols = explode( ';', $line );
	if( count( $cols ) < 2 ) {
		# Blank or malformed line: no code point / name pair to test.
		continue;
	}
	$char = codepointToUtf8( hexdec( $cols[0] ) );
	$desc = $cols[0] . ": " . $cols[1];
	if( $char < "\x20"
		|| ( $char >= UTF8_SURROGATE_FIRST && $char <= UTF8_SURROGATE_LAST )
	) {
		# Can't check NULL with the ICU plugin, as null bytes fail in C land.
		# Skip other control characters, as we strip them for XML safety.
		# Surrogates are illegal on their own or in UTF-8, ignore.
		continue;
	}
	if( empty( $testedChars[$char] ) ) {
		$total++;
		if( testInvariant( $normalizer, $char, $desc ) ) {
			$success++;
		} else {
			$failure++;
		}
		if( $total % 100 == 0 ) print "$total ";
	}
}
fclose( $in );

$ok = reportResults( $total, $success, $failure ) && $ok;

if( $ok ) {
	print "TEST SUCCEEDED!\n";
	exit(0);
} else {
	print "TEST FAILED!\n";
	exit(-1);
}

## ------

/**
 * Print the success/failure summary for one stage and reset the counters.
 *
 * @param int &$total Number of tests run; reset to 0 on return.
 * @param int &$success Number of passed tests; reset to 0 on return.
 * @param int &$failure Number of failed tests; reset to 0 on return.
 * @return bool True when at least one test passed and none failed.
 */
function reportResults( &$total, &$success, &$failure ) {
	# Guard against an empty stage: dividing by zero raises
	# DivisionByZeroError on PHP 8 and a warning on older versions.
	$percSucc = $total > 0 ? intval( $success * 100 / $total ) : 0;
	$percFail = $total > 0 ? intval( $failure * 100 / $total ) : 0;
	print "\n";
	print "$success tests successful ($percSucc%)\n";
	print "$failure tests failed ($percFail%)\n\n";
	$ok = ( $success > 0 && $failure == 0 );
	$total = 0;
	$success = 0;
	$failure = 0;
	return $ok;
}

/**
 * Run every normalization check on one conformance entry.
 *
 * When the global $verbose flag is set and the entry fails, the comment is
 * printed and all checks are run a second time with per-comparison output.
 *
 * @param object &$u Normalizer providing toNFC/toNFD/toNFKC/toNFKD/cleanUp.
 * @param array $c Test columns, indexed 1..5.
 * @param string $comment Trailing comment of the entry, printed on failure.
 * @param bool $reportFailure Print every comparison while testing.
 * @return bool True when all checks passed.
 */
function testNormals( &$u, $c, $comment, $reportFailure = false ) {
	# Every form is always run (no short-circuit) so that a verbose pass
	# lists all comparisons, not just the first failing one.
	$result = testNFC( $u, $c, $comment, $reportFailure );
	$result = testNFD( $u, $c, $comment, $reportFailure ) && $result;
	$result = testNFKC( $u, $c, $comment, $reportFailure ) && $result;
	$result = testNFKD( $u, $c, $comment, $reportFailure ) && $result;
	$result = testCleanUp( $u, $c, $comment, $reportFailure ) && $result;

	global $verbose;
	if( $verbose && !$result && !$reportFailure ) {
		print $comment;
		testNormals( $u, $c, $comment, true );
	}
	return $result;
}

/**
 * Compare an expected and an actual string, optionally printing the outcome.
 *
 * @param string $a Expected value.
 * @param string $b Actual value.
 * @param int $col Number of the input column, used in the output only.
 * @param string $form Label of the operation, used in the output only.
 * @param bool $verbose Print the comparison (both values via pretty()).
 * @return bool True when both strings are byte-for-byte equal.
 */
function verbosify( $a, $b, $col, $form, $verbose ) {
	#$result = ($a === $b);
	$result = ( strcmp( $a, $b ) === 0 );
	if( $verbose ) {
		$aa = pretty( $a );
		$bb = pretty( $b );
		$ok = $result ? "succeed" : " failed";
		$eq = $result ? "==" : "!=";
		print " $ok $form c$col '$aa' $eq '$bb'\n";
	}
	return $result;
}

/**
 * Shared worker for the test* functions: apply one normalizer method to the
 * listed input columns and compare each result with its expected column.
 *
 * @param object &$u Normalizer object.
 * @param array $c Test columns, indexed 1..5.
 * @param string $method Name of the method to call on $u.
 * @param string $form Label used in verbose output.
 * @param array $expected Map of input column => column holding the expected
 *  result.
 * @param bool $verbose Print every comparison.
 * @return bool True when every comparison matched.
 */
function checkNormalForm( &$u, $c, $method, $form, $expected, $verbose ) {
	$result = true;
	foreach( $expected as $col => $want ) {
		# Hand over a copy, in case the normalizer takes its argument by
		# reference; the columns are still needed as expected values.
		$input = $c[$col];
		$result = verbosify( $c[$want], $u->$method( $input ), $col, $form, $verbose ) && $result;
	}
	return $result;
}

/**
 * Check toNFC(): c1, c2 and c3 must yield c2; c4 and c5 must yield c4.
 *
 * @param object &$u Normalizer object.
 * @param array $c Test columns, indexed 1..5.
 * @param string $comment Unused.
 * @param bool $verbose Print every comparison.
 * @return bool
 */
function testNFC( &$u, $c, $comment, $verbose ) {
	return checkNormalForm( $u, $c, 'toNFC', 'NFC',
		array( 1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4 ), $verbose );
}

/**
 * Check cleanUp() against the same expectations as toNFC().
 *
 * @param object &$u Normalizer object.
 * @param array $c Test columns, indexed 1..5.
 * @param string $comment Unused.
 * @param bool $verbose Print every comparison.
 * @return bool
 */
function testCleanUp( &$u, $c, $comment, $verbose ) {
	return checkNormalForm( $u, $c, 'cleanUp', 'cleanUp',
		array( 1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4 ), $verbose );
}

/**
 * Check toNFD(): c1, c2 and c3 must yield c3; c4 and c5 must yield c5.
 *
 * @param object &$u Normalizer object.
 * @param array $c Test columns, indexed 1..5.
 * @param string $comment Unused.
 * @param bool $verbose Print every comparison.
 * @return bool
 */
function testNFD( &$u, $c, $comment, $verbose ) {
	return checkNormalForm( $u, $c, 'toNFD', 'NFD',
		array( 1 => 3, 2 => 3, 3 => 3, 4 => 5, 5 => 5 ), $verbose );
}

/**
 * Check toNFKC(): every column must yield c4.
 *
 * @param object &$u Normalizer object.
 * @param array $c Test columns, indexed 1..5.
 * @param string $comment Unused.
 * @param bool $verbose Print every comparison.
 * @return bool
 */
function testNFKC( &$u, $c, $comment, $verbose ) {
	return checkNormalForm( $u, $c, 'toNFKC', 'NFKC',
		array( 1 => 4, 2 => 4, 3 => 4, 4 => 4, 5 => 4 ), $verbose );
}

/**
 * Check toNFKD(): every column must yield c5.
 *
 * @param object &$u Normalizer object.
 * @param array $c Test columns, indexed 1..5.
 * @param string $comment Unused.
 * @param bool $verbose Print every comparison.
 * @return bool
 */
function testNFKD( &$u, $c, $comment, $verbose ) {
	return checkNormalForm( $u, $c, 'toNFKD', 'NFKD',
		array( 1 => 5, 2 => 5, 3 => 5, 4 => 5, 5 => 5 ), $verbose );
}

/**
 * Check that a character is returned unchanged by all five operations.
 *
 * When the global $verbose flag is set and a check fails, the description is
 * printed and the checks are run a second time with per-comparison output.
 *
 * @param object &$u Normalizer object.
 * @param string $char Character under test, as UTF-8.
 * @param string $desc Description printed on failure.
 * @param bool $reportFailure Print every comparison while testing.
 * @return bool True when all five results equal $char.
 */
function testInvariant( &$u, $char, $desc, $reportFailure = false ) {
	$forms = array(
		'NFC' => 'toNFC',
		'NFD' => 'toNFD',
		'NFKC' => 'toNFKC',
		'NFKD' => 'toNFKD',
		'cleanUp' => 'cleanUp',
	);
	$result = true;
	foreach( $forms as $form => $method ) {
		# Hand over a copy, in case the normalizer takes its argument by
		# reference; $char is still needed as the expected value.
		$input = $char;
		$result = verbosify( $char, $u->$method( $input ), 1, $form, $reportFailure ) && $result;
	}
	global $verbose;
	if( $verbose && !$result && !$reportFailure ) {
		print $desc;
		testInvariant( $u, $char, $desc, true );
	}
	return $result;
}