<?php
# Copyright (C) 2004,2008 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Command-line generator for Utf8Case.php.
 *
 * Reads UnicodeData.txt from the current directory, collects the values in
 * semicolon-separated columns 12 and 13 of every row (used here as the simple
 * upper-case and simple lower-case mappings of the code point in column 0),
 * and writes them out as two PHP arrays, $wikiUpperChars and $wikiLowerChars,
 * into Utf8Case.php.
 *
 * codepointToUtf8() and escapeSingleString() are not defined in this file;
 * they are expected to be provided by UtfNormalUtil.php.
 */

if( php_sapi_name() != 'cli' ) {
	die( "Run me from the command line please.\n" );
}

require_once 'UtfNormalUtil.php';

$in = fopen("UnicodeData.txt", "rt" );
if( !$in ) {
	print "Can't open UnicodeData.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
	exit(-1);
}
$wikiUpperChars = array();
$wikiLowerChars = array();

print "Reading character definitions...\n";
while( false !== ($line = fgets( $in ) ) ) {
	# explode() instead of split(): split() was removed in PHP 7.0 and the
	# delimiter is a plain one-character string, so no regex is needed.
	$columns = explode( ';', $line );
	if( count( $columns ) < 14 ) {
		# Blank or truncated row: columns 12/13 do not exist, nothing to record.
		continue;
	}
	$codepoint = $columns[0];
	$simpleUpper = $columns[12];
	$simpleLower = $columns[13];

	# Both keys and values of the tables are UTF-8 encoded characters.
	$source = codepointToUtf8( hexdec( $codepoint ) );
	if( $simpleUpper ) {
		$wikiUpperChars[$source] = codepointToUtf8( hexdec( $simpleUpper ) );
	}
	if( $simpleLower ) {
		$wikiLowerChars[$source] = codepointToUtf8( hexdec( $simpleLower ) );
	}
}
fclose( $in );

$out = fopen("Utf8Case.php", "wt");
if( $out ) {
	$outUpperChars = escapeArray( $wikiUpperChars );
	$outLowerChars = escapeArray( $wikiLowerChars );
	# The opening tag is split in two so it is not a literal PHP open tag
	# inside this script's own source. "\$" keeps the variable names literal
	# in the generated file while the $out* values are interpolated.
	$outdata = "<" . "?php
/*
 * Translation array to get upper case character
 */

\$wikiUpperChars = $outUpperChars;

/*
 * Translation array to get lower case character
 */
\$wikiLowerChars = $outLowerChars;\n";
	fputs( $out, $outdata );
	fclose( $out );
	print "Wrote out Utf8Case.php\n";
} else {
	print "Can't create file Utf8Case.php\n";
	exit(-1);
}


/**
 * Render an associative array of strings as PHP source for an array literal.
 *
 * Each key/value pair becomes one line produced by escapeLine(); the lines
 * are joined with ",\n" and wrapped in "array(\n" ... "\n)".
 *
 * @param array $arr Map of string keys to string values.
 * @return string PHP source text of the array literal.
 */
function escapeArray( $arr ) {
	return "array(\n" .
		implode( ",\n",
			array_map( "escapeLine",
				array_keys( $arr ),
				array_values( $arr ) ) ) .
		"\n)";
}

/**
 * Format one key/value pair as a tab-indented, single-quoted array entry.
 *
 * Both strings are passed through escapeSingleString() before being placed
 * between single quotes.
 *
 * @param string $key Array key.
 * @param string $val Array value.
 * @return string Source line of the form "\t'key' => 'value'".
 */
function escapeLine( $key, $val ) {
	$encKey = escapeSingleString( $key );
	$encVal = escapeSingleString( $val );
	return "\t'$encKey' => '$encVal'";
}