00001 <?php
00002
/**
 * Content-type sniffer that reports, for each entry in $versions, which MIME
 * type a (file name, leading data chunk, proposed type) triple resolves to.
 *
 * NOTE(review): judging by the class name and the 'ie05'...'ie07' version keys,
 * this models the MIME detection of several Internet Explorer releases,
 * including its quirks. Treat apparent oddities as possibly intentional and
 * confirm against the upstream reference before "fixing" them.
 */
class IEContentAnalyzer {
	/**
	 * Base classification of known MIME types into data formats
	 * ('ambiguous', 'text', 'binary', 'html'). Types not listed here are
	 * reported as 'unknown' by getDataFormat().
	 */
	protected $baseTypeTable = array(
		'ambiguous' => array(
			'text/plain',
			'application/octet-stream',
			'application/x-netcdf',
		),
		'text' => array(
			'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
			'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
		),
		'binary' => array(
			'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
			'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
			'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
			'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
			'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
			'application/x-msdownload'
		),
		'html' => array( 'text/html' ),
	);

	/**
	 * Types added to the format table starting with a given version.
	 * The constructor applies these cumulatively, in $versions order.
	 */
	protected $addedTypes = array(
		'ie07' => array(
			'text' => array( 'text/xml', 'application/xml' )
		),
	);

	/**
	 * File extension (including the leading dot) => MIME type. Consulted as
	 * the last resort in getMimeTypeForVersion() when both the detected and
	 * the proposed type are ambiguous.
	 *
	 * NOTE(review): presumably mirrors the "Content Type" values of a Windows
	 * registry - confirm.
	 */
	protected $registry = array(
		'.323' => 'text/h323',
		'.3g2' => 'video/3gpp2',
		'.3gp' => 'video/3gpp',
		'.3gp2' => 'video/3gpp2',
		'.3gpp' => 'video/3gpp',
		'.aac' => 'audio/aac',
		'.ac3' => 'audio/ac3',
		'.accda' => 'application/msaccess',
		'.accdb' => 'application/msaccess',
		'.accdc' => 'application/msaccess',
		'.accde' => 'application/msaccess',
		'.accdr' => 'application/msaccess',
		'.accdt' => 'application/msaccess',
		'.ade' => 'application/msaccess',
		'.adp' => 'application/msaccess',
		'.adts' => 'audio/aac',
		'.ai' => 'application/postscript',
		'.aif' => 'audio/aiff',
		'.aifc' => 'audio/aiff',
		'.aiff' => 'audio/aiff',
		'.amc' => 'application/x-mpeg',
		'.application' => 'application/x-ms-application',
		'.asf' => 'video/x-ms-asf',
		'.asx' => 'video/x-ms-asf',
		'.au' => 'audio/basic',
		'.avi' => 'video/avi',
		'.bmp' => 'image/bmp',
		'.caf' => 'audio/x-caf',
		'.cat' => 'application/vnd.ms-pki.seccat',
		'.cbo' => 'application/sha',
		'.cdda' => 'audio/aiff',
		'.cer' => 'application/x-x509-ca-cert',
		'.conf' => 'text/plain',
		'.crl' => 'application/pkix-crl',
		'.crt' => 'application/x-x509-ca-cert',
		'.css' => 'text/css',
		'.csv' => 'application/vnd.ms-excel',
		'.der' => 'application/x-x509-ca-cert',
		'.dib' => 'image/bmp',
		'.dif' => 'video/x-dv',
		'.dll' => 'application/x-msdownload',
		'.doc' => 'application/msword',
		'.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
		'.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
		'.dot' => 'application/msword',
		'.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
		'.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
		'.dv' => 'video/x-dv',
		'.dwfx' => 'model/vnd.dwfx+xps',
		'.edn' => 'application/vnd.adobe.edn',
		'.eml' => 'message/rfc822',
		'.eps' => 'application/postscript',
		'.etd' => 'application/x-ebx',
		'.exe' => 'application/x-msdownload',
		'.fdf' => 'application/vnd.fdf',
		'.fif' => 'application/fractals',
		'.gif' => 'image/gif',
		'.gsm' => 'audio/x-gsm',
		'.hqx' => 'application/mac-binhex40',
		'.hta' => 'application/hta',
		'.htc' => 'text/x-component',
		'.htm' => 'text/html',
		'.html' => 'text/html',
		'.htt' => 'text/webviewhtml',
		'.hxa' => 'application/xml',
		'.hxc' => 'application/xml',
		'.hxd' => 'application/octet-stream',
		'.hxe' => 'application/xml',
		'.hxf' => 'application/xml',
		'.hxh' => 'application/octet-stream',
		'.hxi' => 'application/octet-stream',
		'.hxk' => 'application/xml',
		'.hxq' => 'application/octet-stream',
		'.hxr' => 'application/octet-stream',
		'.hxs' => 'application/octet-stream',
		'.hxt' => 'application/xml',
		'.hxv' => 'application/xml',
		'.hxw' => 'application/octet-stream',
		'.ico' => 'image/x-icon',
		'.iii' => 'application/x-iphone',
		'.ins' => 'application/x-internet-signup',
		'.iqy' => 'text/x-ms-iqy',
		'.isp' => 'application/x-internet-signup',
		'.jfif' => 'image/jpeg',
		'.jnlp' => 'application/x-java-jnlp-file',
		'.jpe' => 'image/jpeg',
		'.jpeg' => 'image/jpeg',
		'.jpg' => 'image/jpeg',
		'.jtx' => 'application/x-jtx+xps',
		'.latex' => 'application/x-latex',
		'.log' => 'text/plain',
		'.m1v' => 'video/mpeg',
		'.m2v' => 'video/mpeg',
		'.m3u' => 'audio/x-mpegurl',
		'.mac' => 'image/x-macpaint',
		'.man' => 'application/x-troff-man',
		'.mda' => 'application/msaccess',
		'.mdb' => 'application/msaccess',
		'.mde' => 'application/msaccess',
		'.mfp' => 'application/x-shockwave-flash',
		'.mht' => 'message/rfc822',
		'.mhtml' => 'message/rfc822',
		'.mid' => 'audio/mid',
		'.midi' => 'audio/mid',
		'.mod' => 'video/mpeg',
		'.mov' => 'video/quicktime',
		'.mp2' => 'video/mpeg',
		'.mp2v' => 'video/mpeg',
		'.mp3' => 'audio/mpeg',
		'.mp4' => 'video/mp4',
		'.mpa' => 'video/mpeg',
		'.mpe' => 'video/mpeg',
		'.mpeg' => 'video/mpeg',
		'.mpf' => 'application/vnd.ms-mediapackage',
		'.mpg' => 'video/mpeg',
		'.mpv2' => 'video/mpeg',
		'.mqv' => 'video/quicktime',
		'.NMW' => 'application/nmwb',
		'.nws' => 'message/rfc822',
		'.odc' => 'text/x-ms-odc',
		'.ols' => 'application/vnd.ms-publisher',
		'.p10' => 'application/pkcs10',
		'.p12' => 'application/x-pkcs12',
		'.p7b' => 'application/x-pkcs7-certificates',
		'.p7c' => 'application/pkcs7-mime',
		'.p7m' => 'application/pkcs7-mime',
		'.p7r' => 'application/x-pkcs7-certreqresp',
		'.p7s' => 'application/pkcs7-signature',
		'.pct' => 'image/pict',
		'.pdf' => 'application/pdf',
		'.pdx' => 'application/vnd.adobe.pdx',
		'.pfx' => 'application/x-pkcs12',
		'.pic' => 'image/pict',
		'.pict' => 'image/pict',
		'.pinstall' => 'application/x-picasa-detect',
		'.pko' => 'application/vnd.ms-pki.pko',
		'.png' => 'image/png',
		'.pnt' => 'image/x-macpaint',
		'.pntg' => 'image/x-macpaint',
		'.pot' => 'application/vnd.ms-powerpoint',
		'.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
		'.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
		'.ppa' => 'application/vnd.ms-powerpoint',
		'.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
		'.pps' => 'application/vnd.ms-powerpoint',
		'.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
		'.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
		'.ppt' => 'application/vnd.ms-powerpoint',
		'.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
		'.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
		'.prf' => 'application/pics-rules',
		'.ps' => 'application/postscript',
		'.pub' => 'application/vnd.ms-publisher',
		'.pwz' => 'application/vnd.ms-powerpoint',
		'.py' => 'text/plain',
		'.pyw' => 'text/plain',
		'.qht' => 'text/x-html-insertion',
		'.qhtm' => 'text/x-html-insertion',
		'.qt' => 'video/quicktime',
		'.qti' => 'image/x-quicktime',
		'.qtif' => 'image/x-quicktime',
		'.qtl' => 'application/x-quicktimeplayer',
		'.rat' => 'application/rat-file',
		'.rmf' => 'application/vnd.adobe.rmf',
		'.rmi' => 'audio/mid',
		'.rqy' => 'text/x-ms-rqy',
		'.rtf' => 'application/msword',
		'.sct' => 'text/scriptlet',
		'.sd2' => 'audio/x-sd2',
		'.sdp' => 'application/sdp',
		'.shtml' => 'text/html',
		'.sit' => 'application/x-stuffit',
		'.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
		'.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
		'.slk' => 'application/vnd.ms-excel',
		'.snd' => 'audio/basic',
		'.so' => 'application/x-apachemodule',
		'.sol' => 'text/plain',
		'.sor' => 'text/plain',
		'.spc' => 'application/x-pkcs7-certificates',
		'.spl' => 'application/futuresplash',
		'.sst' => 'application/vnd.ms-pki.certstore',
		'.stl' => 'application/vnd.ms-pki.stl',
		'.swf' => 'application/x-shockwave-flash',
		'.thmx' => 'application/vnd.ms-officetheme',
		'.tif' => 'image/tiff',
		'.tiff' => 'image/tiff',
		'.txt' => 'text/plain',
		'.uls' => 'text/iuls',
		'.vcf' => 'text/x-vcard',
		'.vdx' => 'application/vnd.ms-visio.viewer',
		'.vsd' => 'application/vnd.ms-visio.viewer',
		'.vss' => 'application/vnd.ms-visio.viewer',
		'.vst' => 'application/vnd.ms-visio.viewer',
		'.vsx' => 'application/vnd.ms-visio.viewer',
		'.vtx' => 'application/vnd.ms-visio.viewer',
		'.wav' => 'audio/wav',
		'.wax' => 'audio/x-ms-wax',
		'.wbk' => 'application/msword',
		'.wdp' => 'image/vnd.ms-photo',
		'.wiz' => 'application/msword',
		'.wm' => 'video/x-ms-wm',
		'.wma' => 'audio/x-ms-wma',
		'.wmd' => 'application/x-ms-wmd',
		'.wmv' => 'video/x-ms-wmv',
		'.wmx' => 'video/x-ms-wmx',
		'.wmz' => 'application/x-ms-wmz',
		'.wpl' => 'application/vnd.ms-wpl',
		'.wsc' => 'text/scriptlet',
		'.wvx' => 'video/x-ms-wvx',
		'.xaml' => 'application/xaml+xml',
		'.xbap' => 'application/x-ms-xbap',
		'.xdp' => 'application/vnd.adobe.xdp+xml',
		'.xfdf' => 'application/vnd.adobe.xfdf',
		'.xht' => 'application/xhtml+xml',
		'.xhtml' => 'application/xhtml+xml',
		'.xla' => 'application/vnd.ms-excel',
		'.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
		'.xlk' => 'application/vnd.ms-excel',
		'.xll' => 'application/vnd.ms-excel',
		'.xlm' => 'application/vnd.ms-excel',
		'.xls' => 'application/vnd.ms-excel',
		'.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
		'.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
		'.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
		'.xlt' => 'application/vnd.ms-excel',
		'.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
		'.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
		'.xlw' => 'application/vnd.ms-excel',
		'.xml' => 'text/xml',
		'.xps' => 'application/vnd.ms-xpsdocument',
		'.xsl' => 'text/xml',
	);

	/**
	 * Version/mode identifiers, in ascending order. The code relies on plain
	 * string ordering of these keys (e.g. $version >= 'ie07') and on the
	 * substrings 'strict' and 'nohtml' to select behaviour.
	 */
	protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );

	/**
	 * Format table per version, built by the constructor:
	 * version => ( format => list of MIME types ).
	 */
	protected $typeTable = array();

	/**
	 * Build $typeTable from $baseTypeTable plus the cumulative $addedTypes.
	 */
	public function __construct() {
		// $types carries over between iterations so that types added for one
		// version remain present for every later version.
		$types = $this->baseTypeTable;
		foreach ( $this->versions as $version ) {
			if ( isset( $this->addedTypes[$version] ) ) {
				foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {
					// Tolerate a format that is absent from the base table
					$existing = isset( $types[$format] ) ? $types[$format] : array();
					$types[$format] = array_merge( $existing, $addedTypes );
				}
			}
			$this->typeTable[$version] = $types;
		}
	}

	/**
	 * Same as getMimesFromData(), but with each result passed through
	 * translateMimeType().
	 *
	 * @param string $fileName The file name (extension is used as a last resort)
	 * @param string $chunk The leading bytes of the file
	 * @param string $proposed The proposed MIME type
	 * @return array Map of version => translated MIME type
	 */
	public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
		$types = $this->getMimesFromData( $fileName, $chunk, $proposed );
		return array_map( array( $this, 'translateMimeType' ), $types );
	}

	/**
	 * Map one of the type names produced by the detection code to the
	 * alternative name listed in the table below; other types pass through.
	 *
	 * @param string $type
	 * @return string
	 */
	public function translateMimeType( $type ) {
		static $table = array(
			'image/pjpeg' => 'image/jpeg',
			'image/x-png' => 'image/png',
			'image/x-wmf' => 'application/x-msmetafile',
			'image/bmp' => 'image/x-bmp',
			'application/x-zip-compressed' => 'application/zip',
			'application/x-compressed' => 'application/x-compress',
			'application/x-gzip-compressed' => 'application/x-gzip',
			'audio/mid' => 'audio/midi',
		);
		return isset( $table[$type] ) ? $table[$type] : $type;
	}

	/**
	 * Run the detection once for every entry in $versions.
	 *
	 * @param string $fileName The file name (extension is used as a last resort)
	 * @param string $chunk The leading bytes of the file
	 * @param string $proposed The proposed MIME type
	 * @return array Map of version => detected MIME type
	 */
	public function getMimesFromData( $fileName, $chunk, $proposed ) {
		$types = array();
		foreach ( $this->versions as $version ) {
			$types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );
		}
		return $types;
	}

	/**
	 * Detect the MIME type for a single version.
	 *
	 * @param string $version One of the entries of $versions
	 * @param string $fileName The file name
	 * @param string $chunk The leading bytes of the file; only the first 255
	 *   bytes are examined
	 * @param string $proposed The proposed MIME type; parameters after ';'
	 *   are ignored
	 * @return string The detected MIME type
	 */
	protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
		// Strip parameters such as "; charset=..."
		$semiPos = strpos( $proposed, ';' );
		if ( $semiPos !== false ) {
			$proposed = substr( $proposed, 0, $semiPos );
		}

		// A proposed type outside the format table is trusted as-is, with the
		// exception of the two multipart types which are sniffed regardless.
		$proposedFormat = $this->getDataFormat( $version, $proposed );
		if ( $proposedFormat === 'unknown'
			&& $proposed !== 'multipart/mixed'
			&& $proposed !== 'multipart/x-mixed-replace' )
		{
			return $proposed;
		}
		// Nothing to sniff
		if ( strval( $chunk ) === '' ) {
			return $proposed;
		}

		// Only the first 255 bytes take part in detection
		$chunk = substr( $chunk, 0, 255 );

		$result = $this->sampleData( $version, $chunk );
		$sampleFound = $result['found'];
		$counters = $result['counters'];
		$binaryType = $this->checkBinaryHeaders( $version, $chunk );
		$textType = $this->checkTextHeaders( $version, $chunk );

		// Cases where the proposed type is confirmed by the data
		if ( $proposed === 'text/html' && isset( $sampleFound['html'] ) ) {
			return 'text/html';
		}
		if ( $proposed === 'image/gif' && $binaryType === 'image/gif' ) {
			return 'image/gif';
		}
		if ( ( $proposed === 'image/pjpeg' || $proposed === 'image/jpeg' )
			&& $binaryType === 'image/pjpeg' )
		{
			return $proposed;
		}
		// PNG confirmation only applies from 'ie07' on (string ordering of
		// the version keys, so 'ie07.strict' and 'ie07.nohtml' qualify too)
		if ( $version >= 'ie07'
			&& ( $proposed === 'image/x-png' || $proposed === 'image/png' )
			&& $binaryType === 'image/x-png' )
		{
			return $proposed;
		}

		// Markers found by sampleData(), in priority order
		if ( isset( $sampleFound['cdf'] ) ) {
			return 'application/x-cdf';
		}
		if ( isset( $sampleFound['rss'] ) ) {
			return 'application/rss+xml';
		}
		// NOTE(review): sampleData() as written never sets 'rdf-purl', so
		// this branch cannot be taken. Possibly intentional - confirm.
		if ( isset( $sampleFound['rdf-tag'] )
			&& isset( $sampleFound['rdf-url'] )
			&& isset( $sampleFound['rdf-purl'] ) )
		{
			return 'application/rss+xml';
		}
		if ( isset( $sampleFound['atom'] ) ) {
			return 'application/atom+xml';
		}

		if ( isset( $sampleFound['xml'] ) ) {
			// In 'strict' mode XML only wins over an HTML or XML proposal
			if ( strpos( $version, 'strict' ) !== false ) {
				if ( $proposed === 'text/html' || $proposed === 'text/xml' ) {
					return 'text/xml';
				}
			} else {
				return 'text/xml';
			}
		}
		if ( isset( $sampleFound['html'] ) ) {
			// In 'nohtml' mode HTML only wins over a text/plain proposal
			if ( strpos( $version, 'nohtml' ) !== false ) {
				if ( $proposed === 'text/plain' ) {
					return 'text/html';
				}
			} else {
				return 'text/html';
			}
		}
		if ( isset( $sampleFound['xbm'] ) ) {
			return 'image/x-bitmap';
		}
		if ( isset( $sampleFound['binhex'] ) ) {
			return 'application/macbinhex40';
		}
		if ( isset( $sampleFound['scriptlet'] ) ) {
			if ( strpos( $version, 'strict' ) !== false ) {
				if ( $proposed === 'text/plain' || $proposed === 'text/scriptlet' ) {
					return 'text/scriptlet';
				}
			} else {
				return 'text/scriptlet';
			}
		}

		// Classify the sample as binary or text from the character counters,
		// then prefer the header check belonging to that class.
		if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
			< ( $counters['ctrl'] + $counters['high'] ) * 16 )
		{
			$kindOfBinary = true;
			$type = $binaryType ? $binaryType : $textType;
			if ( $type === false ) {
				$type = 'application/octet-stream';
			}
		} else {
			$kindOfBinary = false;
			$type = $textType ? $textType : $binaryType;
			if ( $type === false ) {
				$type = 'text/plain';
			}
		}

		// A non-ambiguous detected type is final
		$detectedFormat = $this->getDataFormat( $version, $type );
		if ( $detectedFormat !== 'ambiguous' ) {
			return $type;
		}

		// Otherwise accept the proposal if its format agrees with the data
		if ( $proposedFormat !== 'ambiguous' ) {
			if ( $proposedFormat === 'text' && !$kindOfBinary ) {
				return $proposed;
			}
			if ( $proposedFormat === 'binary' && $kindOfBinary ) {
				return $proposed;
			}
			if ( $proposedFormat === 'html' ) {
				return $proposed;
			}
		}

		// Last resort: the file extension
		$registryType = $this->getRegistryType( $fileName );
		if ( $registryType !== false ) {
			return $registryType;
		}
		return $type;
	}

	/**
	 * Look up the MIME type registered for the extension of a file name.
	 *
	 * An exact-case match is tried first. Failing that, the extension is
	 * matched case-insensitively, because Windows registry key names are not
	 * case sensitive: "PAGE.HTM" must find '.htm', and "x.nmw" must find the
	 * '.NMW' entry.
	 *
	 * @param string $fileName
	 * @return string|bool The registered type, or false if there is no
	 *   extension or it is not registered
	 */
	private function getRegistryType( $fileName ) {
		$dotPos = strrpos( $fileName, '.' );
		if ( $dotPos === false ) {
			return false;
		}
		$ext = substr( $fileName, $dotPos );
		if ( isset( $this->registry[$ext] ) ) {
			return $this->registry[$ext];
		}
		$lowerExt = strtolower( $ext );
		foreach ( $this->registry as $registeredExt => $registeredType ) {
			if ( strtolower( $registeredExt ) === $lowerExt ) {
				return $registeredType;
			}
		}
		return false;
	}

	/**
	 * Check the start of the chunk for the signatures of text-like formats.
	 *
	 * @param string $version Unused
	 * @param string $chunk
	 * @return string|bool The matching MIME type, or false if none matched
	 */
	private function checkTextHeaders( $version, $chunk ) {
		$chunk2 = substr( $chunk, 0, 2 );
		$chunk4 = substr( $chunk, 0, 4 );
		$chunk5 = substr( $chunk, 0, 5 );
		if ( $chunk4 === '%PDF' ) {
			return 'application/pdf';
		}
		if ( $chunk2 === '%!' ) {
			return 'application/postscript';
		}
		if ( $chunk5 === '{\\rtf' ) {
			return 'text/richtext';
		}
		if ( $chunk5 === 'begin' ) {
			return 'application/base64';
		}
		return false;
	}

	/**
	 * Check the start of the chunk for the signatures of binary formats.
	 * The order of the checks matters: the first match wins.
	 *
	 * @param string $version Unused
	 * @param string $chunk
	 * @return string|bool The matching MIME type, or false if none matched
	 */
	private function checkBinaryHeaders( $version, $chunk ) {
		$chunk2 = substr( $chunk, 0, 2 );
		$chunk3 = substr( $chunk, 0, 3 );
		$chunk4 = substr( $chunk, 0, 4 );
		$chunk5 = substr( $chunk, 0, 5 );
		$chunk5uc = strtoupper( $chunk5 );
		$chunk8 = substr( $chunk, 0, 8 );
		// The GIF signature is matched case-insensitively
		if ( $chunk5uc === 'GIF87' || $chunk5uc === 'GIF89' ) {
			return 'image/gif';
		}
		if ( $chunk2 === "\xff\xd8" ) {
			return 'image/pjpeg';
		}

		// 'BM' followed by zeroed bytes at offsets 6..9
		if ( $chunk2 === 'BM'
			&& substr( $chunk, 6, 2 ) === "\000\000"
			&& substr( $chunk, 8, 2 ) === "\000\000" )
		{
			return 'image/bmp';
		}
		if ( $chunk4 === 'RIFF'
			&& substr( $chunk, 8, 4 ) === 'WAVE' )
		{
			return 'audio/wav';
		}

		// Both byte orders of the two signatures are accepted
		if ( $chunk4 === ".sd\000"
			|| $chunk4 === ".snd"
			|| $chunk4 === "\000ds."
			|| $chunk4 === "dns." )
		{
			return 'audio/basic';
		}
		if ( $chunk3 === "MM\000" ) {
			return 'image/tiff';
		}
		if ( $chunk2 === 'MZ' ) {
			return 'application/x-msdownload';
		}
		if ( $chunk8 === "\x89PNG\x0d\x0a\x1a\x0a" ) {
			return 'image/x-png';
		}
		if ( strlen( $chunk ) >= 5 ) {
			$byte2 = ord( $chunk[2] );
			$byte4 = ord( $chunk[4] );
			if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 === 0 && $chunk2 === 'JG' ) {
				return 'image/x-jg';
			}
		}

		if ( $chunk4 === 'MROF' ) {
			return 'audio/x-aiff';
		}
		$chunk4_8 = substr( $chunk, 8, 4 );
		if ( $chunk4 === 'FORM' && ( $chunk4_8 === 'AIFF' || $chunk4_8 === 'AIFC' ) ) {
			return 'audio/x-aiff';
		}
		if ( $chunk4 === 'RIFF' && $chunk4_8 === 'AVI ' ) {
			return 'video/avi';
		}
		if ( $chunk4 === "\x00\x00\x01\xb3" || $chunk4 === "\x00\x00\x01\xba" ) {
			return 'video/mpeg';
		}
		if ( $chunk4 === "\001\000\000\000"
			&& substr( $chunk, 40, 4 ) === ' EMF' )
		{
			return 'image/x-emf';
		}
		if ( $chunk4 === "\xd7\xcd\xc6\x9a" ) {
			return 'image/x-wmf';
		}
		if ( $chunk4 === "\xca\xfe\xba\xbe" ) {
			return 'application/java';
		}
		if ( $chunk2 === 'PK' ) {
			return 'application/x-zip-compressed';
		}
		if ( $chunk2 === "\x1f\x9d" ) {
			return 'application/x-compressed';
		}
		if ( $chunk2 === "\x1f\x8b" ) {
			return 'application/x-gzip-compressed';
		}

		if ( $chunk5 === "MThd\000" ) {
			return 'audio/mid';
		}
		if ( $chunk4 === '%PDF' ) {
			return 'application/pdf';
		}
		return false;
	}

	/**
	 * Scan the chunk byte by byte, counting character classes and recording
	 * which markup/format markers occur.
	 *
	 * Some markers stop the scan immediately (see the break statements), in
	 * which case the counters only cover the bytes scanned up to that point.
	 *
	 * @param string $version One of the entries of $versions
	 * @param string $chunk
	 * @return array With keys:
	 *   - 'found': map of marker name => true
	 *   - 'counters': map with keys 'ctrl', 'high', 'low', 'lf', 'cr', 'ff'
	 */
	protected function sampleData( $version, $chunk ) {
		$found = array();
		$counters = array(
			'ctrl' => 0,
			'high' => 0,
			'low' => 0,
			'lf' => 0,
			'cr' => 0,
			'ff' => 0
		);
		$htmlTags = array(
			'html',
			'head',
			'title',
			'body',
			'script',
			'a href',
			'pre',
			'img',
			'plaintext',
			'table'
		);
		$rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
		$rdfPurl = 'http://purl.org/rss/1.0/';
		$xbmMagic1 = '#define';
		$xbmMagic2 = '_width';
		$xbmMagic3 = '_bits';
		$binhexMagic = 'converted with BinHex';

		$chunkLength = strlen( $chunk );
		for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
			$curChar = $chunk[$offset];

			// Character class counters. Tab counts as 'low'; LF, CR and FF
			// have their own counters; other bytes below 32 are 'ctrl' and
			// bytes from 128 up are 'high'. None of these can start a marker.
			if ( $curChar === "\x0a" ) {
				$counters['lf']++;
				continue;
			} elseif ( $curChar === "\x0d" ) {
				$counters['cr']++;
				continue;
			} elseif ( $curChar === "\x0c" ) {
				$counters['ff']++;
				continue;
			} elseif ( $curChar === "\t" ) {
				$counters['low']++;
				continue;
			} elseif ( ord( $curChar ) < 32 ) {
				$counters['ctrl']++;
				continue;
			} elseif ( ord( $curChar ) >= 128 ) {
				$counters['high']++;
				continue;
			}

			$counters['low']++;
			if ( $curChar === '<' ) {
				// Tag names are matched case-insensitively as a prefix of
				// whatever follows the '<'
				$remainder = substr( $chunk, $offset + 1 );
				if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
					$nextChar = substr( $chunk, $offset + 5, 1 );
					if ( $nextChar === ':' || $nextChar === ' ' || $nextChar === "\t" ) {
						$found['xml'] = true;
					}
				}

				if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
					$found['scriptlet'] = true;
					break;
				}

				foreach ( $htmlTags as $tag ) {
					if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
						$found['html'] = true;
					}
				}

				// CHANNEL is only looked for before 'ie07'; the feed markers
				// only from 'ie07' on
				if ( $version < 'ie07' ) {
					if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
						$found['cdf'] = true;
					}
				} else {
					if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
						$found['rss'] = true;
						break;
					}
					if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
						$found['rdf-tag'] = true;
					}
					if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
						$found['atom'] = true;
						break;
					}
				}
				continue;
			}

			// Markers that start at the current character
			$remainder = substr( $chunk, $offset );
			if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
				$found['rdf-url'] = true;
				if ( isset( $found['rdf-tag'] )
					&& isset( $found['rdf-purl'] ) )
				{
					break;
				}
				continue;
			}

			// NOTE(review): 'rdf-purl' is tested above and by the caller but
			// is not set here or anywhere else in this method. Left unchanged
			// because setting it would alter detection results; confirm
			// whether this is intentional.
			if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
				if ( isset( $found['rdf-tag'] )
					&& isset( $found['rdf-url'] ) )
				{
					break;
				}
				continue;
			}

			// XBM needs '#define', then '_width', then '_bits', in that order
			if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
				$found['xbm1'] = true;
				continue;
			}
			if ( $curChar === '_' ) {
				if ( isset( $found['xbm2'] ) ) {
					if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
						$found['xbm'] = true;
						break;
					}
				} elseif ( isset( $found['xbm1'] ) ) {
					if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
						$found['xbm2'] = true;
					}
				}
			}

			// The BinHex marker is matched case-sensitively
			if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
				$found['binhex'] = true;
			}
		}
		return array( 'found' => $found, 'counters' => $counters );
	}

	/**
	 * Classify a MIME type using the format table of the given version.
	 *
	 * @param string $version One of the entries of $versions
	 * @param string $type MIME type
	 * @return string 'ambiguous' for an empty type or the literal '(null)',
	 *   otherwise the format the type is listed under, or 'unknown'
	 */
	protected function getDataFormat( $version, $type ) {
		$types = $this->typeTable[$version];
		if ( $type === '(null)' || strval( $type ) === '' ) {
			return 'ambiguous';
		}
		foreach ( $types as $format => $list ) {
			if ( in_array( $type, $list, true ) ) {
				return $format;
			}
		}
		return 'unknown';
	}
}
00824