php glob目录下的utf8 [英] php glob directory utf8

查看：122 发布时间：2017/11/3 19:51:43 php file glob

本文介绍了php glob目录下的utf8的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我想获取某个可变路径下的所有文件……目前我使用的是 glob()，但始终找不到办法打开包含 ä ü ö 的路径……
我已经尝试了很多种编码/解码方式，但似乎都不起作用。
当我把文件名和变量中的 ä ü ö 都替换掉之后，就能得到正确的结果……所以问题一定和 UTF-8 字符有关……我猜。

PHP脚本

  //通过ajax获取变量
 
 $ town = $ _ POST ['town']; //例如Bankdrücken
 
 $ pfad ='maps /'。 $国家。 /。 $ town。'/ *。jpg'; 
 
 $ files = glob（$ pfad）; 
 $ n_files = count（$ files）; 
 
 $ erg = new stdClass（）; 
 $ erg-> files = $ files; 
 $ erg-> n_files = $ n_files; 
 
 echo json_encode（$ erg）; 
 return;

Javascript
< （数据）{
console.log（data）;
...
pre>

返回的不是 .../Bankdrücken，而是 .../Bankdr\u00fccken

当我在客户端执行 jQuery.parseJSON(data) 时，它返回 ./Bankdrücken；但这发生在客户端，而服务器端此时已经失败了……我就是没法把它弄对。

这里是一个真正的例子...如果文件路径中的$变量包含äöü - > glob（）不会得到我任何文件...
//不，我打印出我从ajax调用...

试用一个（带ü）：
$ b $ stdClass对象
（
[files] => Array
（
）

[n_files] => ; 0 [filename] => workout_uebungen / Brust /Liegestützen/
）

/ ************************************** /
试验二（我把文件名从 Liegestützen 改为 Liegestuetzen，并把 $var 从 Liegestützen 改为 Liegestuetzen）
$ b $ stdClass对象
（
[files] =>数组
（
[0] => workout_uebungen / Brust / Liegestuetzen / 1.jpg
[1] => workout_uebungen / Brust / Liegestuetzen / 2.jpg
）
[n_files] => 2 [文件名] => workout_uebungen / Brust / Liegestuetzen /
）

所以试验二给出了正确的结果，但我不想重命名所有文件……而且对我来说这也是一个不干净的解决方案……

<？php $ b $ b class Encoding { protected static $ win1252ToUtf8 = array（ 128 =>\xe2\x82\xac， 130 => ;\\\ xe2\x80\x9a， 131 =>\\\\\\\\ x92， 132 => 133 =>\ xe2\x80\xa6， 134 =>\ xe2\x80\xa0， 135 =>\\ \\ xe2 \x80 \xa1， 136 =>\ xcb\x86， 137 => b 138 =>\ xc5 \xa0， 139 =>\ xe2\x80\xb9， 140 =&g t;\xc5\x92， 142 =>\xc5\xbd， 145 =>\\ \\ xe2 \x80 \x98， 146 => \xe2\x80\x99， 147 => \xe2\x80\x9c， 148 => \xe2\x80\x9d， 149 => \xe2\x80\xa2， 150 => \xe2\x80\x93， 151 => \xe2\x80\x94， 152 => \xcb\x9c， 153 => \xe2\x84\xa2， 154 => \xc5\xa1， 155 => \xe2\x80\xba， 156 => \xc5\x93， 158 => \xc5\xbe， 159 => \xc5\xb8 ）; $ b $ protected static $ brokenUtf8ToUtf8 = array（ \xc2\x80=" \xe2\x82\xac， \xc2\x82=>\ xe2\x80\x9a， \xc2\x83=>\xc6\x92， \xc2\x84=>\ xe2\x80\x9e， \xc2\x85=>\ xe2\x80\xa6 ， \xc2\x86=>\xe2\x80\xa0， \xc2\x87=>\xe2\x80 \ xa1， \xc2\x88=>\xcb\x86， \xc2\x89=>\xe2\ x80 \xb0， \xc2\x8a=>\xc5\xa0， \xc2\x8b=>\xe2\\ \\ x80 \xb9， \xc2\x8c=>\xc5\x92， \xc2\x8e=> \xc5\xbd， \xc2\x91=> \xe2\x80\x98， \xc2\x92=>\xe2\x80\x99， \xc2\x93 = \\xe2 \x80 \x9c， \xc2\x94=>\ xe2\x80\x9d， \ xc2 \x95=>\xe2\x80\xa2， \xc2\x96=> \xe2\x80\x93， \xc2\x97=> \xe2\x80\x94， \xc2\x98=> \xcb\x9c， \xc2\x99=> \xe2\x84\xa2， \xc2\x9a=> \xc5\xa1， \xc2\x9b=> \xe2\x80\xba， \xc2\x9c=> \xc5\x93， \xc2\x9e=> \xc5\xbe， \xc2\x9f=> \xc5\xb8 ）; protected static $ utf8ToWin1252 = array（ \xe2\x82\xac=&; \x80， \ xe2 \x80\x9a=>\x82， \xc6\x92=>\x83， \xe2\x80\\ \\ x9e=>\x84， \xe2\x80\xa6=>\x85， \xe2\x80\xa0 =>\x86， \xe2\x80\xa1=>\x87， \xcb\x86=> \ x88， \xe2\x80\xb0=>\x89， \xc5\xa0=>\x8a， \xe2\x80\xb9=>\x8b， \xc5\x92=>\x8c， \xc5\xbd=>\x8e， \xe2\x80\x98=>\x91 ， \xe2\x80\x99=>\x92， \xe2\x80 \\ x9c=>\x93， \xe2\x80\x9d=>\x94， \xe2\x80\xa2 =>\x95， \xe2\x80\x93=>\x96， \xe2\x80\x94= >\x97， \xcb\x9c=> \x98， \xe2\x84\xa2=> \x99， \xc5\xa1=> \x9a， \xe2\x80\xba=> \x9b， \xc5\x93=> \x9c， \xc5\xbe=> \x9e， \xc5\xb8=> \x9f ）; 静态函数toUTF8（$ text）{ / ** *函数编码:: toUTF8 * *这个函数只保留UTF8字符，同时将几乎所有非UTF8转换为UTF8。 * *它假定原始字符串的编码是Windows-1252或ISO 8859-1。 * 
*如果这些字符中的任何一个出现这种情况，则可能无法将字符转换为UTF-8： * * 1） *之后是以下任何一个：（B组） *，£¤¥|§¨¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯° b $ b *例如：％ABREPRESENT％C9％BB。 «REPRESENTÉ» （％AB）字符将被转换，但是É后跟»（％C9％BB） *也是一个有效的Unicode字符，将保持不变。 * * 2）当其中任何一个：àáâããäæçèéêëìíîï后跟B组的两个字符， * 3），其中任何一个：ðñòó后跟B组中的三个字符。 b $ b * * @name toUTF8 * @param string $ text任何字符串。 * @return string相同的字符串，UTF8编码 * * / if（is_array（$ text）） { foreach（$ text为$ k => $ v） { $ text [$ k] = self :: toUTF8（$ v）; } 返回$ text; } elseif（is_string（$ text））{ $ max = strlen（$ text）; $ buf =; （$ i = 0; $ i <$ max; $ i ++）{ $ c1 = $ text {$ i}; if（$ c1> =\xc0）{//如果已经不是UTF8，应该转换为UTF8 $ c2 = $ i + 1> = $ max？ \x00：$ text {$ i + 1}; $ c3 = $ i + 2> = $ max？ \x00：$ text {$ i + 2}; $ c4 = $ i + 3> = $ max？ \x00：$ text {$ i + 3}; if（$ c1> =\xc0& $ c1< =\xdf）{// //看起来像2个字节UTF8 if（$ c2> =\\ \\ x80&& $ c2< =\xbf）{//是的，几乎可以肯定的是UTF8已经是 $ buf。= $ c1了。 $ C2; $ i ++; } else {//无效的UTF8。转换它。 $ cc1 =（chr（ord（$ c1）/ 64）|\xc0）; $ cc2 =（$ c1&\x3f）| \x80; $ buf。= $ cc1。 $ CC2; } } elseif（$ c1> =\xe0& $ c1< =\ xef）{//看起来像3个字节UTF8 if（$ c2> =\x80&& $ c2< =\xbf&&& $ c3> =\x80&& $ c3< =\xbf ）{//是的，几乎可以肯定它的UTF8已经是 $ buf。= $ c1。 $ c2。 $ C3; $ i = $ i + 2; } else {//无效的UTF8。转换它。 $ cc1 =（chr（ord（$ c1）/ 64）|\xc0）; $ cc2 =（$ c1&\x3f）| \x80; $ buf。= $ cc1。 $ CC2; } } elseif（$ c1> =\xf0& $ c1< =\xf7）{//看起来像4个字节UTF8 if（$ c2> =\x80&& $ c2< =\xbf&& $ c3> =\x80&& $ c3< =\xbf && $ c4> =\x80&& $ c4< =\xbf）{//是的，几乎可以肯定的是UTF8已经 $ buf。= $ c1 。 $ c2。 $ C3; $ i = $ i + 2; } else {//无效的UTF8。转换它。 $ cc1 =（chr（ord（$ c1）/ 64）|\xc0）; $ cc2 =（$ c1&\x3f）| \x80; $ buf。= $ cc1。 $ CC2; } } else {//看起来不像UTF8，但应该转换 $ cc1 =（chr（ord（$ c1）/ 64）|\xc0） ; $ cc2 =（（$ c1&\x3f）|\x80）; $ buf。= $ cc1。 $ CC2; $（b $ b）elseif（（$ c1&\xc0）==\x80）{//需要转换 if（isset（self :: $ win1252ToUtf8 [ ord（$ c1）]））{//在Windows-1252中找到特殊情况 $ buf。= self :: $ win1252ToUtf8 [ord（$ c1）]; } else { $ cc1 =（chr（ord（$ c1）/ 64）|\xc0）; $ cc2 =（（$ c1&\x3f）|\x80）; $ buf。= $ cc1。 $ CC2; } } else {//不需要convesion $ buf。= $ c1; } } return $ buf; } else { return $ 
text; $ b静态函数toWin1252（$ text）{ if（is_array（$ text））{ foreach（$ text as $ k => $ v）{ $ text [$ k] = self :: toWin1252（$ v）; } 返回$ text; $ self_toUTF8（$ text） } elseif（is_string（$ text））{ return utf8_decode（str_replace（array_keys（self :: $ utf8ToWin1252），array_values（self :: $ utf8ToWin1252））; } else { return $ text; 静态函数toISO8859（$ text）{ 返回self :: toWin1252（$ text）; 静态函数toLatin1（$ text）{ return self :: toWin1252（$ text）; static function fixUTF8（$ text）{ if（is_array（$ text））{ foreach（$ text as $ k => $ v ）{ $ text [$ k] = self :: fixUTF8（$ v）; } 返回$ text; } $ last =; while（$ last<> $ text）{ $ last = $ text; $ text = self :: toUTF8（utf8_decode（str_replace（array_keys（self :: $ utf8ToWin1252），array_values（self :: $ utf8ToWin1252），$ text）））; } $ text = self :: toUTF8（utf8_decode（str_replace（array_keys（self :: $ utf8ToWin1252），array_values（self :: $ utf8ToWin1252），$ text）））; 返回$ text; 静态函数UTF8FixWin1252Chars（$ text）{ //如果您收到一个从Windows-1252转换的UTF-8字符串，因为它是ISO8859-1 //（忽略从80到9F的Windows-1252字符）使用这个函数来修复它。 //见：http://en.wikipedia.org/wiki/Windows-1252 返回str_replace（array_keys（self :: $ brokenUtf8ToUtf8），array_values（self :: $ brokenUtf8ToUtf8 ），$ text）; 静态函数removeBOM（$ str =）{ if（substr（$ str，0,3）== pack（CCC，0xef，0xbb ，0xbf））{ $ str = substr（$ str，3）; } return $ str; } } ？>
为了使用它，你需要包含这个类的脚本，并且像这样：
Encoding :: toUtf8（'Bankdrücken'）;

I am trying to get all files within a variable path. At the moment I am using glob(), but I cannot find a way to open a path that contains ä, ü or ö. I have tried a lot of encoding and decoding, but nothing seems to work. When I replace the ä, ü and ö in both the file name and the variable, I get the right answer, so it has to be something to do with UTF-8 characters, I guess.

PHP script
// Lists the *.jpg files of one town directory and returns them as JSON.
// Output shape: {"files": [<path>, ...], "n_files": <int>}

// Town name sent via the AJAX POST, for example "Bankdrücken".
$town = isset($_POST['town']) ? (string) $_POST['town'] : '';

// The value is untrusted and becomes part of a filesystem path: refuse empty
// names, "." / ".." and anything containing a path separator or a NUL byte,
// so the request cannot escape the maps/ directory.
if ($town === '' || $town === '.' || $town === '..' || strpbrk($town, "/\\\0") !== false) {
    http_response_code(400);
    return;
}

// NOTE(review): $country is not defined in this snippet; presumably it is set
// earlier in the script -- confirm.
$pfad = 'maps/' . $country . '/' . $town . '/*.jpg';

$files = glob($pfad);
if ($files === false) {
    // glob() returns false on error; report that as "no files" instead of
    // passing false to count().
    $files = array();
}
$n_files = count($files);

$erg = new stdClass();
$erg->files = $files;
$erg->n_files = $n_files;

echo json_encode($erg);
return;
Javascript
.success(function(data){ console.log(data); ...
this will not return .../Bankdrücken but .../Bankdr\u00fccken

When I call jQuery.parseJSON(data) it returns ./Bankdrücken, but since that happens on the client side, the server side has already failed by then. I just can't get it right.

Here is a real example: if the $variable within the file path contains ä, ö or ü, glob() will not return any files. Below I am printing out what I get back from the AJAX call.

Trial One ( With ü ):

"stdClass Object ( [files] => Array ( )
[n_files] => 0 [filename] => workout_uebungen/Brust/Liegestützen/
) " /**************************************/ trail two ( i changed the file name from Liegestützen => Liegestuetzen and the $var from Liegestützen => Liegestuetzen)

"stdClass Object ( [files] => Array ( [0] => workout_uebungen/Brust/Liegestuetzen/1.jpg [1] => workout_uebungen/Brust/Liegestuetzen/2.jpg )
[n_files] => 2 [filename] => workout_uebungen/Brust/Liegestuetzen/
) "

So trial two gives the right answer, but I don't want to rename all my files; it would also be a dirty solution to me.
解决方案
In this case you can use:
<?php
/**
 * Static helpers for converting strings between Windows-1252 / ISO 8859-1
 * and UTF-8, and for repairing strings in which the two were mixed up.
 */
class Encoding {

    /**
     * Windows-1252 bytes 0x80-0x9F (keyed by byte value) mapped to the UTF-8
     * byte sequence emitted for them.
     */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",
        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",
        142 => "\xc5\xbd",
        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",
        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
    );

    /**
     * Two-byte sequences 0xC2 0x80-0x9F (what results when a Windows-1252
     * byte is encoded as if it were ISO 8859-1) mapped to the UTF-8 sequence
     * that replaces them in UTF8FixWin1252Chars().
     */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",
        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",
        "\xc2\x8e" => "\xc5\xbd",
        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",
        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
    );

    /**
     * UTF-8 sequences mapped back to the single Windows-1252 byte
     * (0x80-0x9F); the inverse of $win1252ToUtf8.
     */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",
        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92"     => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86"     => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0"     => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92"     => "\x8c",
        "\xc5\xbd"     => "\x8e",
        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c"     => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1"     => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93"     => "\x9c",
        "\xc5\xbe"     => "\x9e",
        "\xc5\xb8"     => "\x9f"
    );

    /**
     * Leaves byte sequences that already look like UTF-8 alone and converts
     * every other non-ASCII byte to UTF-8.
     *
     * The input is assumed to be Windows-1252 or ISO 8859-1 wherever it is
     * not UTF-8. Because detection is purely structural, a conversion is
     * skipped when the legacy bytes happen to form a well-formed UTF-8
     * sequence:
     *
     *  1) a byte 0xC0-0xDF followed by one byte 0x80-0xBF
     *     (e.g. %ABREPRESENT%C9%BB: the "«" (%AB) is converted, but "É"
     *     followed by "»" (%C9%BB) is a valid sequence and is left unchanged);
     *  2) a byte 0xE0-0xEF followed by two bytes 0x80-0xBF;
     *  3) a byte 0xF0-0xF7 followed by three bytes 0x80-0xBF.
     *
     * @param mixed $text A string, or an array whose values are converted
     *                    recursively (keys are kept).
     * @return mixed The converted string/array; any other type is returned
     *               unchanged.
     */
    public static function toUTF8($text) {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }
        if (!is_string($text)) {
            return $text;
        }

        $max = strlen($text);
        $buf = "";
        for ($i = 0; $i < $max; $i++) {
            $c1 = $text[$i];
            $o1 = ord($c1);

            if ($o1 < 0x80) {
                // Plain ASCII needs no conversion.
                $buf .= $c1;
                continue;
            }

            if ($o1 < 0xc0) {
                // 0x80-0xBF outside a sequence: a legacy single-byte character.
                $buf .= isset(self::$win1252ToUtf8[$o1])
                    ? self::$win1252ToUtf8[$o1]
                    : self::singleByteToUtf8($o1);
                continue;
            }

            // 0xC0 and above: a possible UTF-8 lead byte. Work out how many
            // continuation bytes such a lead byte announces.
            if ($o1 <= 0xdf) {
                $trailing = 1;
            } elseif ($o1 <= 0xef) {
                $trailing = 2;
            } elseif ($o1 <= 0xf7) {
                $trailing = 3;
            } else {
                $trailing = 0; // 0xF8-0xFF never starts a sequence
            }

            if ($trailing > 0 && self::hasContinuationBytes($text, $i + 1, $trailing, $max)) {
                // Well-formed sequence: copy ALL of its bytes and skip past
                // them (a 4-byte sequence is lead byte + 3 continuation bytes).
                $buf .= substr($text, $i, $trailing + 1);
                $i += $trailing;
            } else {
                // Not a UTF-8 sequence: treat the byte as a legacy character.
                $buf .= self::singleByteToUtf8($o1);
            }
        }
        return $buf;
    }

    /**
     * Encodes one byte value 0x80-0xFF as the two-byte UTF-8 sequence of the
     * code point with the same number.
     *
     * @param int $byte Byte value in the range 0x80-0xFF.
     * @return string Two-byte UTF-8 sequence.
     */
    protected static function singleByteToUtf8($byte) {
        return chr(0xc0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3f));
    }

    /**
     * Tells whether $count bytes starting at $start all exist and all lie in
     * the continuation-byte range 0x80-0xBF.
     *
     * @param string $text  Haystack.
     * @param int    $start Offset of the first byte to check.
     * @param int    $count Number of bytes to check.
     * @param int    $max   Length of $text.
     * @return bool
     */
    protected static function hasContinuationBytes($text, $start, $count, $max) {
        if ($start + $count > $max) {
            return false;
        }
        for ($j = $start; $j < $start + $count; $j++) {
            $o = ord($text[$j]);
            if ($o < 0x80 || $o > 0xbf) {
                return false;
            }
        }
        return true;
    }

    /**
     * Converts text to a single-byte encoding: normalises it with toUTF8(),
     * replaces the sequences listed in $utf8ToWin1252 by their single bytes,
     * then runs utf8_decode() on the result.
     *
     * NOTE(review): utf8_decode() is deprecated as of PHP 8.2; it is kept here
     * so the output stays identical -- consider a replacement when upgrading.
     *
     * @param mixed $text A string, or an array converted recursively.
     * @return mixed The converted string/array; any other type is returned
     *               unchanged.
     */
    public static function toWin1252($text) {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v);
            }
            return $text;
        }
        if (is_string($text)) {
            return utf8_decode(str_replace(
                array_keys(self::$utf8ToWin1252),
                array_values(self::$utf8ToWin1252),
                self::toUTF8($text)
            ));
        }
        return $text;
    }

    /** Alias of toWin1252(). */
    public static function toISO8859($text) {
        return self::toWin1252($text);
    }

    /** Alias of toWin1252(). */
    public static function toLatin1($text) {
        return self::toWin1252($text);
    }

    /**
     * Applies "decode to single-byte, then toUTF8()" repeatedly until the
     * text no longer changes.
     *
     * @param mixed $text A string, or an array fixed recursively.
     * @return mixed The fixed string/array.
     */
    public static function fixUTF8($text) {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v);
            }
            return $text;
        }

        $last = "";
        // Strict comparison: the loop ends only on an exact fixed point, so no
        // extra pass is needed after it.
        while ($last !== $text) {
            $last = $text;
            $text = self::toUTF8(utf8_decode(str_replace(
                array_keys(self::$utf8ToWin1252),
                array_values(self::$utf8ToWin1252),
                $text
            )));
        }
        return $text;
    }

    /**
     * Repairs a UTF-8 string that was produced by converting Windows-1252
     * text as if it were ISO 8859-1 (ignoring the Windows-1252 characters
     * 0x80-0x9F).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $text UTF-8 text.
     * @return string Text with every sequence listed in $brokenUtf8ToUtf8
     *                replaced.
     */
    public static function UTF8FixWin1252Chars($text) {
        return str_replace(
            array_keys(self::$brokenUtf8ToUtf8),
            array_values(self::$brokenUtf8ToUtf8),
            $text
        );
    }

    /**
     * Strips a leading UTF-8 byte order mark (EF BB BF) if there is one.
     *
     * @param string $str Input text.
     * @return string The text without a leading BOM.
     */
    public static function removeBOM($str = "") {
        if (substr($str, 0, 3) === "\xef\xbb\xbf") {
            $str = substr($str, 3);
        }
        return $str;
    }
}
?>
To use it, include the script containing this class and call it like this:
Encoding::toUtf8('Bankdrücken');

这篇关于php glob目录下的utf8的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

php glob目录下的utf8 [英] php glob directory utf8

问题描述

相关文章

PHP最新文章

热门教程

热门工具

登录关闭

php glob目录下的utf8 [英] php glob directory utf8

问题描述

相关文章

PHP最新文章

热门教程

热门工具

登录 关闭

登录关闭