php glob目录下的utf8 [英] php glob directory utf8
问题描述
我想获取某个可变路径下的所有文件……目前我使用的是 glob(),但始终找不到办法打开包含 ä ü ö 的路径……我已经尝试了很多种解码/编码方式,但似乎都不起作用。
当我把文件名和变量中的 ä ü ö 换掉之后,就能得到正确的结果……所以问题一定和 UTF-8 字符有关……我猜。
PHP脚本
//通过ajax获取变量
$ town = $ _ POST ['town']; //例如Bankdrücken
$pfad = 'maps/' . $country . '/' . $town . '/*.jpg';
$ files = glob($ pfad);
$ n_files = count($ files);
$ erg = new stdClass();
$ erg-> files = $ files;
$ erg-> n_files = $ n_files;
echo json_encode($ erg);
return;
Javascript
.success(function(data){
console.log(data);
...
这不会返回 .../Bankdrücken,而是返回 .../Bankdr\u00fccken
当我执行 jQuery.parseJson(data); 时,它返回 ./Bankdrücken,但这是在客户端进行的,而服务器端早已失败……我就是没法把它弄对
这里是一个真正的例子...如果文件路径中的$变量包含äöü - > glob()不会得到我任何文件...
//不,我打印出我从ajax调用...
试用一个(带ü):
$ b $ stdClass对象
(
[files] => Array
(
)
[n_files] => ; 0
[filename] => workout_uebungen / Brust /Liegestützen/
)
/ ************************************** /
试验二(我把文件名从 Liegestützen 改为 Liegestuetzen,并把 $var 从 Liegestützen 改为 Liegestuetzen)
$ b $ stdClass对象
(
[files] =>数组
(
[0] => workout_uebungen / Brust / Liegestuetzen / 1.jpg
[1] => workout_uebungen / Brust / Liegestuetzen / 2.jpg
)
[n_files] => 2
[文件名] => workout_uebungen / Brust / Liegestuetzen /
)
所以试验 2 给出了正确的结果,但我不想重命名我所有的文件……而且对我来说这也是一个很不干净的解决方案……
<?php $ b $ b class Encoding {
protected static $ win1252ToUtf8 = array(
128 =>\xe2\x82\xac,
130 => ;\\\ xe2\x80\x9a,
131 =>\\\\\\\\ x92,
132 =>
133 =>\ xe2\x80\xa6,
134 =>\ xe2\x80\xa0,
135 =>\\ \\ xe2 \x80 \xa1,
136 =>\ xcb\x86,
137 => b 138 =>\ xc5 \xa0,
139 =>\ xe2\x80\xb9,
140 =&g t;\xc5\x92,
142 =>\xc5\xbd,
145 =>\\ \\ xe2 \x80 \x98,
146 => \xe2\x80\x99,
147 => \xe2\x80\x9c,
148 => \xe2\x80\x9d,
149 => \xe2\x80\xa2,
150 => \xe2\x80\x93,
151 => \xe2\x80\x94,
152 => \xcb\x9c,
153 => \xe2\x84\xa2,
154 => \xc5\xa1,
155 => \xe2\x80\xba,
156 => \xc5\x93,
158 => \xc5\xbe,
159 => \xc5\xb8
);
$ b $ protected static $ brokenUtf8ToUtf8 = array(
\xc2\x80=" \xe2\x82\xac,
\xc2\x82=>\ xe2\x80\x9a,
\xc2\x83=>\xc6\x92,
\xc2\x84=>\ xe2\x80\x9e,
\xc2\x85=>\ xe2\x80\xa6 ,
\xc2\x86=>\xe2\x80\xa0,
\xc2\x87=>\xe2\x80 \ xa1,
\xc2\x88=>\xcb\x86,
\xc2\x89=>\xe2\ x80 \xb0,
\xc2\x8a=>\xc5\xa0,
\xc2\x8b=>\xe2\\ \\ x80 \xb9,
\xc2\x8c=>\xc5\x92,
\xc2\x8e=> \xc5\xbd,
\xc2\x91=> \xe2\x80\x98,
\xc2\x92=>\xe2\x80\x99,
\xc2\x93 = \\xe2 \x80 \x9c,
\xc2\x94=>\ xe2\x80\x9d,
\ xc2 \x95=>\xe2\x80\xa2,
\xc2\x96=> \xe2\x80\x93,
\xc2\x97=> \xe2\x80\x94,
\xc2\x98=> \xcb\x9c,
\xc2\x99=> \xe2\x84\xa2,
\xc2\x9a=> \xc5\xa1,
\xc2\x9b=> \xe2\x80\xba,
\xc2\x9c=> \xc5\x93,
\xc2\x9e=> \xc5\xbe,
\xc2\x9f=> \xc5\xb8
);
protected static $ utf8ToWin1252 = array(
\xe2\x82\xac=&; \x80,
\ xe2 \x80\x9a=>\x82,
\xc6\x92=>\x83,
\xe2\x80\\ \\ x9e=>\x84,
\xe2\x80\xa6=>\x85,
\xe2\x80\xa0 =>\x86,
\xe2\x80\xa1=>\x87,
\xcb\x86=> \ x88,
\xe2\x80\xb0=>\x89,
\xc5\xa0=>\x8a,
\xe2\x80\xb9=>\x8b,
\xc5\x92=>\x8c,
\xc5\xbd=>\x8e,
\xe2\x80\x98=>\x91 ,
\xe2\x80\x99=>\x92,
\xe2\x80 \\ x9c=>\x93,
\xe2\x80\x9d=>\x94,
\xe2\x80\xa2 =>\x95,
\xe2\x80\x93=>\x96,
\xe2\x80\x94= >\x97,
\xcb\x9c=> \x98,
\xe2\x84\xa2=> \x99,
\xc5\xa1=> \x9a,
\xe2\x80\xba=> \x9b,
\xc5\x93=> \x9c,
\xc5\xbe=> \x9e,
\xc5\xb8=> \x9f
);
静态函数toUTF8($ text){
/ **
*函数编码:: toUTF8
*
*这个函数只保留UTF8字符,同时将几乎所有非UTF8转换为UTF8。
*
*它假定原始字符串的编码是Windows-1252或ISO 8859-1。
*
*如果这些字符中的任何一个出现这种情况,则可能无法将字符转换为UTF-8:
*
* 1)
*之后是以下任何一个:(B组)
*,£¤¥|§¨¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯° b $ b *例如:%ABREPRESENT%C9%BB。 «REPRESENTÉ»
(%AB)字符将被转换,但是É后跟»(%C9%BB)
*也是一个有效的Unicode字符,将保持不变。
*
* 2)当其中任何一个:àáâããäæçèéêëìíîï后跟B组的两个字符,
* 3),其中任何一个:ðñòó后跟B组中的三个字符。 b $ b *
* @name toUTF8
* @param string $ text任何字符串。
* @return string相同的字符串,UTF8编码
*
* /
if(is_array($ text))
{
foreach($ text为$ k => $ v)
{
$ text [$ k] = self :: toUTF8($ v);
}
返回$ text;
} elseif(is_string($ text)){
$ max = strlen($ text);
$ buf =; ($ i = 0; $ i <$ max; $ i ++){
$ c1 = $ text {$ i};
if($ c1> =\xc0){//如果已经不是UTF8,应该转换为UTF8
$ c2 = $ i + 1> = $ max? \x00:$ text {$ i + 1};
$ c3 = $ i + 2> = $ max? \x00:$ text {$ i + 2};
$ c4 = $ i + 3> = $ max? \x00:$ text {$ i + 3};
if($ c1> =\xc0& $ c1< =\xdf){// //看起来像2个字节UTF8
if($ c2> =\\ \\ x80&& $ c2< =\xbf){//是的,几乎可以肯定的是UTF8已经是
$ buf。= $ c1了。 $ C2;
$ i ++;
} else {//无效的UTF8。转换它。
$ cc1 =(chr(ord($ c1)/ 64)|\xc0);
$ cc2 =($ c1&\x3f)| \x80;
$ buf。= $ cc1。 $ CC2;
}
} elseif($ c1> =\xe0& $ c1< =\ xef){//看起来像3个字节UTF8
if($ c2> =\x80&& $ c2< =\xbf&&& $ c3> =\x80&& $ c3< =\xbf ){//是的,几乎可以肯定它的UTF8已经是
$ buf。= $ c1。 $ c2。 $ C3;
$ i = $ i + 2;
} else {//无效的UTF8。转换它。
$ cc1 =(chr(ord($ c1)/ 64)|\xc0);
$ cc2 =($ c1&\x3f)| \x80;
$ buf。= $ cc1。 $ CC2;
}
} elseif($ c1> =\xf0& $ c1< =\xf7){//看起来像4个字节UTF8
if($ c2> =\x80&& $ c2< =\xbf&& $ c3> =\x80&& $ c3< =\xbf && $ c4> =\x80&& $ c4< =\xbf){//是的,几乎可以肯定的是UTF8已经
$ buf。= $ c1 。 $ c2。 $ C3;
$ i = $ i + 2;
} else {//无效的UTF8。转换它。
$ cc1 =(chr(ord($ c1)/ 64)|\xc0);
$ cc2 =($ c1&\x3f)| \x80;
$ buf。= $ cc1。 $ CC2;
}
} else {//看起来不像UTF8,但应该转换
$ cc1 =(chr(ord($ c1)/ 64)|\xc0) ;
$ cc2 =(($ c1&\x3f)|\x80);
$ buf。= $ cc1。 $ CC2; $(b
$ b)elseif(($ c1&\xc0)==\x80){//需要转换
if(isset(self :: $ win1252ToUtf8 [ ord($ c1)])){//在Windows-1252中找到特殊情况
$ buf。= self :: $ win1252ToUtf8 [ord($ c1)];
} else {
$ cc1 =(chr(ord($ c1)/ 64)|\xc0);
$ cc2 =(($ c1&\x3f)|\x80);
$ buf。= $ cc1。 $ CC2;
}
} else {//不需要convesion
$ buf。= $ c1;
}
}
return $ buf;
} else {
return $ text;
$ b静态函数toWin1252($ text){
if(is_array($ text)){
foreach($ text as $ k => $ v){
$ text [$ k] = self :: toWin1252($ v);
}
返回$ text; $ self_toUTF8($ text)
} elseif(is_string($ text)){
return utf8_decode(str_replace(array_keys(self :: $ utf8ToWin1252),array_values(self :: $ utf8ToWin1252) );
} else {
return $ text;
静态函数toISO8859($ text){
返回self :: toWin1252($ text);
静态函数toLatin1($ text){
return self :: toWin1252($ text);
static function fixUTF8($ text){
if(is_array($ text)){
foreach($ text as $ k => $ v ){
$ text [$ k] = self :: fixUTF8($ v);
}
返回$ text;
}
$ last =;
while($ last<> $ text){
$ last = $ text;
$ text = self :: toUTF8(utf8_decode(str_replace(array_keys(self :: $ utf8ToWin1252),array_values(self :: $ utf8ToWin1252),$ text)));
}
$ text = self :: toUTF8(utf8_decode(str_replace(array_keys(self :: $ utf8ToWin1252),array_values(self :: $ utf8ToWin1252),$ text)));
返回$ text;
静态函数UTF8FixWin1252Chars($ text){
//如果您收到一个从Windows-1252转换的UTF-8字符串,因为它是ISO8859-1
//(忽略从80到9F的Windows-1252字符)使用这个函数来修复它。
//见:http://en.wikipedia.org/wiki/Windows-1252
返回str_replace(array_keys(self :: $ brokenUtf8ToUtf8),array_values(self :: $ brokenUtf8ToUtf8 ),$ text);
静态函数removeBOM($ str =){
if(substr($ str,0,3)== pack(CCC,0xef,0xbb ,0xbf)){
$ str = substr($ str,3);
}
return $ str;
}
}
?>
为了使用它,你需要包含这个类的脚本,并且像这样:
Encoding :: toUtf8('Bankdrücken');
I am trying to get all files within a variable path. At the moment I am using glob(), but I cannot find a way to open a path that contains ä, ü or ö. I have tried a lot of decoding/encoding, but nothing seems to work. When I replace the ä, ü, ö in both the file name and the variable, I get the right answer, so it has to be something to do with UTF-8 characters, I guess.
PHP script
// Get the requested town via the AJAX POST body, for example "Bankdrücken".
// A missing key is treated as an empty (and therefore rejected) name.
$town = isset($_POST['town']) ? (string) $_POST['town'] : '';

// $town is untrusted and becomes part of a filesystem path and glob pattern.
// Only accept a plain directory name: no path separators, no NUL byte, no
// "." / ".." and no glob metacharacters, so the request cannot escape
// maps/<country>/ or widen the pattern.
$townIsSafe = $town !== ''
    && $town !== '.'
    && $town !== '..'
    && strpbrk($town, "/\\\0*?[]{}") === false;

$files = array();
if ($townIsSafe) {
    // NOTE(review): $country is not defined in this snippet; it is assumed to
    // be set (and validated) earlier in the script - confirm.
    $pfad = 'maps/' . $country . '/' . $town . '/*.jpg';
    $found = glob($pfad);
    // glob() returns false on error; keep the empty list in that case so that
    // count() below always receives an array.
    if ($found !== false) {
        $files = $found;
    }
}
$n_files = count($files);

// Response object: the matched file paths and how many there are.
$erg = new stdClass();
$erg->files = $files;
$erg->n_files = $n_files;
echo json_encode($erg);
return;
Javascript
.success(function(data){
console.log(data);
...
This does not return .../Bankdrücken but .../Bankdr\u00fccken.
When I do jQuery.parseJson(data); it returns ./Bankdrücken, but as this happens on the client side, the server side has already failed by then. I just can't get it right.
Here is a real example: if the $variable within the file path contains ä, ö or ü, glob() will not return any files. // Now I am printing out what I get back from the AJAX call...
Trial One ( With ü ):
"stdClass Object ( [files] => Array ( )
[n_files] => 0
[filename] => workout_uebungen/Brust/Liegestützen/
) " /**************************************/ Trial Two (I changed the file name from Liegestützen => Liegestuetzen and the $var from Liegestützen => Liegestuetzen)
"stdClass Object ( [files] => Array ( [0] => workout_uebungen/Brust/Liegestuetzen/1.jpg [1] => workout_uebungen/Brust/Liegestuetzen/2.jpg )
[n_files] => 2
[filename] => workout_uebungen/Brust/Liegestuetzen/
) "
So trial 2 gives the right answer, but I don't want to rename all my files; it would also be a dirty solution to me.
In that case you can use:
<?php
/**
 * Byte-level helpers for converting between Windows-1252 / ISO 8859-1 and
 * UTF-8, and for repairing strings that were encoded to UTF-8 more than once.
 *
 * All methods are static and operate on raw bytes (no mbstring or iconv).
 */
class Encoding {

    /**
     * Windows-1252 bytes 0x80-0x9F (keyed by byte value) mapped to the UTF-8
     * sequence of the character they represent in Windows-1252.
     */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",
        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",
        142 => "\xc5\xbd",
        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",
        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
    );

    /**
     * UTF-8 encodings of the C1 control code points U+0080-U+009F (what you
     * get when Windows-1252 bytes are converted as if they were ISO 8859-1)
     * mapped to the UTF-8 sequence of the intended Windows-1252 character.
     */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",
        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",
        "\xc2\x8e" => "\xc5\xbd",
        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",
        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
    );

    /**
     * UTF-8 sequences of the Windows-1252 specific characters mapped back to
     * their single Windows-1252 byte (the inverse of $win1252ToUtf8).
     */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",
        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92" => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86" => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0" => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92" => "\x8c",
        "\xc5\xbd" => "\x8e",
        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c" => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1" => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93" => "\x9c",
        "\xc5\xbe" => "\x9e",
        "\xc5\xb8" => "\x9f"
    );

    /**
     * Function Encoding::toUTF8
     *
     * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
     *
     * It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
     *
     * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
     *    are followed by any of these:  ("group B")
     *                                    ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶•¸¹º»¼½¾¿
     * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
     * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
     * is also a valid unicode character, and will be left unchanged.
     *
     * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
     * 3) when any of these: ðñòó  are followed by THREE chars from group B.
     *
     * Arrays are converted element by element (recursively, keys untouched);
     * values that are neither arrays nor strings are returned unchanged.
     *
     * @name toUTF8
     * @param mixed $text Any string, or an array of values to convert.
     * @return mixed The same string (or array), UTF8 encoded
     */
    public static function toUTF8($text){
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }
        if (!is_string($text)) {
            return $text;
        }

        $max = strlen($text);
        $buf = "";
        for ($i = 0; $i < $max; $i++) {
            // Square-bracket offsets: the {} offset syntax no longer parses on PHP 8.
            $c1 = $text[$i];
            $b1 = ord($c1);

            if ($b1 >= 0xC0) {
                // Possible UTF-8 lead byte: keep the whole sequence only if
                // every expected continuation byte is actually present.
                $length = self::utf8SequenceLength($b1);
                if ($length > 0 && self::hasContinuationBytes($text, $i + 1, $length - 1, $max)) {
                    $buf .= substr($text, $i, $length);
                    // Skip the continuation bytes just copied; for 4-byte
                    // sequences that is all three of them.
                    $i += $length - 1;
                } else {
                    // Not valid UTF-8 here: treat the byte as ISO 8859-1.
                    $buf .= self::latin1ByteToUtf8($b1);
                }
            } elseif ($b1 >= 0x80) {
                // 0x80-0xBF outside of a sequence: a stray single-byte char.
                if (isset(self::$win1252ToUtf8[$b1])) {
                    // Windows-1252 special case (euro sign, curly quotes, ...).
                    $buf .= self::$win1252ToUtf8[$b1];
                } else {
                    $buf .= self::latin1ByteToUtf8($b1);
                }
            } else {
                // Plain ASCII needs no conversion.
                $buf .= $c1;
            }
        }
        return $buf;
    }

    /**
     * Number of bytes a UTF-8 sequence starting with the given lead byte
     * should have: 2 for 0xC0-0xDF, 3 for 0xE0-0xEF, 4 for 0xF0-0xF7, and 0
     * when the byte cannot start a sequence recognised by toUTF8().
     *
     * @param int $leadByte Byte value (0-255).
     * @return int
     */
    private static function utf8SequenceLength($leadByte) {
        if ($leadByte >= 0xC0 && $leadByte <= 0xDF) {
            return 2;
        }
        if ($leadByte >= 0xE0 && $leadByte <= 0xEF) {
            return 3;
        }
        if ($leadByte >= 0xF0 && $leadByte <= 0xF7) {
            return 4;
        }
        return 0;
    }

    /**
     * Tells whether $count bytes starting at $start all exist in $text and
     * are UTF-8 continuation bytes (0x80-0xBF).
     *
     * @param string $text  Byte string being scanned.
     * @param int    $start Offset of the first byte to check.
     * @param int    $count How many bytes to check.
     * @param int    $max   Length of $text in bytes.
     * @return bool
     */
    private static function hasContinuationBytes($text, $start, $count, $max) {
        for ($offset = $start; $offset < $start + $count; $offset++) {
            if ($offset >= $max || (ord($text[$offset]) & 0xC0) !== 0x80) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes one ISO 8859-1 byte in the range 0x80-0xFF as its two-byte
     * UTF-8 sequence. Integer shifts are used so chr() never receives a float.
     *
     * @param int $byte Byte value (0x80-0xFF).
     * @return string Two-byte UTF-8 sequence.
     */
    private static function latin1ByteToUtf8($byte) {
        return chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
    }

    /**
     * Converts a string (or every element of an array) to Windows-1252.
     *
     * The input is first normalised with toUTF8(), the Windows-1252 specific
     * characters are replaced through $utf8ToWin1252, and the result is passed
     * to utf8_decode(). Non-string, non-array values are returned unchanged.
     *
     * NOTE(review): utf8_decode() is deprecated as of PHP 8.2; it is kept here
     * to avoid introducing a dependency on mbstring/iconv - confirm the target
     * PHP version before replacing it.
     *
     * @param mixed $text String or array of values.
     * @return mixed
     */
    public static function toWin1252($text) {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v);
            }
            return $text;
        }
        if (is_string($text)) {
            return utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), self::toUTF8($text)));
        }
        return $text;
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed $text String or array of values.
     * @return mixed
     */
    public static function toISO8859($text) {
        return self::toWin1252($text);
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed $text String or array of values.
     * @return mixed
     */
    public static function toLatin1($text) {
        return self::toWin1252($text);
    }

    /**
     * Repeatedly decodes and re-encodes a string until it stops changing, in
     * order to undo UTF-8 that was encoded more than once. Arrays are
     * processed element by element.
     *
     * @param mixed $text String or array of values.
     * @return mixed
     */
    public static function fixUTF8($text){
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v);
            }
            return $text;
        }

        $last = "";
        // Strict comparison: stop only when a pass leaves the bytes untouched.
        // Once that fixed point is reached another pass cannot change anything,
        // so no extra conversion is needed after the loop.
        while ($last !== $text) {
            $last = $text;
            $text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text)));
        }
        return $text;
    }

    /**
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param mixed $text String (or array of strings) to repair.
     * @return mixed Result of replacing every $brokenUtf8ToUtf8 key with its value.
     */
    public static function UTF8FixWin1252Chars($text){
        return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
    }

    /**
     * Strips a leading UTF-8 byte order mark (EF BB BF) if present.
     *
     * @param string $str Input string.
     * @return string The string without the leading BOM.
     */
    public static function removeBOM($str=""){
        if (substr($str, 0, 3) === pack("CCC", 0xef, 0xbb, 0xbf)) {
            $str = substr($str, 3);
        }
        return $str;
    }
}
?>
To use it, you need to include the script containing this class and call it like this:
Encoding::toUtf8('Bankdrücken');
这篇关于php glob目录下的utf8的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!