php glob目录下的utf8 [英] php glob directory utf8

查看:122
本文介绍了php glob目录下的utf8的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我试图让所有文件在一个可变的路径... atm我使用glob(),但我只是无法找到一种方法来打开一个路径,其中包含äüö在...
我已经尝试了很多德/编码,但似乎没有工作..
当我改变äüö在文件名和变量我得到正确的答案...所以它必须与utf8字符... ...我猜\\ b
$ b

PHP脚本

  //通过ajax获取变量

$ town = $ _ POST ['town']; //例如Bankdrücken

$ pfad ='maps /'。 $国家。 /。 $ town。'/ *。jpg';

$ files = glob($ pfad);
$ n_files = count($ files);

$ erg = new stdClass();
$ erg-> files = $ files;
$ erg-> n_files = $ n_files;

echo json_encode($ erg);
return;

Javascript
< (数据){
console.log(data);
...
pre>

这不会返回 .../Bankdrücken,而是 .../Bankdr\u00fccken



我做jQuery.parseJson(数据);它返回./Bankdrücken,但是,因为这是在客户端服务器端已经失败..我只是不能正确地得到它的权利



这里是一个真正的例子...如果文件路径中的$变量包含äöü - > glob()不会得到我任何文件...
//不,我打印出我从ajax调用...

试用一个(带ü):
$ b $ stdClass对象

[files] => Array



  [n_files] => ; 0 
[filename] => workout_uebungen / Brust /Liegestützen/



/ ************************************** /
trail二(我改变了文件名从Liegestützen=> Liegestuetzen和$ var从Liegestützen=> Liegestuetzen)
$ b $ stdClass对象

[files] =>数组

[0] => workout_uebungen / Brust / Liegestuetzen / 1.jpg
[1] => workout_uebungen / Brust / Liegestuetzen / 2.jpg

  [n_files] => 2 
[文件名] => workout_uebungen / Brust / Liegestuetzen /




所以审判2给出了正确的答案,但我不想改变我所有的文件...以及这将是一个肮脏的解决方案给我...



 <?php $ b $ 

b class Encoding {

protected static $ win1252ToUtf8 = array(
128 =>\xe2\x82\xac,

130 => ;\\\ xe2\x80\x9a,
131 =>\\\\\\\\ x92,
132 =>
133 =>\ xe2\x80\xa6,
134 =>\ xe2\x80\xa0,
135 =>\\ \\ xe2 \x80 \xa1,
136 =>\ xcb\x86,
137 => b 138 =>\ xc5 \xa0,
139 =>\ xe2\x80\xb9,
140 =&g t;\xc5\x92,

142 =>\xc5\xbd,


145 =>\\ \\ xe2 \x80 \x98,
146 => \xe2\x80\x99,
147 => \xe2\x80\x9c,
148 => \xe2\x80\x9d,
149 => \xe2\x80\xa2,
150 => \xe2\x80\x93,
151 => \xe2\x80\x94,
152 => \xcb\x9c,
153 => \xe2\x84\xa2,
154 => \xc5\xa1,
155 => \xe2\x80\xba,
156 => \xc5\x93,

158 => \xc5\xbe,
159 => \xc5\xb8
);
$ b $ protected static $ brokenUtf8ToUtf8 = array(
\xc2\x80=" \xe2\x82\xac,

\xc2\x82=>\ xe2\x80\x9a,
\xc2\x83=>\xc6\x92,
\xc2\x84=>\ xe2\x80\x9e,
\xc2\x85=>\ xe2\x80\xa6 ,
\xc2\x86=>\xe2\x80\xa0,
\xc2\x87=>\xe2\x80 \ xa1,
\xc2\x88=>\xcb\x86,
\xc2\x89=>\xe2\ x80 \xb0,
\xc2\x8a=>\xc5\xa0,
\xc2\x8b=>\xe2\\ \\ x80 \xb9,
\xc2\x8c=>\xc5\x92,

\xc2\x8e=> \xc5\xbd,


\xc2\x91=> \xe2\x80\x98,
\xc2\x92=>\xe2\x80\x99,
\xc2\x93 = \\xe2 \x80 \x9c,
\xc2\x94=>\ xe2\x80\x9d,
\ xc2 \x95=>\xe2\x80\xa2,
\xc2\x96=> \xe2\x80\x93,
\xc2\x97=> \xe2\x80\x94,
\xc2\x98=> \xcb\x9c,
\xc2\x99=> \xe2\x84\xa2,
\xc2\x9a=> \xc5\xa1,
\xc2\x9b=> \xe2\x80\xba,
\xc2\x9c=> \xc5\x93,

\xc2\x9e=> \xc5\xbe,
\xc2\x9f=> \xc5\xb8
);

protected static $ utf8ToWin1252 = array(
\xe2\x82\xac=&; \x80,

\ xe2 \x80\x9a=>\x82,
\xc6\x92=>\x83,
\xe2\x80\\ \\ x9e=>\x84,
\xe2\x80\xa6=>\x85,
\xe2\x80\xa0 =>\x86,
\xe2\x80\xa1=>\x87,
\xcb\x86=> \ x88,
\xe2\x80\xb0=>\x89,
\xc5\xa0=>\x8a,
\xe2\x80\xb9=>\x8b,
\xc5\x92=>\x8c,

\xc5\xbd=>\x8e,


\xe2\x80\x98=>\x91 ,
\xe2\x80\x99=>\x92,
\xe2\x80 \\ x9c=>\x93,
\xe2\x80\x9d=>\x94,
\xe2\x80\xa2 =>\x95,
\xe2\x80\x93=>\x96,
\xe2\x80\x94= >\x97,
\xcb\x9c=> \x98,
\xe2\x84\xa2=> \x99,
\xc5\xa1=> \x9a,
\xe2\x80\xba=> \x9b,
\xc5\x93=> \x9c,

\xc5\xbe=> \x9e,
\xc5\xb8=> \x9f
);

静态函数toUTF8($ text){
/ **
*函数编码:: toUTF8
*
*这个函数只保留UTF8字符,同时将几乎所有非UTF8转换为UTF8。
*
*它假定原始字符串的编码是Windows-1252或ISO 8859-1。
*
*如果这些字符中的任何一个出现这种情况,则可能无法将字符转换为UTF-8:
*
* 1)
*之后是以下任何一个:(B组)
*,£¤¥|§¨¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯° b $ b *例如:%ABREPRESENT%C9%BB。 «REPRESENTÉ»
(%AB)字符将被转换,但是É后跟»(%C9%BB)
*也是一个有效的Unicode字符,将保持不变。
*
* 2)当其中任何一个:àáâããäæçèéêëìíîï后跟B组的两个字符,
* 3),其中任何一个:ðñòó后跟B组中的三个字符。 b $ b *
* @name toUTF8
* @param string $ text任何字符串。
* @return string相同的字符串,UTF8编码
*
* /

if(is_array($ text))
{
foreach($ text为$ k => $ v)
{
$ text [$ k] = self :: toUTF8($ v);
}
返回$ text;
} elseif(is_string($ text)){

$ max = strlen($ text);
$ buf =; ($ i = 0; $ i <$ max; $ i ++){
$ c1 = $ text {$ i};
if($ c1> =\xc0){//如果已经不是UTF8,应该转换为UTF8
$ c2 = $ i + 1> = $ max? \x00:$ text {$ i + 1};
$ c3 = $ i + 2> = $ max? \x00:$ text {$ i + 2};
$ c4 = $ i + 3> = $ max? \x00:$ text {$ i + 3};
if($ c1> =\xc0& $ c1< =\xdf){// //看起来像2个字节UTF8
if($ c2> =\\ \\ x80&& $ c2< =\xbf){//是的,几乎可以肯定的是UTF8已经是
$ buf。= $ c1了。 $ C2;
$ i ++;
} else {//无效的UTF8。转换它。
$ cc1 =(chr(ord($ c1)/ 64)|\xc0);
$ cc2 =($ c1&\x3f)| \x80;
$ buf。= $ cc1。 $ CC2;
}
} elseif($ c1> =\xe0& $ c1< =\ xef){//看起来像3个字节UTF8
if($ c2> =\x80&& $ c2< =\xbf&&& $ c3> =\x80&& $ c3< =\xbf ){//是的,几乎可以肯定它的UTF8已经是
$ buf。= $ c1。 $ c2。 $ C3;
$ i = $ i + 2;
} else {//无效的UTF8。转换它。
$ cc1 =(chr(ord($ c1)/ 64)|\xc0);
$ cc2 =($ c1&\x3f)| \x80;
$ buf。= $ cc1。 $ CC2;
}
} elseif($ c1> =\xf0& $ c1< =\xf7){//看起来像4个字节UTF8
if($ c2> =\x80&& $ c2< =\xbf&& $ c3> =\x80&& $ c3< =\xbf && $ c4> =\x80&& $ c4< =\xbf){//是的,几乎可以肯定的是UTF8已经
$ buf。= $ c1 。 $ c2。 $ C3;
$ i = $ i + 2;
} else {//无效的UTF8。转换它。
$ cc1 =(chr(ord($ c1)/ 64)|\xc0);
$ cc2 =($ c1&\x3f)| \x80;
$ buf。= $ cc1。 $ CC2;
}
} else {//看起来不像UTF8,但应该转换
$ cc1 =(chr(ord($ c1)/ 64)|\xc0) ;
$ cc2 =(($ c1&\x3f)|\x80);
$ buf。= $ cc1。 $ CC2; $(b
$ b)elseif(($ c1&\xc0)==\x80){//需要转换
if(isset(self :: $ win1252ToUtf8 [ ord($ c1)])){//在Windows-1252中找到特殊情况
$ buf。= self :: $ win1252ToUtf8 [ord($ c1)];
} else {
$ cc1 =(chr(ord($ c1)/ 64)|\xc0);
$ cc2 =(($ c1&\x3f)|\x80);
$ buf。= $ cc1。 $ CC2;
}
} else {//不需要convesion
$ buf。= $ c1;
}
}
return $ buf;
} else {
return $ text;


$ b静态函数toWin1252($ text){
if(is_array($ text)){
foreach($ text as $ k => $ v){
$ text [$ k] = self :: toWin1252($ v);
}
返回$ text; $ self_toUTF8($ text)
} elseif(is_string($ text)){
return utf8_decode(str_replace(array_keys(self :: $ utf8ToWin1252),array_values(self :: $ utf8ToWin1252) );
} else {
return $ text;



静态函数toISO8859($ text){
返回self :: toWin1252($ text);


静态函数toLatin1($ text){
return self :: toWin1252($ text);


static function fixUTF8($ text){
if(is_array($ text)){
foreach($ text as $ k => $ v ){
$ text [$ k] = self :: fixUTF8($ v);
}
返回$ text;
}

$ last =;
while($ last<> $ text){
$ last = $ text;
$ text = self :: toUTF8(utf8_decode(str_replace(array_keys(self :: $ utf8ToWin1252),array_values(self :: $ utf8ToWin1252),$ text)));
}
$ text = self :: toUTF8(utf8_decode(str_replace(array_keys(self :: $ utf8ToWin1252),array_values(self :: $ utf8ToWin1252),$ text)));
返回$ text;


静态函数UTF8FixWin1252Chars($ text){
//如果您收到一个从Windows-1252转换的UTF-8字符串,因为它是ISO8859-1
//(忽略从80到9F的Windows-1252字符)使用这个函数来修复它。
//见:http://en.wikipedia.org/wiki/Windows-1252

返回str_replace(array_keys(self :: $ brokenUtf8ToUtf8),array_values(self :: $ brokenUtf8ToUtf8 ),$ text);


静态函数removeBOM($ str =){
if(substr($ str,0,3)== pack(CCC,0xef,0xbb ,0xbf)){
$ str = substr($ str,3);
}
return $ str;
}
}
?>

为了使用它,你需要包含这个类的脚本,并且像这样:

  Encoding :: toUtf8('Bankdrücken'); 


I am trying to get all files within a variable path. At the moment I am using glob(), but I cannot find a way to open a path that contains ä, ü or ö. I have tried a lot of decoding/encoding, but nothing seems to work. When I replace the ä, ü, ö in both the file name and the variable, I get the right answer, so I guess it has something to do with UTF-8 characters.

PHP script

// Read the town name sent by the AJAX POST request.

$town =$_POST['town'];       // for example "Bankdrücken"

// Build the glob pattern "maps/<country>/<town>/*.jpg".
// NOTE(review): $country is not defined anywhere in this snippet — presumably set earlier; confirm.
$pfad = 'maps/'. $country.'/'. $town .'/*.jpg';

// Collect the matching files and count them.
// NOTE(review): the PHP manual documents that glob() can return false on error — confirm count() is safe here.
$files = glob($pfad);
$n_files = count($files);

    // Bundle the results into a plain object for the JSON response.
    $erg = new stdClass();
    $erg->files = $files;
    $erg->n_files = $n_files;

    // NOTE(review): json_encode() is documented to escape non-ASCII characters as \uXXXX
    // unless JSON_UNESCAPED_UNICODE is passed, which would explain "Bankdr\u00fccken" in the raw response.
    echo json_encode($erg);
    return;

Javascript

.success(function(data){
    console.log(data);
...

This does not return .../Bankdrücken but .../Bankdr\u00fccken

When I call jQuery.parseJSON(data), it returns ./Bankdrücken, but since that happens on the client side, the server side has already failed by then. I just can't get it right.

here is an real example ... if the $variable within the file path contains ä ö ü -> glob() will not get me any files... // no i am printing out what i get back from the ajax call...

Trial One ( With ü ):

"stdClass Object ( [files] => Array ( )

[n_files] => 0
[filename] => workout_uebungen/Brust/Liegestützen/

) " /**************************************/ trail two ( i changed the file name from Liegestützen => Liegestuetzen and the $var from Liegestützen => Liegestuetzen)

"stdClass Object ( [files] => Array ( [0] => workout_uebungen/Brust/Liegestuetzen/1.jpg [1] => workout_uebungen/Brust/Liegestuetzen/2.jpg )

[n_files] => 2
[filename] => workout_uebungen/Brust/Liegestuetzen/

) "

So trial 2 gives the right answer, but I don't want to rename all my files; besides, that would be a dirty solution to me.

解决方案

In that case you can use:

<?php
/**
 * Heuristic converters between single-byte Western encodings
 * (Windows-1252 / ISO-8859-1) and UTF-8.
 *
 * All public methods are static. Methods that accept arrays convert every
 * element recursively and keep the keys.
 */
class Encoding
{
    /**
     * Windows-1252 byte value (0x80-0x9F) => UTF-8 sequence of the character
     * Windows-1252 assigns to that byte. Byte values missing from the table
     * have no entry here and are encoded like ISO-8859-1 bytes instead.
     */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",

        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",

        142 => "\xc5\xbd",


        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",

        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
    );

    /**
     * UTF-8 encoding of a C1 control code point (U+0080-U+009F) => UTF-8
     * sequence of the Windows-1252 character that lives at the same byte value.
     */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",

        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",

        "\xc2\x8e" => "\xc5\xbd",


        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",

        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
    );

    /**
     * UTF-8 sequence => single Windows-1252 byte (0x80-0x9F); the inverse of
     * $win1252ToUtf8.
     */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",

        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92"     => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86"     => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0"     => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92"     => "\x8c",

        "\xc5\xbd"     => "\x8e",


        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c"     => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1"     => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93"     => "\x9c",

        "\xc5\xbe"     => "\x9e",
        "\xc5\xb8"     => "\x9f"
    );

    /**
     * Leaves byte runs that already look like UTF-8 alone and converts every
     * other byte >= 0x80 to UTF-8, treating it as Windows-1252 / ISO-8859-1.
     *
     * The detection is a heuristic and cannot tell these inputs apart:
     *
     * 1) a byte 0xC0-0xDF followed by ONE byte 0x80-0xBF,
     * 2) a byte 0xE0-0xEF followed by TWO bytes 0x80-0xBF,
     * 3) a byte 0xF0-0xF7 followed by THREE bytes 0x80-0xBF.
     *
     * Such runs are assumed to be UTF-8 and are copied unchanged. For example,
     * in the Latin-1 input %ABREPRESENT%C9%BB the 0xAB byte is converted, but
     * 0xC9 0xBB also forms a 2-byte UTF-8 sequence and is therefore left as is.
     *
     * @param mixed $text String, or array of values, to convert.
     * @return mixed The converted string; arrays are converted element by
     *               element; any other type is returned unchanged.
     */
    public static function toUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }
        if (!is_string($text)) {
            return $text;
        }

        $max = strlen($text);
        $buf = '';
        for ($i = 0; $i < $max; $i++) {
            // Square-bracket offsets: the curly-brace form no longer parses on PHP 8.
            $c1 = $text[$i];
            $code = ord($c1);

            if ($code < 0x80) {
                // Plain ASCII needs no conversion.
                $buf .= $c1;
                continue;
            }

            if ($code < 0xc0) {
                // A stray 0x80-0xBF byte: Windows-1252 special case if there
                // is one, otherwise encode it as the ISO-8859-1 code point.
                $buf .= isset(self::$win1252ToUtf8[$code])
                    ? self::$win1252ToUtf8[$code]
                    : self::encodeHighByte($code);
                continue;
            }

            // $code >= 0xC0: number of continuation bytes a UTF-8 lead byte
            // with this value would announce (0 = cannot be a lead byte).
            if ($code <= 0xdf) {
                $trail = 1;
            } elseif ($code <= 0xef) {
                $trail = 2;
            } elseif ($code <= 0xf7) {
                $trail = 3;
            } else {
                $trail = 0;
            }

            if ($trail > 0 && self::hasContinuationBytes($text, $i + 1, $trail, $max)) {
                // Almost certainly UTF-8 already: copy the WHOLE sequence
                // (all four bytes for a 4-byte sequence) and skip past it.
                $buf .= substr($text, $i, $trail + 1);
                $i += $trail;
            } else {
                // Not valid UTF-8: treat the byte as a single-byte character.
                $buf .= self::encodeHighByte($code);
            }
        }
        return $buf;
    }

    /**
     * Converts text to Windows-1252. The input is first normalised with
     * toUTF8(), so both UTF-8 and single-byte input are accepted.
     *
     * Characters that Windows-1252 cannot represent are replaced with "?".
     *
     * @param mixed $text String, or array of values, to convert.
     * @return mixed The converted string; arrays are converted element by
     *               element; any other type is returned unchanged.
     */
    public static function toWin1252($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v);
            }
            return $text;
        }
        if (!is_string($text)) {
            return $text;
        }
        return self::decodeToWin1252(self::toUTF8($text));
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed $text String, or array of values, to convert.
     * @return mixed See toWin1252().
     */
    public static function toISO8859($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed $text String, or array of values, to convert.
     * @return mixed See toWin1252().
     */
    public static function toLatin1($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Repairs text that was UTF-8 encoded more than once by repeatedly
     * decoding it to Windows-1252 and re-running toUTF8() until the result
     * stops changing.
     *
     * Because every pass goes through Windows-1252, characters that encoding
     * cannot represent come back as "?".
     *
     * @param mixed $text String, or array of values, to repair. Other scalar
     *                    values are cast to string first.
     * @return mixed The repaired string; arrays are repaired element by element.
     */
    public static function fixUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v);
            }
            return $text;
        }
        if (!is_string($text)) {
            $text = (string) $text;
        }

        // Iterate to a fixed point. Once a pass leaves the text unchanged a
        // further pass cannot change it either, so no extra pass is needed.
        $last = null;
        while ($last !== $text) {
            $last = $text;
            $text = self::toUTF8(self::decodeToWin1252($text));
        }
        return $text;
    }

    /**
     * Fixes a UTF-8 string that was converted from Windows-1252 as if it were
     * ISO-8859-1 (so bytes 0x80-0x9F became C1 control code points instead of
     * the Windows-1252 characters).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string|array $text Text to fix; passed straight to str_replace().
     * @return string|array The text with the affected sequences replaced.
     */
    public static function UTF8FixWin1252Chars($text)
    {
        return str_replace(
            array_keys(self::$brokenUtf8ToUtf8),
            array_values(self::$brokenUtf8ToUtf8),
            $text
        );
    }

    /**
     * Strips a leading UTF-8 byte order mark (EF BB BF), if present.
     *
     * @param string $str Input string.
     * @return string The string without the leading BOM.
     */
    public static function removeBOM($str = "")
    {
        if (substr($str, 0, 3) === "\xef\xbb\xbf") {
            $str = substr($str, 3);
        }
        return $str;
    }

    /**
     * Encodes one byte value 0x80-0xFF as the two-byte UTF-8 sequence of the
     * code point with the same number (the ISO-8859-1 interpretation).
     * Integer shifts are used so that chr() never receives a float.
     *
     * @param int $code Byte value, 0x80-0xFF.
     * @return string Two-byte UTF-8 sequence.
     */
    private static function encodeHighByte($code)
    {
        return chr(0xc0 | ($code >> 6)) . chr(0x80 | ($code & 0x3f));
    }

    /**
     * Tells whether $count bytes starting at $offset all exist and are UTF-8
     * continuation bytes (0x80-0xBF).
     *
     * @param string $text   Byte string being scanned.
     * @param int    $offset Index of the first byte to test.
     * @param int    $count  Number of bytes to test.
     * @param int    $length strlen($text), passed in to avoid recomputing it.
     * @return bool
     */
    private static function hasContinuationBytes($text, $offset, $count, $length)
    {
        if ($offset + $count > $length) {
            return false;
        }
        for ($k = 0; $k < $count; $k++) {
            if ((ord($text[$offset + $k]) & 0xc0) !== 0x80) {
                return false;
            }
        }
        return true;
    }

    /**
     * Decodes UTF-8 into Windows-1252 bytes without utf8_decode().
     *
     * Sequences listed in $utf8ToWin1252 become their 0x80-0x9F byte, code
     * points U+0080-U+00FF become the byte with the same value, ASCII is
     * copied, and everything else (invalid bytes, unrepresentable characters)
     * becomes "?".
     *
     * @param string $text UTF-8 input.
     * @return string Windows-1252 bytes.
     */
    private static function decodeToWin1252($text)
    {
        $length = strlen($text);
        $out = '';
        for ($i = 0; $i < $length; $i++) {
            $lead = ord($text[$i]);
            if ($lead < 0x80) {
                $out .= $text[$i];
                continue;
            }

            // 0xC0/0xC1 (overlong forms) and 0xF5+ are never valid lead bytes.
            if ($lead >= 0xc2 && $lead <= 0xdf) {
                $trail = 1;
            } elseif ($lead >= 0xe0 && $lead <= 0xef) {
                $trail = 2;
            } elseif ($lead >= 0xf0 && $lead <= 0xf4) {
                $trail = 3;
            } else {
                $trail = 0;
            }

            if ($trail === 0 || !self::hasContinuationBytes($text, $i + 1, $trail, $length)) {
                // Invalid byte: substitute it and resume at the next byte.
                $out .= '?';
                continue;
            }

            $sequence = substr($text, $i, $trail + 1);
            if (isset(self::$utf8ToWin1252[$sequence])) {
                $out .= self::$utf8ToWin1252[$sequence];
            } elseif ($lead <= 0xc3) {
                // Lead byte 0xC2 or 0xC3: code point U+0080-U+00FF.
                $out .= chr((($lead & 0x03) << 6) | (ord($sequence[1]) & 0x3f));
            } else {
                $out .= '?';
            }
            $i += $trail;
        }
        return $out;
    }
}
?>

To use it, include the script containing this class and call it like this:

Encoding::toUtf8('Bankdrücken');

这篇关于php glob目录下的utf8的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆