编程常识

gb2312转utf8的PHP代码

生活词典 changshi.cidiancn.com

阅读: 303

<?php
// Build the GB2312 <-> Unicode lookup tables once, as globals, so the
// conversion functions below do not re-read the mapping file on every call.
//
//   $____global_codetable  : 7-bit GB2312 code (int)  => Unicode code point as a 4-digit hex string
//   $____global_codetable2 : Unicode code point (int) => 7-bit GB2312 code (int)
//
// The table is read from "gb2312.txt" in the directory of the entry script
// ($_SERVER["SCRIPT_FILENAME"]), not necessarily the directory of this file.
// Each data line is read at fixed offsets: 4 hex digits at offset 2 (GB2312
// code) and 4 hex digits at offset 9 (Unicode code point), i.e. lines shaped
// like "0x2121<sep>0x3000 ...". Lines starting with '#' are comments.
$____global_codetable = array();
$____global_codetable2 = array();
$____global_filename = pathinfo($_SERVER["SCRIPT_FILENAME"]);
$____global_filename = $____global_filename["dirname"]."/gb2312.txt";

$____global_tmp = false;
if (is_file($____global_filename) && is_readable($____global_filename)) {
    $____global_tmp = file($____global_filename);
}
if ($____global_tmp === false) {
    // Leave both tables empty instead of failing later on a non-array.
    trigger_error("gb2312 mapping table not readable: ".$____global_filename, E_USER_WARNING);
    $____global_tmp = array();
}

foreach ($____global_tmp as $____global_line) {
    if (strncmp($____global_line, '#', 1) === 0) {
        continue; // comment line
    }
    $____global_gbhex = (string) substr($____global_line, 2, 4);
    $____global_unihex = (string) substr($____global_line, 9, 4);
    // Skip blank, short or otherwise malformed lines: both fields must be
    // exactly four hexadecimal digits.
    if (strspn($____global_gbhex, '0123456789abcdefABCDEF') !== 4
        || strspn($____global_unihex, '0123456789abcdefABCDEF') !== 4) {
        continue;
    }
    // Fill both directions in a single pass over the file.
    $____global_codetable[hexdec($____global_gbhex)] = $____global_unihex;
    $____global_codetable2[hexdec($____global_unihex)] = hexdec($____global_gbhex);
}

// Do not leak the temporaries into the global scope.
unset($____global_filename);
unset($____global_tmp);
unset($____global_line);
unset($____global_gbhex);
unset($____global_unihex);


/**
 * Convert numeric character references in a string to GB2312 bytes.
 *
 * Recognises well-formed references of the form "&#xHHHH;" (hexadecimal,
 * case-insensitive "x") and "&#DDDD;" (decimal). Every reference whose code
 * point is present in the global $____global_codetable2 is replaced by the
 * two-byte GB2312 encoding (table value OR 0x8080, high byte first). All other
 * text is copied through unchanged, including:
 *   - references whose code point is not in the table (e.g. "&#38;"), so that
 *     escaped markup characters in XML stay escaped;
 *   - malformed "&#...;" spans and named entities such as "&amp;".
 *
 * Useful for re-encoding XML text; note that an XML encoding declaration in
 * the input is NOT rewritten.
 *
 * @param string $un Text that may contain numeric character references.
 * @return string The text with mapped references replaced by GB2312 bytes;
 *                empty/whitespace-only input is returned as-is.
 */
function unicode2gb($un)
{
    if (!trim($un)) {
        return $un;
    }
    global $____global_codetable2;

    // Split on well-formed references while keeping them in the result:
    // even indexes hold literal text, odd indexes hold one reference each.
    // Digit counts are capped so the numeric value always fits in an int.
    $pieces = preg_split(
        '/(&#(?:[xX][0-9A-Fa-f]{1,6}|[0-9]{1,7});)/',
        (string) $un,
        -1,
        PREG_SPLIT_DELIM_CAPTURE
    );

    $gb = "";
    foreach ($pieces as $index => $piece) {
        if ($index % 2 === 0) {
            $gb .= $piece;
            continue;
        }

        // Strip the leading "&#" and the trailing ";".
        $digits = substr($piece, 2, -1);
        if ($digits[0] === 'x' || $digits[0] === 'X') {
            $codepoint = hexdec(substr($digits, 1));
        } else {
            $codepoint = (int) $digits;
        }

        if (isset($____global_codetable2[$codepoint])) {
            // The table stores the 7-bit form; set the high bit of both bytes.
            $code = 0x8080 | $____global_codetable2[$codepoint];
            $gb .= chr(($code >> 8) & 0xFF).chr($code & 0xFF);
        } else {
            // No GB2312 equivalent: keep the reference instead of emitting
            // bogus bytes.
            $gb .= $piece;
        }
    }
    return $gb;
}

/**
 * Convert GB2312 text (optionally mixed with ASCII) into ASCII text in which
 * every GB2312 character is written as a hexadecimal numeric character
 * reference, e.g. "&#x4E2D;".
 *
 * Bytes <= 127 are copied unchanged. A byte > 127 followed by another byte
 * > 127 is treated as one GB2312 character: the pair is masked with 0x7F7F and
 * looked up in the global $____global_codetable, whose value (a hex string) is
 * placed inside "&#x...;". Input that cannot be converted - a pair missing
 * from the table, a high byte followed by an ASCII byte, or a lone high byte
 * at the end - yields "&#xFFFD;" (U+FFFD REPLACEMENT CHARACTER) and, in the
 * latter two cases, consumes only the offending byte so following ASCII is
 * preserved.
 *
 * Useful for re-encoding XML text; note that an XML encoding declaration in
 * the input is NOT rewritten.
 *
 * @param string $gb GB2312-encoded text.
 * @return string ASCII text with numeric character references;
 *                empty/whitespace-only input is returned as-is.
 */
function gb2unicode($gb)
{
    if (!trim($gb)) {
        return $gb;
    }
    global $____global_codetable;

    $gb = (string) $gb;
    $length = strlen($gb);
    $utf = "";
    $pos = 0;
    while ($pos < $length) {
        $lead = ord($gb[$pos]);
        if ($lead <= 127) {
            $utf .= $gb[$pos];
            $pos++;
            continue;
        }

        $trail = ($pos + 1 < $length) ? ord($gb[$pos + 1]) : 0;
        if ($trail <= 127) {
            // Lone or truncated lead byte: replace it and resynchronise on
            // the next byte rather than swallowing it.
            $utf .= "&#xFFFD;";
            $pos++;
            continue;
        }

        // The table is keyed by the 7-bit form of the two-byte code.
        $key = (($lead << 8) | $trail) & 0x7F7F;
        $pos += 2;
        if (isset($____global_codetable[$key])) {
            $utf .= "&#x".$____global_codetable[$key].";";
        } else {
            $utf .= "&#xFFFD;";
        }
    }
    return $utf;
}

/**
 * Convert UTF-8 text to GB2312 bytes.
 *
 * Unlike unicode2gb() this is a binary conversion: the input holds real UTF-8
 * byte sequences, not "&#x...;" references.
 *
 * Handling per input sequence:
 *   - ASCII bytes are copied unchanged;
 *   - complete 2-, 3- and 4-byte sequences are decoded to a code point and
 *     looked up in the global $____global_codetable2; a hit is emitted as the
 *     two-byte GB2312 code (table value OR 0x8080, high byte first), a miss is
 *     emitted as "?";
 *   - stray continuation bytes, invalid lead bytes and lead bytes of truncated
 *     or malformed sequences are skipped one byte at a time.
 *
 * @param string $utf8 UTF-8 encoded text.
 * @return string GB2312-encoded text; empty/whitespace-only input is
 *                returned as-is.
 */
function utf82gb($utf8)
{
    if (!trim($utf8)) {
        return $utf8;
    }
    global $____global_codetable2;

    $utf8 = (string) $utf8;
    $length = strlen($utf8);
    $gb = "";
    $pos = 0;
    while ($pos < $length) {
        $lead = ord($utf8[$pos]);

        if ($lead < 0x80) {
            // Single-byte (ASCII) character.
            $gb .= $utf8[$pos];
            $pos++;
            continue;
        }

        // Work out how many continuation bytes follow and the payload bits
        // carried by the lead byte.
        if (($lead & 0xE0) === 0xC0) {
            $extra = 1;
            $codepoint = $lead & 0x1F;
        } elseif (($lead & 0xF0) === 0xE0) {
            $extra = 2;
            $codepoint = $lead & 0x0F;
        } elseif (($lead & 0xF8) === 0xF0) {
            $extra = 3;
            $codepoint = $lead & 0x07;
        } else {
            // Stray continuation byte or invalid lead byte.
            $pos++;
            continue;
        }

        // Every expected continuation byte must exist and look like 10xxxxxx.
        $valid = ($pos + $extra < $length);
        for ($k = 1; $valid && $k <= $extra; $k++) {
            $next = ord($utf8[$pos + $k]);
            if (($next & 0xC0) !== 0x80) {
                $valid = false;
            } else {
                $codepoint = ($codepoint << 6) | ($next & 0x3F);
            }
        }
        if (!$valid) {
            // Drop only the lead byte; the loop resynchronises on the rest.
            $pos++;
            continue;
        }
        $pos += $extra + 1;

        if (isset($____global_codetable2[$codepoint])) {
            // The table stores the 7-bit form; set the high bit of both bytes.
            $code = 0x8080 | $____global_codetable2[$codepoint];
            $gb .= chr(($code >> 8) & 0xFF).chr($code & 0xFF);
        } else {
            // Character has no GB2312 equivalent.
            $gb .= "?";
        }
    }
    return $gb;
}

/**
 * Convert GB2312 text (optionally mixed with ASCII) to UTF-8 bytes.
 *
 * Unlike gb2unicode() this is a binary conversion: the output holds real
 * UTF-8 byte sequences, not "&#x...;" references.
 *
 * Bytes <= 127 are copied unchanged. A byte > 127 followed by another byte
 * > 127 is treated as one GB2312 character: the pair is masked with 0x7F7F and
 * looked up in the global $____global_codetable, whose value is the Unicode
 * code point as a hex string; that code point is then UTF-8 encoded. Input
 * that cannot be converted - a pair missing from the table, a high byte
 * followed by an ASCII byte, or a lone high byte at the end - yields U+FFFD
 * REPLACEMENT CHARACTER ("\xEF\xBF\xBD") and, in the latter two cases,
 * consumes only the offending byte so following ASCII is preserved.
 *
 * @param string $gb GB2312-encoded text.
 * @return string UTF-8 encoded text; empty/whitespace-only input is
 *                returned as-is.
 */
function gb2utf8($gb)
{
    if (!trim($gb)) {
        return $gb;
    }
    global $____global_codetable;

    $replacement = "\xEF\xBF\xBD"; // U+FFFD in UTF-8
    $gb = (string) $gb;
    $length = strlen($gb);
    $utf8 = "";
    $pos = 0;
    while ($pos < $length) {
        $lead = ord($gb[$pos]);
        if ($lead <= 127) {
            $utf8 .= $gb[$pos];
            $pos++;
            continue;
        }

        $trail = ($pos + 1 < $length) ? ord($gb[$pos + 1]) : 0;
        if ($trail <= 127) {
            // Lone or truncated lead byte: replace it and resynchronise on
            // the next byte rather than swallowing it.
            $utf8 .= $replacement;
            $pos++;
            continue;
        }

        // The table is keyed by the 7-bit form of the two-byte code.
        $key = (($lead << 8) | $trail) & 0x7F7F;
        $pos += 2;
        $code = isset($____global_codetable[$key]) ? hexdec($____global_codetable[$key]) : 0;

        if ($code <= 0 || $code > 0x10FFFF) {
            // Unmapped pair (or unusable table entry).
            $utf8 .= $replacement;
        } elseif ($code < 0x80) {
            // 7 bits: single byte.
            $utf8 .= chr($code);
        } elseif ($code < 0x800) {
            // 11 bits: 5 + 6.
            $utf8 .= chr(0xC0 | ($code >> 6));
            $utf8 .= chr(0x80 | ($code & 0x3F));
        } elseif ($code < 0x10000) {
            // 16 bits: 4 + 6 + 6.
            $utf8 .= chr(0xE0 | ($code >> 12));
            $utf8 .= chr(0x80 | (($code >> 6) & 0x3F));
            $utf8 .= chr(0x80 | ($code & 0x3F));
        } else {
            // 21 bits: 3 + 6 + 6 + 6.
            $utf8 .= chr(0xF0 | ($code >> 18));
            $utf8 .= chr(0x80 | (($code >> 12) & 0x3F));
            $utf8 .= chr(0x80 | (($code >> 6) & 0x3F));
            $utf8 .= chr(0x80 | ($code & 0x3F));
        }
    }
    return $utf8;
}
?>

分享常识给亲友.

下一篇:怎样汉化GBA游戏 下一篇 【方向键 ( → )下一篇】

上一篇:怎么成长为一名黑客 上一篇 【方向键 ( ← )上一篇】