`
hcmfys
  • 浏览: 349561 次
  • 性别: Icon_minigender_1
  • 来自: 广州
社区版块
存档分类
最新评论

php 转码类

    博客分类:
  • php
阅读更多
Charset::convert(string input,string incharset,string outcharset)

Charset::unescape(string escaped,string outcharset)

Charset::escape(string string,string incharset)

Charset::jsondecode(string encoded,string outcharset)

Charset::jsonencode(mixed value,string incharset)

Charset::pinYin(string chinese,string incharset)

PHP代码


<?php    
/*   
 * Character-set conversion.
 * Notes:
 * jsonencode borrows from Services_JSON but differs substantially; it can encode utf-8, gb2312 and big5 input.
 * jsondecode is the author's own work; the algorithm imitates the way a directory is read.
 *   It can unescape the Unicode escapes of Chinese text found in JSON into gbk, big5 or utf8.
 */   
// Directory the conversion table files are included from.
define('TABLE_DIR','./table');    
define('USEEXISTS',FALSE);// whether to use PHP's built-in conversion functions (iconv / mb_convert_encoding) when they exist
// Author's note: PHP's built-in conversion functions do not convert well enough.
class Charset{    
        
    // Target / source charset names; written by convert(), unescape(), jsondecode() and jsonencode().
    private static $target_lang,$source_lang;    
    // Text handed to the table-driven converters (set by convert()).
    protected static $string = '';    
    // Lookup table currently in use; replaced by convert(), unescape() and PinYin().
    protected static $table = NULL;   

PHP代码


/**
 * Convert text between the supported character sets.
 *
 * Charset names are lower-cased and the aliases 'gbk', 'utf8' and 'big-5'
 * are rewritten to 'gb2312', 'utf-8' and 'big5' before anything else happens.
 * When USEEXISTS is enabled, iconv / mb_convert_encoding are used for every
 * pair except gb2312 <-> big5; otherwise a lookup table is loaded and one of
 * the table-driven converters does the work.
 *
 * Side effect: on the table-driven path the static source text, source
 * charset, target charset and active table are overwritten.
 *
 * @param string $source       text to convert
 * @param string $source_lang  input charset: 'utf-8', 'gb2312' or 'big5'
 * @param string $target_lang  output charset: 'utf-8', 'gb2312' or 'big5'
 * @return string|null converted text; $source itself when it is empty, when
 *                     both charsets are equal or when no table is available;
 *                     NULL when the source charset is not handled
 */
static public function convert($source,$source_lang,$target_lang='utf-8'){
    $aliases   = array('gbk','utf8','big-5');
    $canonical = array('gb2312','utf-8','big5');
    if($source_lang != ''){
        $source_lang = str_replace($aliases,$canonical,strtolower($source_lang));
    }
    if($target_lang != ''){
        $target_lang = str_replace($aliases,$canonical,strtolower($target_lang));
    }
    // Nothing to do for empty input or identical charsets.
    if($source == ''||$source_lang == $target_lang){
        return $source;
    }

    $pair = $source_lang."_".$target_lang;
    // Simplified <-> traditional is a character mapping, not a mere re-encoding,
    // so the built-in converters are never used for that pair.
    if(USEEXISTS&&!in_array($pair,array('gb2312_big5','big5_gb2312'))){
        if(function_exists('iconv')){
            return iconv($source_lang,$target_lang,$source);
        }
        if(function_exists('mb_convert_encoding')){
            return mb_convert_encoding($source,$target_lang,$source_lang);
        }
    }

    $map = self::loadtable($pair);
    if(!$map){
        return $source;
    }
    self::$string = $source;
    self::$source_lang = $source_lang;
    self::$target_lang = $target_lang;

    switch($source_lang){
        case 'gb2312':
        case 'big5':
            if($target_lang=='utf-8'){
                self::$table = $map;
                return self::CHS2UTF8();
            }
            // The shared table is used as loaded for a big5 target and flipped for a gb2312 target.
            self::$table = ($target_lang=='gb2312') ? array_flip($map) : $map;
            return self::BIG2GB();
        case 'utf-8':
            self::$table = array_flip($map);
            return self::UTF82CHS();
    }
    return NULL;
}

PHP代码


/**
 * Counterpart of JavaScript's unescape(): replaces every "%uXXXX" or "\uXXXX"
 * sequence (four hexadecimal digits) in $str with the character it denotes,
 * encoded in the requested charset.
 *
 * Side effect: stores the normalised charset name as the static target
 * charset and, when the table-driven path is needed, replaces the static
 * lookup table.
 *
 * @param string $str       text containing the escape sequences
 * @param string $charset   output charset: 'utf-8', 'gb2312' or 'big5'
 *                          (aliases 'utf8', 'gbk' and 'big-5' are accepted)
 * @return string
 */
static public function unescape($str,$charset='utf-8'){
    self::$target_lang = str_replace(
        array('gbk','utf8','big-5'),
        array('gb2312','utf-8','big5'),
        strtolower($charset)
    );
    $useBuiltin = USEEXISTS&&(function_exists('mb_convert_encoding')||function_exists('iconv'));
    if(self::$target_lang!='utf-8'&&!$useBuiltin){
        // Look the table up under the normalised name so that aliases such as
        // 'gbk' resolve, and never hand a failed load to array_flip().
        $loaded = self::loadtable('unescapeto'.self::$target_lang);
        self::$table = is_array($loaded) ? array_flip($loaded) : array();
    }
    // Only a backslash or '%' may introduce an escape, and only hex digits may follow.
    return preg_replace_callback('/[\\\\%]u([0-9a-f]{4})/i',array('Charset','descape'),$str);
}

PHP代码


/**
 * Counterpart of JavaScript's escape(): every multi-byte character handled
 * below is rewritten as "\uXXXX" (upper-case hex); all other bytes are copied
 * through unchanged.
 *
 * For utf-8 input, two- and three-byte sequences are decoded arithmetically.
 * For the other charsets, every byte above 0x80 is paired with the byte that
 * follows it and the pair is looked up in the charset's table.
 *
 * @param string $str       text to escape
 * @param string $charset   charset of $str: 'utf-8', 'gb2312' or 'big5'
 *                          (aliases 'utf8', 'gbk' and 'big-5' are accepted)
 * @return string
 */
static public function escape($str,$charset='utf-8'){
    $charset = str_replace(
        array('gbk','big-5','utf8'),
        array('gb2312','big5','utf-8'),
        strtolower($charset)
    );
    $out = '';
    $length = strlen($str);

    if($charset=='utf-8'){
        $pos = 0;
        while($pos<$length){
            $byte = $str[$pos];
            $code = ord($byte);
            $lead = $code>>4;
            if($lead==12||$lead==13){
                // 110xxxxx 10xxxxxx
                $code = ((($code&0x1F)<<6)|(ord($str[++$pos])&0x3F));
                $out .= sprintf('\u%04X',$code);
            }elseif($lead==14){
                // 1110xxxx 10xxxxxx 10xxxxxx
                $code = ((($code&0x0F)<<12)|((ord($str[++$pos])&0x3F)<<6)|(ord($str[++$pos])&0x3F));
                $out .= sprintf('\u%04X',$code);
            }else{
                // ASCII and every other byte value are passed through untouched
                $out .= $byte;
            }
            $pos++;
        }
        return $out;
    }

    $map = self::loadtable($charset.'escape');
    for($pos=0;$pos<$length;$pos++){
        $byte = $str[$pos];
        if(ord($byte)<=0x80){
            $out .= $byte;
            continue;
        }
        $pair = $byte.$str[$pos+1];
        $pos += 1;
        // bin2hex() yields a hex string, so it is converted back to an integer key
        $out .= sprintf('\u%04X',$map[hexdec(bin2hex($pair))]);
    }
    return $out;
}

PHP代码


/**
 * Decode a JSON document.
 *
 * Whitespace between tokens is removed first; string literals are left
 * exactly as written. The first character then selects the parser: '{' and
 * '[' build arrays, '"' reads a string, a leading 't', 'f' or 'n' yields
 * TRUE, FALSE or NULL, and anything else is read as a number.
 *
 * Side effect: stores $charset as the static target charset.
 *
 * @param string $encoded   JSON text
 * @param string $charset   charset for decoded strings: 'utf-8', 'gb2312' or 'big5'
 * @return mixed string, array, boolean, number or NULL
 */
static public function jsondecode($encoded,$charset='utf-8'){
    // Drop whitespace (and backspace, as before) between tokens only: the first
    // alternative matches a whole string literal and is put back via $1.
    $stripped = preg_replace(
        '/("(?:[^"\\\\]++|\\\\.)*+")|[\t\x08\f\n\r ]+/s',
        '$1',
        $encoded
    );
    if($stripped!==NULL){
        $encoded = $stripped;
    }
    self::$target_lang = $charset;
    $c = self::cursor($encoded);
    switch($c){
        case '{':return self::parseArray($encoded);
        case '[':return self::parseArray($encoded,FALSE);
        case '"':return self::string_find($encoded);
        case 't':return TRUE;
        case 'f':return FALSE;
        case 'n':return NULL;
        default:return self::num_read($c.$encoded);
    }
}

PHP代码


/**
 * Encode a PHP value as JSON.
 *
 * Booleans and NULL become the JSON literals, integers and floats are
 * returned as numbers, strings are escaped and quoted, arrays and objects
 * (via their accessible properties) are handed to encodearray(). Any other
 * type encodes as 'null'.
 *
 * Side effect: a non-NULL $charset is remembered as the static source
 * charset, so nested calls made without a charset reuse it.
 *
 * @param mixed  $var       value to encode
 * @param string $charset   charset of the strings inside $var: 'utf-8',
 *                          'gb2312' or 'big5'; NULL reuses the remembered
 *                          charset and falls back to 'utf-8'
 * @return string|int|float
 */
static public function jsonencode($var,$charset=NULL){
    if(is_null($charset)){
        $charset = self::$source_lang;
    }else{
        self::$source_lang = $charset;
    }
    if(!$charset){
        $charset = 'utf-8';
    }
    switch (gettype($var)){
        case 'boolean':
            return $var ? 'true' : 'false';
        case 'NULL':
            return 'null';
        case 'integer':
            return (int) $var;
        case 'double':
        case 'float':
            return (float) $var;
        case 'string':
            // PHP has no "\b" escape: "\b" is a backslash followed by 'b'. As a
            // strtr key it used to win over the backslash rule and leave a
            // literal \b unescaped, so backspace is matched as "\x08" only.
            // NOTE(review): this replacement is byte-wise; for gb2312/big5 input
            // a trail byte may presumably equal 0x5C or 0x22 - verify.
            $var = strtr($var,array(
                '\\'   => '\\\\',
                '"'    => '\\"',
                "\x08" => '\\b',
                "\x0c" => '\\f',
                "\n"   => '\\n',
                "\r"   => '\\r',
                "\t"   => '\\t',
            ));
            $var = self::escape($var,$charset);
            return '"'.$var.'"';
        case 'array':
            return self::encodearray($var);
        case 'object':
            $var = get_object_vars($var);
            return self::encodearray($var);
        default:return 'null';
    }
}

PHP代码


    /**
     * Transliterate Chinese text to pinyin.
     *
     * The text is converted to gb2312 first when it is in another charset.
     * Every byte above 0xA0 is combined with the byte that follows it into a
     * table index; all other bytes are copied through unchanged.
     *
     * Side effect: replaces the static lookup table with the pinyin table.
     *
     * @param string $str
     * @param string $charset     input charset: 'utf-8', 'gb2312' or 'big5'
     * @return string result with surrounding whitespace trimmed
     */
    static public function PinYin($str,$charset='utf-8'){
        if($charset!='gb2312'){
            $str = self::convert($str,$charset,'gb2312');
        }
        // The table sits directly inside TABLE_DIR; the previous './pinyin.php'
        // suffix produced the path "./table./pinyin.php".
        self::$table = include(TABLE_DIR.'/pinyin.php');
        $gblen = strlen($str);
        $pin = '';
        for($i=0;$i<$gblen;$i++){
            $c = ord($str[$i]);
            if($c > 0x00A0){
                // index = 0x10000 minus the 16-bit value of the byte pair
                $index = 0x10000-($c*0x0100 + ord($str[++$i]));
                $pin .= self::getPinYin($index);
            }else{
                $pin .= $str[$i];
            }
        }
        return trim($pin);
    }
    /**
     * Look up the pinyin for a table index.
     *
     * Index 0x1534 is special-cased to 'yan'. Otherwise the index must lie in
     * 0x2807..0x4F5F; the first table entry at or above it (up to 0x4F5F) wins.
     *
     * @param int $index
     * @return string the pinyin, or '' when out of range, when no table is
     *                loaded or when no entry is found
     */
    static protected function getPinYin($index){
        if($index==0x1534){
            return 'yan';
        }
        $upper = 0x4F5F;
        if($index<0x2807||$index>$upper||!self::$table){
            return '';
        }
        // Walk upwards to the nearest defined entry.
        for($probe=$index;$probe<=$upper;$probe++){
            if(isset(self::$table[$probe])){
                return self::$table[$probe];
            }
        }
        return '';
    }
    /**
     * Load (once per request) the table file that serves a conversion key.
     *
     * Several keys share one file: every gb2312/utf-8 key uses 'gbkutf', every
     * big5/utf-8 key uses 'big5utf', and both gb2312/big5 directions use
     * 'gbkbig5'. Whatever the include yields is cached per file, including a
     * failed include.
     *
     * @param string $index conversion key such as 'gb2312_utf-8' or 'big5escape'
     * @return mixed value produced by the table file, or NULL for an unknown key
     */
    static protected function loadtable($index){
        static $cache = array();
        $files = array(
            'gb2312_utf-8'     => 'gbkutf',
            'utf-8_gb2312'     => 'gbkutf',
            'gb2312escape'     => 'gbkutf',
            'unescapetogb2312' => 'gbkutf',
            'big5_utf-8'       => 'big5utf',
            'utf-8_big5'       => 'big5utf',
            'big5escape'       => 'big5utf',
            'unescapetobig5'   => 'big5utf',
            'gb2312_big5'      => 'gbkbig5',
            'big5_gb2312'      => 'gbkbig5',
        );
        if(!isset($files[$index])){
            return NULL;
        }
        $name = $files[$index];
        if(!isset($cache[$name])){
            $cache[$name] = @include(TABLE_DIR."/".$name.".php");
        }
        return $cache[$name];
    }
    /**
     * preg_replace_callback() handler used by unescape(): turns one matched
     * escape into a character in the current target charset.
     *
     * @param array $str regex match; element 1 holds the four hex digits
     * @return string the character, or a "&#N;" numeric entity when the
     *                active table has no entry for the code point
     */
    static protected function descape($str){
        $code = hexdec($str[1]);
        $utf8 = self::u2utf8($code);
        if(self::$target_lang == 'utf-8'){
            return $utf8;
        }
        if(USEEXISTS&&function_exists('iconv')){
            return iconv('utf-8',self::$target_lang,$utf8);
        }
        if(USEEXISTS&&function_exists('mb_convert_encoding')){
            return mb_convert_encoding($utf8,self::$target_lang,'utf-8');
        }
        // Table-driven fallback.
        return isset(self::$table[$code])
            ? self::hex2bin(dechex(self::$table[$code]))
            : "&#".$code.";";
    }
    /**
     * Parse the members of a JSON object or array.
     *
     * $str must start right after the opening bracket. The container body is
     * cut off the front of $str (so the caller continues after the closing
     * bracket) and read member by member; nested containers recurse.
     *
     * @param string $str   remaining input, consumed in place
     * @param bool   $index TRUE for an object ('{'), FALSE for a list ('[')
     * @return array
     */
    static protected function parseArray(&$str,$index=TRUE){
        $result = array();
        $fp = self::array_open($index,$str);// "opens" the container like a directory
        // Compare against '' explicitly: a plain truth test would stop on the body "0".
        while(is_string($fp)&&$fp!==''){
            $type = '';
            $key = '';
            $value = self::array_read($fp,$type,$index,$key);// reads one entry
            if($type=='{'){
                $value = self::parseArray($fp);// recurse into nested object
            }elseif($type=='['){
                $value = self::parseArray($fp,FALSE);// recurse into nested list
            }
            if($index){
                $result[$key] = $value;
            }else{
                $result[] = $value;
            }
        }
        return $result;
    }
    /**
     * Cut the body of the container that has just been opened off $string.
     *
     * Scans for the closing bracket that matches the already consumed opening
     * one, counting nested brackets of the same kind and ignoring anything
     * inside string literals. On return $string holds what follows the
     * closing bracket.
     *
     * @param bool   $index  TRUE for '{' ... '}', FALSE for '[' ... ']'
     * @param string $string remaining input, consumed in place
     * @return string the container body without its brackets; when no
     *                matching bracket exists, all remaining input
     */
    static protected function array_open($index,&$string){
        if($index){
            $end = '}';
            $new = '{';
        }else{
            $end = ']';
            $new = '[';
        }
        $string = (string)$string;
        $len = strlen($string);
        $depth = 1;
        $inString = FALSE;
        for($i=0;$i<$len;$i++){
            $ch = $string[$i];
            if($inString){
                if($ch==='\\'){
                    $i++;// skip the escaped character
                }elseif($ch==='"'){
                    $inString = FALSE;
                }
                continue;
            }
            if($ch==='"'){
                $inString = TRUE;
            }elseif($ch===$new){
                $depth++;
            }elseif($ch===$end){
                $depth--;
                if($depth===0){
                    $fp = (string)substr($string,0,$i);
                    $string = (string)substr($string,$i+1);
                    return $fp;
                }
            }
        }
        // Unterminated container: treat everything that is left as its body.
        $fp = $string;
        $string = '';
        return $fp;
    }
    /**
     * Find $sign in $string at or after $offset, skipping occurrences that
     * lie inside a string literal. array_open() in this class no longer
     * calls it.
     *
     * @param string $string
     * @param string $sign   text to look for
     * @param int    $offset where to start searching
     * @return int|false position of the match, FALSE when there is none
     */
    static protected function getpos($string,$sign,$offset=0){
        /**
         * How "inside a string" is decided:
         * count the '"' characters between $offset and the position of $sign;
         * an odd count means the match is inside a string, an even count outside.
         */
        $pos = strpos($string,$sign,$offset);    
        if($pos===FALSE){    
            return FALSE;    
        }    
        $str = substr($string,$offset,$pos-$offset);    
        $arr = array();    
        preg_match_all('/"/',str_replace('\"','',$str),$arr);    
        $in = count($arr[0])%2;    
        if(!$in){    
            return $pos;    
        }    
        do{    
            $next = strpos($string,$sign,$pos+1);    
            if($next===FALSE){    
                return FALSE;    
            }    
            $str = substr($string,$pos,$next-$pos);    
            $arr = array();    
            preg_match_all('/"/',str_replace('\"','',$str),$arr);    
            $in = !(count($arr[0])%2);    
            $pos = $next;    
        }while($in);    
        return $pos;    
    }    
    /**
     * Read one member from a container body.
     *
     * Scalars are consumed together with the separator that follows them.
     * For a nested container only its opening bracket is consumed, $type is
     * set to that bracket and NULL is returned so the caller can recurse.
     * Malformed literals or separators end the script via die().
     *
     * @param string      $fp    container body, consumed in place
     * @param string      $type  set to '{' or '[' when a nested container starts
     * @param bool        $index TRUE when members carry a "key": prefix
     * @param string|null $key   receives the member key when $index is TRUE
     * @return mixed the scalar value, or NULL for a nested container
     */
    static protected function array_read(&$fp,&$type,$index=TRUE,&$key=null){
        $fp = (string)$fp;
        if($fp!==''&&$fp[0]==','){
            self::cursor($fp);// skip the ',' left behind by a nested container
        }
        if($index){
            self::cursor($fp);// skip the key's opening '"'
            $key = self::string_find($fp);// read the key
            self::cursor($fp);// skip ':'
        }
        $c = self::cursor($fp);
        switch($c){
            case '{':
            case '[':
                $type = $c;
                return NULL;
            case '"':
                $rs = self::string_find($fp);
                $s = self::cursor($fp);// must be ',' or the end of the body
                if(!($s==','||$s==null)){
                    die('parse error1!');
                }
                return $rs;
            case 't':
                if(self::cursor($fp,3)=='rue'){
                    $s = self::cursor($fp);// must be ',' or the end of the body
                    if(!($s==','||$s==null)){
                        die("parse error$s!");
                    }
                    return TRUE;
                }else{
                    die('parse error3!');
                }
            case 'f':
                if(self::cursor($fp,4)=='alse'){
                    $s = self::cursor($fp);// must be ',' or the end of the body
                    if(!($s==','||$s==null)){
                        die('parse error4!');
                    }
                    return FALSE;
                }else{
                    die('parse error5!');
                }
            case 'n':
                if(self::cursor($fp,3)=='ull'){
                    $s = self::cursor($fp);// must be ',' or the end of the body
                    if(!($s==','||$s==null)){
                        die('parse error6!');
                    }
                    return NULL;
                }else{
                    // Reject a bad literal like 't' and 'f' do instead of
                    // falling through into the number branch.
                    die('parse error7!');
                }
            default:
                // A number runs up to the next ',' or to the end of the body.
                $fp = (string)$fp;
                $pos = strpos($fp,',');
                if($pos===FALSE){
                    $num = $fp;
                    $fp = '';
                }else{
                    $num = (string)substr($fp,0,$pos);
                    $fp = (string)substr($fp,$pos+1);
                }
                return self::num_read($c.$num);
        }
    }
    /**
     * Read a JSON string whose opening quote has already been consumed.
     *
     * The closing quote is the first '"' that is not preceded by an escaping
     * backslash. The literal and its closing quote are removed from the front
     * of $str and the escape sequences are decoded in a single left-to-right
     * pass, so an escaped backslash can never be mistaken for the start of a
     * \uXXXX escape.
     *
     * @param string $str remaining input, consumed in place
     * @return string decoded text in the current target charset; the text
     *                'null' (input untouched) when the string is unterminated
     */
    static protected function string_find(&$str){
        $str = (string)$str;
        $len = strlen($str);
        $end = FALSE;
        for($i=0;$i<$len;$i++){
            $ch = $str[$i];
            if($ch==='\\'){
                $i++;// the next character is escaped, whatever it is
            }elseif($ch==='"'){
                $end = $i;
                break;
            }
        }
        if($end===FALSE){
            return 'null';
        }
        // Take the literal plus its closing quote off the input, then drop the quote.
        $raw = (string)substr(self::cursor($str,$end+1),0,$end);
        // unescape() normalises the target charset and prepares the lookup
        // table that descape() reads; calling it once here avoids rebuilding
        // the table for every \uXXXX escape.
        self::unescape('',self::$target_lang);
        return preg_replace_callback(
            '/\\\\(?:u([0-9a-fA-F]{4})|(.))/s',
            array('Charset','string_unescape'),
            $raw
        );
    }
    /**
     * preg_replace_callback() handler for string_find(): decodes one
     * backslash escape. Unknown escapes are returned unchanged.
     *
     * @param array $m regex match: [1] hex digits of a \u escape, [2] the
     *                 character of a single-character escape
     * @return string
     */
    static protected function string_unescape($m){
        if(isset($m[2])&&$m[2]!==''){
            switch($m[2]){
                case '"':
                case '\\':
                case '/':
                    return $m[2];
                case 'b':return chr(8);
                case 'f':return chr(12);
                case 'n':return chr(10);
                case 'r':return chr(13);
                case 't':return chr(9);
                default:return $m[0];
            }
        }
        return self::descape(array($m[0],$m[1]));
    }
    /**
     * Read the JSON number at the start of $str.
     *
     * A value with a fraction or exponent is returned as float; a plain
     * integer is returned as int unless it is too large, in which case it is
     * returned as float. Text after the number is ignored.
     *
     * @param string $str
     * @return int|float|null NULL when $str does not start with a number
     */
    static protected function num_read($str){
        $matches = array();
        $pattern = '/^-?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:[eE][-+]?[0-9]+)?/';
        if(!preg_match($pattern,(string)$str,$matches)){
            return NULL;
        }
        $num = $matches[0];
        $float = floatval($num);
        // Choose the type from the notation, not from the truthiness of the
        // integer part, so "1.5" stays 1.5 and "0" stays an integer.
        if(strpbrk($num,'.eE')!==FALSE){
            return $float;
        }
        return (abs($float) < PHP_INT_MAX) ? intval($num) : $float;
    }
    /**
     * Take $shift bytes off the front of $str.
     *
     * @param string $str   input, shortened in place
     * @param int    $shift number of bytes to take
     * @return string the bytes that were removed
     */
    static protected function cursor(&$str,$shift=1){
        $head = substr($str,0,$shift);
        $tail = substr($str,$shift);
        $str = $tail;
        return $head;
    }
    /**
     * Encode a PHP array as JSON.
     *
     * An array whose keys are exactly 0..n-1 in order becomes a JSON list;
     * any other array becomes a JSON object with its keys encoded as strings.
     * An empty array encodes as 'null'.
     *
     * @param array $array
     * @return string
     */
    static protected function encodearray($array){
        if(!$array){
            return 'null';
        }
        $isList = (array_keys($array)===range(0,sizeof($array)- 1));
        $parts = array();
        foreach($array as $key=>$value){
            $prefix = $isList ? '' : self::jsonencode(strval($key)).':';
            $encoded = is_array($value)
                ? self::encodearray($value)
                : self::jsonencode($value);
            $parts[] = $prefix.$encoded;
        }
        $body = implode(',',$parts);
        return $isList ? '['.$body.']' : '{'.$body.'}';
    }
    /**
     * Convert the pending double-byte text to UTF-8 using the active table.
     *
     * Every byte above 0x80 is taken together with the byte that follows it;
     * the pair is the table key and the table value is encoded as UTF-8. All
     * other bytes are copied through. A pair without a table entry yields a
     * NUL byte, as before. The pending text is left empty afterwards.
     *
     * @return string
     */
    static protected function CHS2UTF8(){
        $src = (string)self::$string;
        $len = strlen($src);
        $utf8 = "";
        // Walk by index: testing the shrinking remainder for truthiness used to
        // stop early when the remainder was "0", dropping a trailing zero.
        for($i=0;$i<$len;$i++){
            if(ord($src[$i]) > 0x80){
                $code = hexdec(bin2hex(substr($src,$i,2)));
                $utf8 .= self::u2utf8(isset(self::$table[$code]) ? self::$table[$code] : 0);
                $i++;// the trail byte has been used
            }else{
                $utf8 .= $src[$i];
            }
        }
        self::$string = '';
        return $utf8;
    }
    /**
     * Convert the pending UTF-8 text to the double-byte target charset using
     * the active table (code point => double-byte code).
     *
     * Two- and three-byte sequences are decoded and looked up; every other
     * byte is copied through. A code point without a table entry is written
     * as a "&#N;" numeric entity, as descape() does. A sequence cut off by
     * the end of the text is copied through as it is.
     *
     * @return string converted text with surrounding whitespace trimmed
     */
    static protected function UTF82CHS(){
        $chs  = "";
        $src  = (string)self::$string;
        $ulen = strlen($src);
        for($i=0;$i<$ulen;$i++){
            $c = $src[$i];
            $char = ord($c);
            $lead = $char>>4;
            if($lead==12||$lead==13){
                $extra = 1;
                $code = $char&0x1F;
            }elseif($lead==14){
                $extra = 2;
                $code = $char&0x0F;
            }else{
                $chs .= $c;
                continue;
            }
            if($i+$extra >= $ulen){
                // Truncated sequence: do not read past the end of the text.
                $chs .= substr($src,$i);
                break;
            }
            for($k=0;$k<$extra;$k++){
                $code = ($code<<6)|(ord($src[++$i])&0x3F);
            }
            if(isset(self::$table[$code])){
                $chs .= self::hex2bin(dechex(self::$table[$code]));
            }else{
                $chs .= "&#".$code.";";
            }
        }
        // NOTE(review): the trim is kept for compatibility, although the other
        // converters in this class return their result untrimmed.
        return trim($chs);
    }
    /**
     * Map the pending double-byte text through the active table (used for
     * both gb2312 -> big5 and big5 -> gb2312).
     *
     * Every byte above 0x80 is taken together with the byte that follows it;
     * the pair is the table key and the table value is written back as bytes.
     * All other bytes are copied through. A pair without a table entry yields
     * a NUL byte, as before. The pending text is left empty afterwards.
     *
     * @return string
     */
    static protected function BIG2GB(){
        $src = (string)self::$string;
        $len = strlen($src);
        $ret = '';
        // Walk by index: testing the shrinking remainder for truthiness used to
        // stop early when the remainder was "0", dropping a trailing zero.
        for($i=0;$i<$len;$i++){
            if(ord($src[$i]) > 0x80){
                $index = hexdec(bin2hex(substr($src,$i,2)));
                if(isset(self::$table[$index])){
                    $ret .= self::hex2bin(dechex(self::$table[$index]));
                }else{
                    $ret .= chr(0);
                }
                $i++;// the trail byte has been used
            }else{
                $ret .= $src[$i];
            }
        }
        self::$string = '';
        return $ret;
    }
    /**
     * Encode a Unicode code point as UTF-8.
     *
     * @param int $c code point
     * @return string one to four bytes, or '' for values of 0x200000 and above
     */
    static protected function u2utf8($c){
        if($c < 0x80){
            return '' . chr($c);
        }
        if($c < 0x800){
            return chr(0xC0 | ($c>>6))
                 . chr(0x80 | ($c & 0x3F));
        }
        if($c < 0x10000){
            return chr(0xE0 | ($c>>12))
                 . chr(0x80 | (($c>>6) & 0x3F))
                 . chr(0x80 | ($c & 0x3F));
        }
        if($c < 0x200000){
            return chr(0xF0 | ($c>>18))
                 . chr(0x80 | (($c>>12) & 0x3F))
                 . chr(0x80 | (($c>>6) & 0x3F))
                 . chr(0x80 | ($c & 0x3F));
        }
        return '';
    }
    /**
     * Decode a single UTF-8 encoded character to its code point.
     *
     * The byte length of $c selects the formula; the lead byte is masked with
     * 0x3f, 0x1f or 0x0f for two, three or four bytes respectively.
     *
     * @param string $c one character, one to four bytes long
     * @return int|string the code point, or '' for any other length
     */
    static protected function utf82u($c){
        $size = strlen($c);
        if($size == 1){
            return ord($c);
        }
        if($size == 2){
            return ((ord($c[0]) & 0x3f) << 6)
                 + (ord($c[1]) & 0x3f);
        }
        if($size == 3){
            return ((ord($c[0]) & 0x1f) << 12)
                 + ((ord($c[1]) & 0x3f) << 6)
                 + (ord($c[2]) & 0x3f);
        }
        if($size == 4){
            return ((ord($c[0]) & 0x0f) << 18)
                 + ((ord($c[1]) & 0x3f) << 12)
                 + ((ord($c[2]) & 0x3f) << 6)
                 + (ord($c[3]) & 0x3f);
        }
        return '';
    }
    /**
     * Turn a string of hexadecimal digits into the bytes it denotes.
     *
     * An odd number of digits is padded with a leading zero, so the digits
     * keep their numeric value (e.g. '123' becomes the bytes 0x01 0x23)
     * instead of being paired from the left and read past the end.
     *
     * @param string $hexdata hexadecimal digits, e.g. the output of dechex()
     * @return string
     */
    static protected function hex2bin($hexdata){
        $hexdata = (string)$hexdata;
        if(strlen($hexdata)%2){
            $hexdata = '0'.$hexdata;
        }
        $bindata = '';
        for ($i = 0, $count = strlen($hexdata); $i < $count; $i += 2){
            $bindata .= chr(hexdec(substr($hexdata,$i,2)));
        }
        return $bindata;
    }
}

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics