`
dlcoco9999
  • 浏览: 37354 次
  • 性别: Icon_minigender_1
  • 来自: 福州
社区版块
存档分类
最新评论

xmltojson中遇到的CDATA变空以及json_encode成unicode的解决方法

    博客分类:
  • php
阅读更多
/**
 * Convert XML (a file path or an XML string) into a JSON string.
 *
 * SimpleXML serializes CDATA sections as empty values when passed to
 * json_encode(), so CDATA has to be turned into ordinary text first.
 *
 * @param string $source Path to an XML file, or the XML document itself.
 * @return string JSON text with \uXXXX escapes decoded back to UTF-8.
 */
function xml_to_json($source) {
    if (is_file($source)) {
        // File input never passes through uncdata(), so ask libxml to merge
        // CDATA sections into text nodes; otherwise they would come out empty.
        $xml_array = simplexml_load_file($source, 'SimpleXMLElement', LIBXML_NOCDATA);
    } else {
        // String input: rewrite CDATA sections as escaped text before parsing.
        $source = uncdata($source);
        $xml_array = simplexml_load_string($source);
    }

    // By default json_encode() emits non-ASCII characters as \uXXXX escapes;
    // decodeUnicode() turns them back into readable UTF-8.
    $json = json_encode($xml_array);

    return decodeUnicode($json);
}
/**
 * Replace every CDATA section in an XML string with equivalent escaped text.
 *
 * Each "<![CDATA[ ... ]]>" section is replaced by its content with the
 * characters & " < > converted to XML entities. Text outside CDATA sections
 * is copied through unchanged. An unterminated CDATA section is left as-is.
 *
 * @param string $xml Raw XML text.
 * @return string XML text without CDATA sections.
 */
function uncdata($xml)
{
    $open  = '<![CDATA[';
    $close = ']]>';

    // strtr() with an array makes a single pass, so the '&' introduced by one
    // replacement is never escaped a second time.
    $escapes = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',
    );

    $new_xml = '';
    $offset  = 0;
    $length  = strlen($xml);

    while ($offset < $length) {
        $start = strpos($xml, $open, $offset);
        if ($start === false) {
            break;
        }

        $content_start = $start + strlen($open);

        // The first "]]>" after the opener ends the section, so content that
        // itself ends in ']' (i.e. "]]]>") is handled correctly.
        $end = strpos($xml, $close, $content_start);
        if ($end === false) {
            break;
        }

        // Copy the text before the section, then the escaped section content.
        $new_xml .= substr($xml, $offset, $start - $offset);
        $new_xml .= strtr(substr($xml, $content_start, $end - $content_start), $escapes);

        $offset = $end + strlen($close);
    }

    // Append whatever follows the last complete CDATA section.
    return $new_xml . substr($xml, $offset);
}
/**
 * Decode JSON-style \uXXXX escape sequences in a string into UTF-8.
 *
 * Consecutive escapes are decoded together as UTF-16BE, so a surrogate pair
 * such as \ud83d\ude00 becomes a single UTF-8 character.
 *
 * @param string $str Text containing \uXXXX escapes.
 * @return string The text with those escapes replaced by UTF-8 characters.
 */
function decodeUnicode($str)
{
    return preg_replace_callback(
        '/(?:\\\\u[0-9a-f]{4})+/i',
        // A closure replaces create_function(), which was removed in PHP 8.0.
        function ($matches) {
            // Drop the "\u" prefixes so only the hex digits of each code unit remain.
            $hex = str_ireplace('\\u', '', $matches[0]);

            return mb_convert_encoding(pack('H*', $hex), 'UTF-8', 'UTF-16BE');
        },
        $str
    );
}
分享到:
评论
1 楼 ngxiaoyi 2013-09-22  
 $new_xml .= str_replace('>','&gt;', 
                                    str_replace('>','&lt;', 
                                    str_replace('"','&quot;', 
                                    str_replace('&','&amp;', 
                                    $cdata)))); 


上面第二个 str_replace 写错了:应该是把 '<' 转为 '&lt;',而不是把 '>' 转为 '&lt;'。

相关推荐

Global site tag (gtag.js) - Google Analytics