`
hyw520110
  • 浏览: 211591 次
  • 性别: Icon_minigender_1
  • 来自: 杭州
社区版块
存档分类
最新评论

文件转码

    博客分类:
  • java
 
阅读更多

当工程项目较多、各工程或各文件的编码不统一时,可运行本工具类,把工作目录下指定类型的文件转换成指定的编码格式。

 

源码:

import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
import info.monitorenter.cpdetector.io.JChardetFacade;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Re-encodes text files in place.
 * <p>
 * Walks a directory tree, detects the current character encoding of every file
 * whose name ends with a given suffix, and rewrites the files that are not
 * already in the requested encoding.
 * <p>
 * Files are overwritten, so a conversion is only written once the whole file
 * has been decoded and re-encoded in memory without errors.
 *
 * @author Howard.He
 */
public class TransFileCoding {

    private static final Logger logger = LoggerFactory.getLogger(TransFileCoding.class);

    /** Size of the buffer used when slurping a file into memory. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Command-line entry point.
     *
     * @param args
     *            optional: {@code [0]} root directory (default {@code E:/workspace/}),
     *            {@code [1]} file suffix (default {@code .java}),
     *            {@code [2]} target encoding (default {@code UTF-8})
     */
    public static void main(String[] args) {
        String dirPath = args != null && args.length > 0 ? args[0] : "E:/workspace/";
        String suffix = args != null && args.length > 1 ? args[1] : ".java";
        String newEncoding = args != null && args.length > 2 ? args[2] : "UTF-8";
        transformEncoding(dirPath, suffix, newEncoding);
    }

    /**
     * Converts every matching text file under {@code dirPath} to {@code newEncoding}.
     * <p>
     * A file is skipped (and left untouched) when it is already in the target
     * encoding, when its detected encoding is not supported by this JVM, or when
     * its bytes cannot be decoded/encoded without loss.
     *
     * @param dirPath
     *            root directory to scan recursively
     * @param suffix
     *            file name suffix to match, e.g. {@code .java}
     * @param newEncoding
     *            name of the target encoding; when {@code null} or empty each
     *            file's own detected encoding is used as its target, so nothing
     *            is rewritten
     */
    public static void transformEncoding(String dirPath, String suffix, String newEncoding) {
        // Resolve the target once, up front: an unknown or decode-only charset must be
        // rejected before any file is opened for writing (opening truncates it).
        Charset requestedTarget = null;
        if (!isEmpty(newEncoding)) {
            requestedTarget = toCharset(newEncoding);
            if (requestedTarget == null || !requestedTarget.canEncode()) {
                logger.error("unsupported target encoding: {}", newEncoding);
                return;
            }
        }

        for (File f : getFileList(null, dirPath, suffix)) {
            String encoding = getFileCharacterEnding(f);
            Charset source = toCharset(encoding);
            if (source == null) {
                // The detector's answer is only a guess; if the JVM cannot decode it,
                // converting would destroy the file.
                logger.warn("unsupported source encoding {} , skipped f: {}", encoding, f);
                continue;
            }

            // Per-file target; the method parameter is never reassigned so one file's
            // detected encoding cannot leak into the following files.
            Charset target = requestedTarget == null ? source : requestedTarget;

            // Charset equality is by canonical name, so aliases and letter case
            // ("utf-8", "UTF8") do not trigger a pointless rewrite.
            if (source.equals(target)) {
                logger.debug("srcEncoding({})==dstEncoding({}) skipped f: {}",
                        new Object[] { encoding, target.name(), f.toString() });
                continue;
            }

            try {
                String content = decodeStrict(readBytes(f), source);
                logger.info("convert file encoding from {} to {}", encoding, target.name());
                write(f, content, target);
            }
            catch (IOException e) {
                logger.error("conversion failed for " + f, e);
            }
        }
    }

    /**
     * Detects the character encoding of a file.
     *
     * @param file
     *            file to inspect
     * @return the detected charset name, or {@code UTF-8} when detection throws
     *         or yields no charset
     */
    public static String getFileCharacterEnding(File file) {
        String fileCharacterEnding = "UTF-8";
        CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
        detector.add(JChardetFacade.getInstance());
        try {
            // toURI().toURL() escapes characters that are illegal in URLs (e.g. spaces),
            // which the deprecated File.toURL() does not.
            Charset charset = detector.detectCodepage(file.toURI().toURL());
            if (charset != null) {
                fileCharacterEnding = charset.name();
            }
        }
        catch (Exception e) {
            // Best effort by contract: fall back to the default and keep going.
            logger.warn("encoding detection failed for " + file + ", assuming " + fileCharacterEnding, e);
        }
        return fileCharacterEnding;
    }

    /**
     * Recursively collects the files under {@code dirPath} whose names end with
     * {@code suffix}. Directories named {@code .svn} are not entered.
     *
     * @param list
     *            list to append to; a new list is created when {@code null}
     * @param dirPath
     *            directory to scan
     * @param suffix
     *            required file name suffix; {@code null} matches every file
     * @return {@code list} (or the newly created list) with the matches appended;
     *         never {@code null}
     */
    public static List<File> getFileList(List<File> list, String dirPath, String suffix) {
        if (null == list) {
            list = new ArrayList<File>();
        }
        if (dirPath == null) {
            return list;
        }

        // listFiles() returns null when the path is missing, is not a directory,
        // or cannot be read.
        File[] files = new File(dirPath).listFiles();
        if (files == null) {
            return list;
        }

        for (File f : files) {
            String name = f.getName();
            if (f.isDirectory()) {
                if (!".svn".equals(name)) {
                    getFileList(list, f.getPath(), suffix);
                }
            }
            else if (f.isFile() && (suffix == null || name.endsWith(suffix))) {
                list.add(f);
            }
        }
        return list;
    }

    /**
     * Reads a file line by line, terminating every line with {@code "\n"}.
     *
     * @param f
     *            file to read
     * @param encoding
     *            charset name; detected from the file when {@code null}
     * @return the text read
     * @see #readContent(File, String, String)
     */
    public static StringBuilder readContent(File f, String encoding) {
        return readContent(f, encoding, "\n");
    }

    /**
     * Reads a file line by line, appending {@code enter} after every line.
     * <p>
     * Original line terminators are not preserved. On failure the error is logged
     * and whatever was read so far (possibly nothing) is returned.
     *
     * @param f
     *            file to read
     * @param encoding
     *            charset name; detected from the file when {@code null}
     * @param enter
     *            text appended after each line; {@code null} or empty joins the
     *            lines without a separator
     * @return the text read
     */
    public static StringBuilder readContent(File f, String encoding, String enter) {
        StringBuilder builder = new StringBuilder();
        FileInputStream in = null;
        try {
            String charsetName = encoding == null ? getFileCharacterEnding(f) : encoding;
            // Open the stream on its own so it is still closed if the reader
            // constructor rejects the charset name.
            in = new FileInputStream(f);
            BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetName));
            boolean appendEnter = !isEmpty(enter);
            String data;
            while ((data = br.readLine()) != null) {
                builder.append(data);
                if (appendEnter) {
                    builder.append(enter);
                }
            }
        }
        catch (Exception e) {
            // Best effort by contract: callers receive what could be read.
            logger.error("failed to read " + f, e);
        }
        finally {
            closeQuietly(in);
        }
        return builder;
    }

    /**
     * Encodes {@code content} with {@code target} and overwrites {@code f} with the result.
     * <p>
     * Encoding happens before the file is opened, so a character the target
     * charset cannot represent aborts the write while the file is still intact.
     *
     * @param f
     *            file to overwrite
     * @param content
     *            text to write
     * @param target
     *            charset to encode with
     * @throws IOException
     *             if the text cannot be encoded or the file cannot be written
     */
    private static void write(File f, CharSequence content, Charset target) throws IOException {
        ByteBuffer encoded = target.newEncoder().encode(CharBuffer.wrap(content));
        logger.debug("write file:{}", f);
        FileOutputStream out = new FileOutputStream(f);
        try {
            out.write(encoded.array(), encoded.arrayOffset() + encoded.position(), encoded.remaining());
        }
        finally {
            out.close();
        }
    }

    /** Reads the complete raw content of a file. */
    private static byte[] readBytes(File f) throws IOException {
        FileInputStream in = new FileInputStream(f);
        try {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                collected.write(buffer, 0, read);
            }
            return collected.toByteArray();
        }
        finally {
            closeQuietly(in);
        }
    }

    /** Decodes bytes, failing on malformed input instead of substituting replacement characters. */
    private static String decodeStrict(byte[] bytes, Charset source) throws IOException {
        return source.newDecoder().decode(ByteBuffer.wrap(bytes)).toString();
    }

    /** Resolves a charset name; returns {@code null} when it is null, illegal or unsupported. */
    private static Charset toCharset(String name) {
        try {
            return Charset.forName(name);
        }
        catch (IllegalArgumentException e) {
            return null;
        }
    }

    /** Closes a resource, logging instead of propagating a failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        }
        catch (IOException e) {
            logger.debug("failed to close resource", e);
        }
    }

    private static boolean isEmpty(String str) {
        return null == str || "".equals(str);
    }
}

 

检测文件编码格式依赖 cpdetector,可到 http://cpdetector.sourceforge.net/ 下载 lib 包。

使用maven时,直接添加依赖信息:

 

  <dependency>
   <groupId>org.mozilla.intl</groupId>
   <artifactId>chardet</artifactId>
   <version>1.4.2</version>
   <scope>test</scope>
  </dependency>
  <dependency>
   <groupId>info.monitorenter</groupId>
   <artifactId>cpdetector</artifactId>
   <version>1.0.7</version>
   <scope>test</scope>
  </dependency>

  

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics