`
xinyoulinglei
  • 浏览: 123619 次
社区版块
存档分类
最新评论

java 中的文件读取信息

    博客分类:
  • java
阅读更多
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.pdfbox.encryption.DecryptDocument;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
import org.textmining.text.extraction.WordExtractor;


public class FileReader
{
    /**
     * Description:解析文件,返回文档内容 <br>
     * 
     * 
     * @param logger
     *            Longger
     * @param realPath
     *            String
     * @return String
     */
    
    public static String readOneFile(Logger logger, String realPath)
    {
        FileInputStream in = null;
        BufferedReader reader = null;
        
        String content = "";
        try
        {
            File file = new File(realPath);
            
            if (!file.canRead())
            {
                logger.error(MessageCode.getPageMessage("iiss.info.common.filenotread") + realPath);
                return "";
            }
            
            String suffix = realPath.substring(realPath.lastIndexOf(".") + 1, realPath.length());
            
            if ("doc".equalsIgnoreCase(suffix))
            {
                WordExtractor extractor = new WordExtractor();
                content = extractor.extractText(new FileInputStream(file));
                
                if (logger.isDebugEnabled())
                {
                    logger.debug("summary=" + content);
                }
            }//解析excel文件
            else if ("xls".equalsIgnoreCase(suffix))
            {
                StringBuffer rowData = new StringBuffer(KeyConstant.INITIAL_BUFFER);
                
                in = new FileInputStream(file);
                // 创建对Excel对象
                HSSFWorkbook workbook = new HSSFWorkbook(in);
                //获得excel的页数
                int sheetNo = workbook.getNumberOfSheets();
                
                if (sheetNo > 0)
                {
                    for (int i = 0; i < sheetNo; i++)
                    {
                        // 获取每一页对象
                        HSSFSheet sheet = workbook.getSheetAt(i);
                        
                        if (sheet != null)
                        {
                            //逐行获得内容
                            HSSFRow row = null;
                            for (int j = 0; j <= sheet.getLastRowNum(); j++)
                            {
                                try
                                {
                                    row = sheet.getRow(j);
                                }
                                catch (Exception e)
                                {
                                    row = null;
                                }
                                
                                if (row == null)
                                {
                                    continue;
                                }
                                
                                //逐个单元格获得内容
                                HSSFCell cell = null;
                                String fieldValue = null;
                                for (int k = 0; k <= row.getLastCellNum() - 1; k++)
                                {
                                    try
                                    {
                                        cell = row.getCell((short)k);
                                    }
                                    catch (Exception e)
                                    {
                                        cell = row.createCell((short)k);
                                        cell.setCellType(HSSFCell.CELL_TYPE_STRING);
                                        cell.setCellValue("");
                                    }
                                    
                                    fieldValue = POITools.getCellValue(cell);
                                    
                                    if (fieldValue != null && !"".equals(fieldValue))
                                    {
                                        rowData.append(fieldValue);
                                        rowData.append("");
                                    }
                                    
                                }
                                
                            }
                        }
                        
                    }
                }
                
                content = rowData.toString();
                
                if (logger.isDebugEnabled())
                {
                    logger.debug("summary=" + content);
                }
            }
            else if ("txt".equalsIgnoreCase(suffix))
            {
                in = new FileInputStream(file);
                reader = new BufferedReader(new InputStreamReader(in));
                
                StringBuffer sBuffer = new StringBuffer();
                String s = null;
                do
                {
                    s = reader.readLine();
                    if (s != null)
                    {
                        sBuffer.append(s);
                    }
                } while (s != null);
                
                content = sBuffer.toString();
                
                if (logger.isDebugEnabled())
                {
                    logger.debug("summary=" + content);
                }
            }
            else if ("html".equalsIgnoreCase(suffix) || "htm".equalsIgnoreCase(suffix))
            {
                HTMLParser parser = new HTMLParser(file);
                content = parser.getContent();
            }
            else if ("pdf".equalsIgnoreCase(suffix))
            {
                PDDocument pdf = null;
                try
                {
                    PDFParser parser = new PDFParser(new FileInputStream(file));
                    parser.parse();
                    pdf = parser.getPDDocument();
                    if (pdf.isEncrypted())
                    {
                        DecryptDocument decryptor = new DecryptDocument(pdf);
                        decryptor.decryptDocument("");
                    }
                    
                    PDFTextStripper stripper = new PDFTextStripper();
                    content = stripper.getText(pdf);
                    if (logger.isDebugEnabled())
                    {
                        logger.debug("summary=" + content);
                    }
                }
                catch (Exception e)
                {
                    logger.error(e, e);
                }
                catch (OutOfMemoryError t)
                {
                    logger.error(t, t);
                }
                finally
                {
                    try
                    {
                        if (pdf != null)
                        {
                            pdf.close();
                        }
                        
                    }
                    catch (IOException e)
                    {
                        logger.error(MessageCode.getPageMessage("iiss.info.common.readfilefail") + realPath);
                    }
                }
            }
            else
            {
                content = " ";
            }
        }
        catch (FileNotFoundException e)
        {
            logger.error(MessageCode.getPageMessage("iiss.info.common.filenotfound") + realPath);
        }
        catch (IOException e)
        {
            logger.error(MessageCode.getPageMessage("iiss.info.common.readfilefail") + realPath);
        }
        catch (InterruptedException e)
        {
            logger.error(MessageCode.getPageMessage("iiss.info.common.readhtmlfilefail") + realPath);
        }
        catch (Exception e)
        {
            logger.error(MessageCode.getPageMessage("iiss.info.common.parsefilefail") + e, e);
        }
        finally
        {
            try
            {
                if (in != null)
                {
                    in.close();
                }
                
            }
            catch (IOException e)
            {
                logger.error(e, e);
            }
            
            try
            {
                if (reader != null)
                {
                    reader.close();
                }
                
            }
            catch (IOException e)
            {
                logger.error(e, e);
            }
        }
        return content;
    }
    
}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics