`

Iterate over rows and cells(EXCEL文件的读取)

阅读更多
package cn.com.songjy.test.excel;

import java.io.IOException;
import java.text.NumberFormat;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellReference;

public class ReadLine {

	private Log log = LogFactory.getLog(ReadLine.class);

	/**
	 * 
	 * @param excel_file_path
	 *            --excel文件的存储路径
	 * @throws IOException
	 * @throws InvalidFormatException
	 */
	public void read(String excel_file_path) throws IOException, InvalidFormatException {

		Workbook wb = WorkbookFactory.create(new java.io.File(excel_file_path));

		Sheet sheet = wb.getSheetAt(0);

		/* 定义最小读取的行数 */
		int rowStart = Math.min(0, sheet.getFirstRowNum());

		/* 定义最大读取的行数 */
		int rowEnd = Math.max(0, sheet.getLastRowNum());

		for (int rowNum = rowStart; rowNum <= rowEnd; rowNum++) {

			Row row = sheet.getRow(rowNum);

			/* 假如当前为空行,跳过当前行继续读取下一行数据 */
			if (row == null)
				continue;

			/* 定义当前行的最大读取列 */
			int lastColumn = Math.max(row.getLastCellNum(), 0);

			NumberFormat df = NumberFormat.getNumberInstance();
			// df.setMaximumFractionDigits(2);//保留小数点后两位
			// df.setMinimumFractionDigits(2);//如果不这么做,那么当value的值是100.00的时候返回100
			df.setGroupingUsed(false);// 去掉千分位,如果想输出的格式用逗号隔开,可以设置成true

			for (int cn = 0; cn < lastColumn; cn++) {
				Cell cell = row.getCell(cn, Row.RETURN_BLANK_AS_NULL);

				/* 假如当前为空格,跳过当前格继续读取下一格的数据 */
				if (null == cell)
					continue;

				CellReference cellRef = new CellReference(row.getRowNum(), cell.getColumnIndex());

				log.info(cellRef.formatAsString());
				log.info(" - ");

				switch (cell.getCellType()) {
				case Cell.CELL_TYPE_STRING:
					log.info(cell.getRichStringCellValue().getString());
					break;
				case Cell.CELL_TYPE_NUMERIC:
					if (DateUtil.isCellDateFormatted(cell)) {
						log.info(cell.getDateCellValue());
					} else {
						log.info(df.format(cell.getNumericCellValue()));
					}
					break;
				case Cell.CELL_TYPE_BOOLEAN:
					log.info(cell.getBooleanCellValue());
					break;
				case Cell.CELL_TYPE_FORMULA:
					log.info(cell.getCellFormula());
					break;
				default:
					log.info("----");
				}

			}
		}
	}

	public static void main(String[] args) {
		try {
			new ReadLine().read("E:\\a.xlsx");
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InvalidFormatException e) {
			e.printStackTrace();
		}
	}
}


该程序依赖jar包

poi-3.9.jar
poi-ooxml-3.9.jar
poi-ooxml-schemas-3.9.jar
commons-logging-1.1.3.jar
dom4j-1.6.jar
xmlbeans-2.3.0.jar

Java 正则表达式解析csv文件
POI读取大数据量的Excel文件
Java读取大数据量07Excel的方法(POI),如下:
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public class ExcelUtil extends DefaultHandler {

	private SharedStringsTable sst;
	private String lastContents;
	private boolean nextIsString;

	private int sheetIndex = -1;
	private List<String> rowlist = new ArrayList<String>();
	private int curRow = 0;
	private int curCol = 0;

	/**
	 * 读取第一个工作簿的入口方法
	 * 
	 * @param path
	 */
	public void readOneSheet(String path) throws Exception {
		OPCPackage pkg = OPCPackage.open(path);
		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);

		InputStream sheet = r.getSheet("rId1");

		InputSource sheetSource = new InputSource(sheet);
		parser.parse(sheetSource);

		sheet.close();
	}

	/**
	 * 读取所有工作簿的入口方法
	 * 
	 * @param path
	 * @throws Exception
	 */
	public void process(String path) throws Exception {
		OPCPackage pkg = OPCPackage.open(path);
		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);

		Iterator<InputStream> sheets = r.getSheetsData();
		while (sheets.hasNext()) {
			curRow = 0;
			sheetIndex++;
			InputStream sheet = sheets.next();
			InputSource sheetSource = new InputSource(sheet);
			parser.parse(sheetSource);
			sheet.close();
		}
	}

	/**
	 * 该方法自动被调用,每读一行调用一次,在方法中写自己的业务逻辑即可
	 * 
	 * @param sheetIndex
	 *            工作簿序号
	 * @param curRow
	 *            处理到第几行
	 * @param rowList
	 *            当前数据行的数据集合
	 */
	public void optRow(int sheetIndex, int curRow, List<String> rowList) {
		String temp = "";
		for (String str : rowList) {
			temp += str + "_";
		}
		System.out.println(temp);
	}

	public XMLReader fetchSheetParser(SharedStringsTable sst)
			throws SAXException {
		XMLReader parser = XMLReaderFactory
				.createXMLReader("org.apache.xerces.parsers.SAXParser");
		this.sst = sst;
		parser.setContentHandler(this);
		return parser;
	}

	public void startElement(String uri, String localName, String name,
			Attributes attributes) throws SAXException {
		// c => 单元格
		if (name.equals("c")) {
			// 如果下一个元素是 SST 的索引,则将nextIsString标记为true
			String cellType = attributes.getValue("t");
			if (cellType != null && cellType.equals("s")) {
				nextIsString = true;
			} else {
				nextIsString = false;
			}
		}
		// 置空
		lastContents = "";
	}

	public void endElement(String uri, String localName, String name)
			throws SAXException {
		// 根据SST的索引值的到单元格的真正要存储的字符串
		// 这时characters()方法可能会被调用多次
		if (nextIsString) {
			try {
				int idx = Integer.parseInt(lastContents);
				lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
						.toString();
			} catch (Exception e) {

			}
		}

		// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
		// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
		if (name.equals("v")) {
			String value = lastContents.trim();
			value = value.equals("") ? " " : value;
			rowlist.add(curCol, value);
			curCol++;
		} else {
			// 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
			if (name.equals("row")) {
				optRow(sheetIndex, curRow, rowlist);
				rowlist.clear();
				curRow++;
				curCol = 0;
			}
		}
	}

	public void characters(char[] ch, int start, int length)
			throws SAXException {
		// 得到单元格内容的值
		lastContents += new String(ch, start, length);
	}

}

j​a​v​a​向​e​x​c​e​l​ ​写​入​海​量​数​据​内​存​溢​出​问​题​的​解​决,如下:
import java.io.FileOutputStream;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class Test {

	/*** @param args */
	public static void main(String[] args) {
		if (args[0].equals("hssf")) {
			hssfTest();
		}
		if (args[0].equals("sxssf")) {
			sxssfTest();
		}
	}

	/* 不会内存溢出,最后生成一个大概40M的文件 */
	public static void sxssfTest() {
		Workbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory,//
												// exceeding rows will be//
												// flushed to disk
		Sheet sh = wb.createSheet();
		int rownum = 0;
		try {
			while (true) {
				Row row = sh.createRow(rownum);
				for (int cellnum = 0; cellnum < 10; cellnum++) {
					Cell cell = row.createCell(cellnum);
					String address = new CellReference(cell).formatAsString();
					cell.setCellValue(address);
				}
				System.out.println(rownum);
				rownum++;
				if (rownum >= 1000000)
					break;
			}
			FileOutputStream out = new FileOutputStream("sxssf.xlsx");
			wb.write(out);
			out.close();
		} catch (Exception e) {
			System.out.println(ExceptionUtils.getFullStackTrace(e));
		}
	}

	/* 20000行左右就内存溢出了 */
	public static void hssfTest() {
		XSSFWorkbook wb = new XSSFWorkbook();
		Sheet sh = wb.createSheet();
		int rownum = 0;
		try {
			while (true) {
				Row row = sh.createRow(rownum);
				for (int cellnum = 0; cellnum < 10; cellnum++) {
					Cell cell = row.createCell(cellnum);
					String address = new CellReference(cell).formatAsString();
					cell.setCellValue(address);
				}
				System.out.println(rownum);
				rownum++;
				if (rownum >= 1000000)
					break;
			}
			FileOutputStream out = new FileOutputStream("hssf.xlsx");
			wb.write(out);
			out.close();
		} catch (Exception e) {
			System.out.println(ExceptionUtils.getFullStackTrace(e));
		}
	}
}


apache common之CSV文件操作

读取合并单元格的内容
package demo;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellRangeAddress;

public class ReadLine {

	private Log log = LogFactory.getLog(ReadLine.class);

	/**
	 * 
	 * @param excel_file_path
	 *            --excel文件的存储路径
	 * @throws IOException
	 * @throws InvalidFormatException
	 */
	public void read(String excel_file_path) throws IOException, InvalidFormatException {

		Workbook wb = WorkbookFactory.create(new java.io.File(excel_file_path));

		Sheet sheet = wb.getSheetAt(0);

		/* 定义最小读取的行数 */
		int rowStart = Math.min(0, sheet.getFirstRowNum());

		/* 定义最大读取的行数 */
		int rowEnd = Math.max(0, sheet.getLastRowNum());

		for (int rowNum = rowStart; rowNum <= rowEnd; rowNum++) {

			Row row = sheet.getRow(rowNum);

			/* 假如当前为空行,跳过当前行继续读取下一行数据 */
			if (row == null)
				continue;

			/* 定义当前行的最大读取列 */
			int lastColumn = Math.max(row.getLastCellNum(), 0);

			for (int cellnum = 0; cellnum < lastColumn; cellnum++) {
				Cell cell = row.getCell(cellnum, Row.RETURN_NULL_AND_BLANK);

				boolean is = isMergedRegion(sheet, rowNum, cellnum);// 判断是合并单元格否
				if (true == is) {
					log.info(getMergedRegionValue(sheet, rowNum, cellnum));
				} else {
					log.info(getCellValue(cell));
				}

			}
		}
	}

	/**
	 * 判断指定的单元格是否是合并单元格
	 * 
	 * @param sheet
	 * @param row
	 * @param column
	 * @return
	 */
	public boolean isMergedRegion(Sheet sheet, int row, int column) {
		int sheetMergeCount = sheet.getNumMergedRegions();

		for (int i = 0; i < sheetMergeCount; i++) {
			CellRangeAddress ca = sheet.getMergedRegion(i);
			int firstColumn = ca.getFirstColumn();
			int lastColumn = ca.getLastColumn();
			int firstRow = ca.getFirstRow();
			int lastRow = ca.getLastRow();

			if (row >= firstRow && row <= lastRow) {
				if (column >= firstColumn && column <= lastColumn) {
					return true;
				}
			}
		}

		return false;
	}

	/**
	 * 获取单元格的值
	 * 
	 * @param cell
	 * @return
	 */
	public String getCellValue(Cell cell) {

		if (cell == null)
			return "";

		if (cell.getCellType() == Cell.CELL_TYPE_STRING) {

			return cell.getStringCellValue();

		} else if (cell.getCellType() == Cell.CELL_TYPE_BOOLEAN) {

			return String.valueOf(cell.getBooleanCellValue());

		} else if (cell.getCellType() == Cell.CELL_TYPE_FORMULA) {

			return cell.getCellFormula();

		} else if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC) {

			return String.valueOf(cell.getNumericCellValue());

		}

		return "";
	}

	/**
	 * 获取合并单元格的值
	 * 
	 * @param sheet
	 * @param row
	 * @param column
	 * @return
	 */
	public String getMergedRegionValue(Sheet sheet, int row, int column) {
		int sheetMergeCount = sheet.getNumMergedRegions();

		for (int i = 0; i < sheetMergeCount; i++) {
			CellRangeAddress ca = sheet.getMergedRegion(i);
			int firstColumn = ca.getFirstColumn();
			int lastColumn = ca.getLastColumn();
			int firstRow = ca.getFirstRow();
			int lastRow = ca.getLastRow();

			if (row >= firstRow && row <= lastRow) {

				if (column >= firstColumn && column <= lastColumn) {
					Row fRow = sheet.getRow(firstRow);
					Cell fCell = fRow.getCell(firstColumn);

					return getCellValue(fCell);
				}
			}
		}

		return null;
	}

	public static void main(String[] args) {
		try {
			new ReadLine().read("E:\\a.xlsx");
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InvalidFormatException e) {
			e.printStackTrace();
		}
	}
}


添加超链接示例代码:
这个是设置一个超链接弹出email地址, 其他类似
import java.io.FileOutputStream;
      
import org.apache.poi.ss.usermodel;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.ss.usermodel.IndexedColors;

/**
* @author lance
*/
public class HyperlinkExample {


  public static void main(String[]args) throws Exception{
	  Workbook wb = new XSSFWorkbook(); //or new HSSFWorkbook();
	  CreationHelper createHelper = wb.getCreationHelper();

	  CellStyle hlink_style = wb.createCellStyle();
	  Font hlink_font = wb.createFont();
	  hlink_font.setUnderline(Font.U_SINGLE);
	  hlink_font.setColor(IndexedColors.BLUE.getIndex());
	  hlink_style.setFont(hlink_font);

	  Cell cell;
	  Sheet sheet = wb.createSheet("Hyperlinks");
	  //URL
	  cell = sheet.createRow(0).createCell((short)0);
	  cell.setCellValue("URL Link");

	  Hyperlink link = createHelper.createHyperlink(Hyperlink.LINK_URL);
	  link.setAddress("http://poi.apache.org/");
	  cell.setHyperlink(link);
	  cell.setCellStyle(hlink_style);

	  //link to a file in the current directory
	  cell = sheet.createRow(1).createCell((short)0);
	  cell.setCellValue("File Link");
	  link = createHelper.createHyperlink(Hyperlink.LINK_FILE);
	  link.setAddress("link1.xls");
	  cell.setHyperlink(link);
	  cell.setCellStyle(hlink_style);

	  //e-mail link
	  cell = sheet.createRow(2).createCell((short)0);
	  cell.setCellValue("Email Link");
	  link = createHelper.createHyperlink(Hyperlink.LINK_EMAIL);
	  //设置路径
	  link.setAddress("mailto:poi@apache.org?subject=Hyperlinks");
	  cell.setHyperlink(link);
	  cell.setCellStyle(hlink_style);

	  //create a target sheet and cell
	  Sheet sheet2 = wb.createSheet("Target Sheet");
	  sheet2.createRow(0).createCell((short)0).setCellValue("Target Cell");

	  cell = sheet.createRow(3).createCell((short)0);
	  cell.setCellValue("Worksheet Link");
	  Hyperlink link2 = createHelper.createHyperlink(Hyperlink.LINK_DOCUMENT);
	  link2.setAddress("'Target Sheet'!A1");
	  cell.setHyperlink(link2);
	  cell.setCellStyle(hlink_style);

	  FileOutputStream out = new FileOutputStream("hyperinks.xlsx");
	  wb.write(out);
	  out.close();

  }
}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics