excel 导入导出在量大时,有大坑

08 年时,excel 2007 xlsx还未成主流, xls 行列都有限

xls 65536行*256列

xlsx 1048576行*16384列

csv

导出 csv 文件

  • 不同系统上的编码不一样,需要人工选择,对于普通用户不做好
  • 没有优化和数据压缩,数据量越大,csv 文件的大小比 excel 更大,当数据导出超过10w 时,csv 文件大小是 excel 的1.5倍

小数据量


FileInputStream fi = new FileInputStream("e:/2.xlsx");
XSSFWorkbook wk = new XSSFWorkbook(fi);

取Sheet、Row和Cell,这种方式会把文件的所有内容都加载到内存中


<dependency>
    <groupId>com.monitorjbl</groupId>
    <artifactId>xlsx-streamer</artifactId>
    <version>1.2.0</version>
</dependency>

写时省内存


//内存中只保留1000条记录
SXSSFWorkbook workbook = new SXSSFWorkbook(1000);


try {
     long startTime = System.currentTimeMillis();
     final int NUM_OF_ROWS = rowsNum;
     final int NUM_OF_COLUMNS = 30;
     SXSSFWorkbook wb = null;
     try {
         wb = new SXSSFWorkbook();
         wb.setCompressTempFiles(true); //压缩临时文件,很重要,否则磁盘很快就会被写满
         Sheet sh = wb.createSheet();
         int rowNum = 0;
         for (int num = 0; num < NUM_OF_ROWS; num++) {
             if (num % 100_0000 == 0) {
                 sh = wb.createSheet("sheet " + num);
                 rowNum = 0;
             }
             rowNum++;
             Row row = sh.createRow(rowNum);
             for (int cellnum = 0; cellnum < NUM_OF_COLUMNS; cellnum++) {
                 Cell cell = row.createCell(cellnum);
                 cell.setCellValue(Math.random());
             }
         }
         FileOutputStream out = new FileOutputStream("ooxml-scatter-chart_SXSSFW_" + rowsNum + ".xlsx");
         wb.write(out);
         out.close();
     } catch (Exception ex) {
         ex.printStackTrace();
     } finally {
         if (wb != null) {
             wb.dispose();// 删除临时文件,很重要,否则磁盘可能会被写满
         }
     }
     long endTime = System.currentTimeMillis();
     System.out.println("process " + rowsNum + " spent time:" + (endTime - startTime));
 } catch (Exception e) {
     e.printStackTrace();
     throw e;
 }



import com.monitorjbl.xlsx.StreamingReader;

InputStream is = new FileInputStream(new File("/path/to/workbook.xlsx"));
StreamingReader reader = StreamingReader.builder()
        .rowCacheSize(100)    // number of rows to keep in memory (defaults to 10)
        .bufferSize(4096)     // buffer size to use when reading InputStream to file (defaults to 1024)
        .sheetIndex(0)        // index of sheet to use (defaults to 0)
        .read(is);            // InputStream or File for XLSX file (required)

for (Row r : reader) {
  for (Cell c : r) {
    System.out.println(c.getStringCellValue());
  }
}
}

关键代码

SXSSFWorkbook wb = null;
try {
	wb = new SXSSFWorkbook();
	wb.setCompressTempFiles(true); //压缩临时文件,很重要,否则磁盘很快就会被写满
	...
} finally {
	if (wb != null) {
		wb.dispose();// 删除临时文件,很重要,否则磁盘可能会被写满
	}
}