Java 中容易被忽视的 POI 包
POI 文档处理技术是 Java 中的一大亮点。前几日做项目时,需要实现将 PPT 转化为图片、将 Word 文档转换输出为 HTML 的功能。在百度上搜了些资料,整理了一下,借用前辈们的经验整合出了这两个小小的类。
文章原出两处:http://vtrtbb.iteye.com/blog/601267
http://pengenjing.iteye.com/blog/1901225
先来看将 Word 文档转化为 HTML 格式的代码:
package com.hsp.util;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.w3c.dom.Document;
public class Word2Html {
/*newrealpath是指word文档的原路径,realpath是指word文档的新路径,newsid是指该word文档存储在数据库中的唯一id值,用户没上传一个word文档,后台会根据生成的唯一id值创建一个.html文件,就是newsid.html*/ public static boolean changeWord2html(String newrealpath,String realpath,int newsid) { boolean b=true; try { convert2Html(newrealpath,realpath+"//"+newsid+".html"); } catch (Exception e) { e.printStackTrace(); b=false; } return b; }
//该方法是将文本写入路径之中
public static void writeFile(String content, String path) { FileOutputStream fos = null; BufferedWriter bw = null; try { File file = new File(path); fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos,"GB2312")); bw.write(content); } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } finally { try { if (bw != null) bw.close(); if (fos != null) fos.close(); } catch (IOException ie) { } } } public static void convert2Html(String fileName, String outPutFile) throws TransformerException, IOException, ParserConfigurationException { HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//创建一个文档 WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder() .newDocument());//对普通文本的操作 wordToHtmlConverter.setPicturesManager( new PicturesManager() { public String savePicture( byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches ) { return "test/"+suggestedName; } } );//对图片的操作 wordToHtmlConverter.processDocument(wordDocument); //保存图片 List pics=wordDocument.getPicturesTable().getAllPictures(); if(pics!=null){ for(int i=0;i下面都是转换 TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); out.close(); writeFile(new String(out.toByteArray()), outPutFile); //调用writeFile类 }
}
下面我们来看看将 PPT 转化为图片的代码:
package com.hsp.util;
import java.awt.Dimension;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.record.Slide;
import org.apache.poi.hslf.usermodel.RichTextRun;
import org.apache.poi.hslf.usermodel.SlideShow;
public class PPTtoImage {
/pptload 是指 ppt 的原路径,newfullnewpath 是指 ppt 的新路径,newsid 是指该 ppt 存储在数据库中的唯一 id 值,用户没上传一个 ppt,后台会根据生成的唯一 id 值创建一个唯一的文件夹,该文件夹内存放该 ppt 转化的图片/
public static List changppttoimage(String pptload,String newfullnewpath,int newsid){
// 读入 PPT 文件
File file = new File(pptload);
return doPPTtoImage(file,newfullnewpath,newsid);
}
public static List doPPTtoImage(File file,String newfullnewpath,int newsid){
/*boolean isppt = checkFile(file);
if (!isppt) {
System.out.println("你指定的文件不是 ppt 文档!");
return false;
}*/
List al=new ArrayList();
try {
FileInputStream is = new FileInputStream(file);
SlideShow ppt = new SlideShow(is);
is.close();
Dimension pgsize = ppt.getPageSize();
org.apache.poi.hslf.model.Slide[] slide = ppt.getSlides();
for (int i = 0; i < slide.length; i++) {
//System.out.print("第" + i + "页。"); if(slide[i].getNotesSheet()!=null&&slide[i].getNotesSheet().getTextRuns()!=null){ //获取第一个备注 //System.out.println("备注:" + slide[i].getNotesSheet().getTextRuns()[0].getText()); } TextRun[] truns = slide[i].getTextRuns(); for (int k = 0; k < truns.length; k++) { RichTextRun[] rtruns = truns[k].getRichTextRuns(); for (int l = 0; l < rtruns.length; l++) { rtruns[l].setFontIndex(1); rtruns[l].setFontName("宋体"); // 获取文本列表 //System.out.println(rtruns[l].getText()); } } BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB); Graphics2D graphics = img.createGraphics(); graphics.setPaint(Color.white); graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height)); slide[i].draw(graphics); // 这里设置图片的存放路径和图片的格式(jpeg,png,bmp等等),注意生成文件路径 File f=new File(newfullnewpath);
//判断该文件夹是否存在,如果不存在,这创建一个新的文件夹
if(!f.isDirectory())
{
f.mkdirs();
}
FileOutputStream out = new FileOutputStream(newfullnewpath+"pict_"+(i + 1) + ".jpeg"); javax.imageio.ImageIO.write(img, "jpeg", out); al.add("pict_"+(i + 1) + ".jpeg"); out.close();
}
/*System.out.println("ok");
return true;*/
} catch (FileNotFoundException e) {
System.out.println(e);
} catch (IOException e) {
e.printStackTrace();
}
return al;
}
// function 检查文件是否为 PPT
public static boolean checkFile(File file) {
boolean isppt = false;
String filename = file.getName();
String suffixname = null;
if (filename != null && filename.indexOf(".") != -1) {
suffixname = filename.substring(filename.indexOf("."));
if (suffixname.equals(".ppt")) {
isppt = true;
}
return isppt;
} else {
return isppt;
}
}
}
经测试,以上代码是可行的,在笔者这里可以成功运行。不过,上面只是两个简单的类而已;在实际项目中,还需要用 Struts2 去限制待转换文件的大小等等。
欢迎来到这里!
我们正在构建一个小众社区,大家在这里相互信任,以平等 • 自由 • 奔放的价值观进行分享交流。最终,希望大家能够找到与自己志同道合的伙伴,共同成长。
注册 关于