Java 容易被忽视的 poi 包
Poi 提取技术是 java 中一大亮点。前几日在做项目时需要将 ppt 转化为图片,将 word 文档输出的功能。在百度搜了些资料,整理了一下,借用前辈们的经验整合了这两个小小的类
文章原出两处:http://vtrtbb.iteye.com/blog/601267
http://pengenjing.iteye.com/blog/1901225
来看下面的吧 word 文档转化为 html 格式的代码先:
package com.hsp.util;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.w3c.dom.Document;
public class Word2Html {
/*newrealpath是指word文档的原路径,realpath是指word文档的新路径,newsid是指该word文档存储在数据库中的唯一id值,用户没上传一个word文档,后台会根据生成的唯一id值创建一个.html文件,就是newsid.html*/
public static boolean changeWord2html(String newrealpath,String realpath,int newsid) {
boolean b=true;
try {
convert2Html(newrealpath,realpath+"//"+newsid+".html");
} catch (Exception e) {
e.printStackTrace();
b=false;
}
return b;
}
//该方法是将文本写入路径之中
public static void writeFile(String content, String path) {
FileOutputStream fos = null;
BufferedWriter bw = null;
try {
File file = new File(path);
fos = new FileOutputStream(file);
bw = new BufferedWriter(new OutputStreamWriter(fos,"GB2312"));
bw.write(content);
} catch (FileNotFoundException fnfe) {
fnfe.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
} finally {
try {
if (bw != null)
bw.close();
if (fos != null)
fos.close();
} catch (IOException ie) {
}
}
}
public static void convert2Html(String fileName, String outPutFile)
throws TransformerException, IOException,
ParserConfigurationException {
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//创建一个文档
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument());//对普通文本的操作
wordToHtmlConverter.setPicturesManager( new PicturesManager()
{
public String savePicture( byte[] content,
PictureType pictureType, String suggestedName,
float widthInches, float heightInches )
{
return "test/"+suggestedName;
}
} );//对图片的操作
wordToHtmlConverter.processDocument(wordDocument);
//保存图片
List pics=wordDocument.getPicturesTable().getAllPictures();
if(pics!=null){
for(int i=0;i下面都是转换
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
out.close();
writeFile(new String(out.toByteArray()), outPutFile); //调用writeFile类
}
}
下面的我们来看看 ppt 转化为图片格式的代码:
package com.hsp.util;
import java.awt.Dimension;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.record.Slide;
import org.apache.poi.hslf.usermodel.RichTextRun;
import org.apache.poi.hslf.usermodel.SlideShow;
public class PPTtoImage {
/pptload 是指 ppt 的原路径,newfullnewpath 是指 ppt 的新路径,newsid 是指该 ppt 存储在数据库中的唯一 id 值,用户没上传一个 ppt,后台会根据生成的唯一 id 值创建一个唯一的文件夹,该文件夹内存放该 ppt 转化的图片/
public static List changppttoimage(String pptload,String newfullnewpath,int newsid){
// 读入 PPT 文件
File file = new File(pptload);
return doPPTtoImage(file,newfullnewpath,newsid);
}
public static List doPPTtoImage(File file,String newfullnewpath,int newsid){
/*boolean isppt = checkFile(file);
if (!isppt) {
System.out.println("你指定的文件不是 ppt 文档!");
return false;
}*/
List al=new ArrayList();
try {
FileInputStream is = new FileInputStream(file);
SlideShow ppt = new SlideShow(is);
is.close();
Dimension pgsize = ppt.getPageSize();
org.apache.poi.hslf.model.Slide[] slide = ppt.getSlides();
for (int i = 0; i < slide.length; i++) {
//System.out.print("第" + i + "页。");
if(slide[i].getNotesSheet()!=null&&slide[i].getNotesSheet().getTextRuns()!=null){
//获取第一个备注
//System.out.println("备注:" + slide[i].getNotesSheet().getTextRuns()[0].getText());
}
TextRun[] truns = slide[i].getTextRuns();
for (int k = 0; k < truns.length; k++) {
RichTextRun[] rtruns = truns[k].getRichTextRuns();
for (int l = 0; l < rtruns.length; l++) {
rtruns[l].setFontIndex(1);
rtruns[l].setFontName("宋体");
// 获取文本列表
//System.out.println(rtruns[l].getText());
}
}
BufferedImage img = new BufferedImage(pgsize.width,
pgsize.height, BufferedImage.TYPE_INT_RGB);
Graphics2D graphics = img.createGraphics();
graphics.setPaint(Color.white);
graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width,
pgsize.height));
slide[i].draw(graphics);
// 这里设置图片的存放路径和图片的格式(jpeg,png,bmp等等),注意生成文件路径
File f=new File(newfullnewpath);
//判断该文件夹是否存在,如果不存在,这创建一个新的文件夹
if(!f.isDirectory())
{
f.mkdirs();
}
FileOutputStream out = new FileOutputStream(newfullnewpath+"pict_"+(i + 1) + ".jpeg");
javax.imageio.ImageIO.write(img, "jpeg", out);
al.add("pict_"+(i + 1) + ".jpeg");
out.close();
}
/*System.out.println("ok");
return true;*/
} catch (FileNotFoundException e) {
System.out.println(e);
} catch (IOException e) {
e.printStackTrace();
}
return al;
}
// function 检查文件是否为 PPT
public static boolean checkFile(File file) {
boolean isppt = false;
String filename = file.getName();
String suffixname = null;
if (filename != null && filename.indexOf(".") != -1) {
suffixname = filename.substring(filename.indexOf("."));
if (suffixname.equals(".ppt")) {
isppt = true;
}
return isppt;
} else {
return isppt;
}
}
}
经测试是可行的,在小编这里是可以运行成功的,不过,上面只是简单的两个类而已,在项目中,还要用 struts2 去限定转化文件的大小等等。
欢迎来到这里!
我们正在构建一个小众社区,大家在这里相互信任,以平等 • 自由 • 奔放的价值观进行分享交流。最终,希望大家能够找到与自己志同道合的伙伴,共同成长。
注册 关于