使用poi將word轉換為html,支持doc,docx,轉換後可以保持圖片、樣式。
1.導入Maven包
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>xdocreport</artifactId> <version>1.0.6</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>ooxml-schemas</artifactId> <version>1.3</version> </dependency>
2.轉換代碼
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.xwpf.converter.core.BasicURIResolver; import org.apache.poi.xwpf.converter.core.FileImageExtractor; import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter; import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.w3c.dom.Document;  import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.OutputStreamWriter;  public class Test {   // doc轉(zhuǎn)換為html   void docToHtml() throws Exception {     String sourceFileName = "C://doc//test.doc";     String targetFileName = "C://html//test.html";     String imagePathStr = "C://html//image//";     HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName));     Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();     WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);     // 保存圖片,并返回圖片的相對路徑     wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {       try(FileOutputStream out = new FileOutputStream(imagePathStr + name)){          out.write(content);       } catch (Exception e) {         e.printStackTrace();       }        return "image/" + name;     });     wordToHtmlConverter.processDocument(wordDocument);     Document htmlDocument = wordToHtmlConverter.getDocument();     DOMSource domSource = new DOMSource(htmlDocument);     StreamResult streamResult = new StreamResult(new File(targetFileName));      TransformerFactory tf = TransformerFactory.newInstance();     Transformer serializer = tf.newTransformer();     
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");     serializer.setOutputProperty(OutputKeys.INDENT, "yes");     serializer.setOutputProperty(OutputKeys.METHOD, "html");     serializer.transform(domSource, streamResult);   }   // docx轉(zhuǎn)換為html   public void docxToHtml() throws Exception {     String sourceFileName = "D://ac//00.docx";     String targetFileName = "D://ac//test.html";     String imagePathStr = "D://ac//image//";     OutputStreamWriter outputStreamWriter = null;     try {       XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName));       XHTMLOptions options = XHTMLOptions.create();       // 存放圖片的文件夾       options.setExtractor(new FileImageExtractor(new File(imagePathStr)));       // html中圖片的路徑       options.URIResolver(new BasicURIResolver("image"));       outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");       XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();       xhtmlConverter.convert(document, outputStreamWriter, options);     } finally {       if (outputStreamWriter != null) {         outputStreamWriter.close();       }     }   } 演示地址: https://www.xiaoyun.studio/app/preview.html
以上就是本文的全部內容,希望對大家的學習有所幫助,也希望大家多多支持武林網。
新聞熱點
疑難解答