基于 POI 根据标题截取 Word 文件

2023-10-31

maven依赖

 <!-- poi-tl 1.9.1. NOTE(review): presumably this is what brings the Apache POI XWPF classes used below onto the classpath (transitively); confirm against the project's dependency tree. -->
 <dependency>
            <groupId>com.deepoove</groupId>
            <artifactId>poi-tl</artifactId>
            <version>1.9.1</version>

 </dependency>
        <!-- ooxml-schemas 1.4. NOTE(review): presumably added for the complete set of generated OOXML schema (CT*) classes; verify that this version matches the POI version actually resolved. -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.4</version>
        </dependency>

获取段落的大纲级别

/**
 * Resolves the outline level of a paragraph as a decimal string.
 *
 * <p>Lookup order: the paragraph's own properties first, then the paragraph's style,
 * then each style reached by following the {@code basedOn} chain. The first
 * {@code outlineLvl} element found decides the result.
 *
 * <p>This method never throws; any unexpected runtime failure while reading the
 * underlying XML beans is treated as "no outline level".
 *
 * @param doc  document that owns the paragraph and its style table; may be {@code null}
 * @param para paragraph to inspect; may be {@code null}
 * @return {@code "0"}..{@code "8"} when an outline level is set, otherwise the empty string
 *         (also for values greater than 8, which WordprocessingML uses for body text)
 */
private static String getTitleLvl(XWPFDocument doc, XWPFParagraph para) {
    final String none = "";
    if (doc == null || para == null) {
        return none;
    }
    try {
        // 1) Outline level set directly on the paragraph.
        if (para.getCTP().getPPr() != null && para.getCTP().getPPr().getOutlineLvl() != null) {
            return outlineLvlToString(para.getCTP().getPPr().getOutlineLvl().getVal());
        }

        if (doc.getStyles() == null) {
            return none;
        }

        // 2) Outline level inherited from the paragraph style or any of its ancestors.
        // A cycle-free basedOn chain cannot be longer than the number of styles, so the
        // hop budget guarantees termination even if the style table is malformed.
        String styleId = para.getStyle();
        int hopsLeft = doc.getStyles().getNumberOfStyles();
        while (styleId != null && hopsLeft-- > 0) {
            if (doc.getStyles().getStyle(styleId) == null) {
                return none;
            }
            if (doc.getStyles().getStyle(styleId).getCTStyle().getPPr() != null
                    && doc.getStyles().getStyle(styleId).getCTStyle().getPPr().getOutlineLvl() != null) {
                return outlineLvlToString(
                        doc.getStyles().getStyle(styleId).getCTStyle().getPPr().getOutlineLvl().getVal());
            }
            styleId = doc.getStyles().getStyle(styleId).getCTStyle().getBasedOn() != null
                    ? doc.getStyles().getStyle(styleId).getCTStyle().getBasedOn().getVal()
                    : null;
        }
    } catch (RuntimeException ignored) {
        // Best effort by contract: a broken or disconnected XML bean means "no outline level".
    }
    return none;
}

/**
 * Converts a raw outline-level value to the string form returned by {@code getTitleLvl}.
 *
 * @param val raw value of the {@code outlineLvl} element; may be {@code null}
 * @return the decimal string of {@code val}, or the empty string when {@code val} is
 *         {@code null} or greater than 8
 */
private static String outlineLvlToString(BigInteger val) {
    if (val == null || val.compareTo(BigInteger.valueOf(8)) > 0) {
        return "";
    }
    return val.toString();
}

按标题截取内容

/**
 * Extracts one section of a Word document, selected by its heading text.
 *
 * <p>The body elements are scanned in order. The section starts at the first paragraph
 * whose outline level (per {@code getTitleLvl}) is {@code "0"} and whose text contains
 * any of {@code titleName}; it ends just before the next paragraph with outline level
 * {@code "0"}, or at the end of the document if there is none. Every body element
 * outside that range is removed and the remaining document is written to the result.
 *
 * <p>This method does not itself close {@code inputStream}.
 *
 * @param inputStream source .docx content
 * @param titleName   candidate heading texts; a heading matches if its text contains any
 *                    of them. {@code null} entries are skipped.
 * @return the extracted document bytes, or an empty stream when no heading matched
 * @throws IOException if the document cannot be read, written or closed
 */
public static ByteArrayOutputStream interceptByInputStream(InputStream inputStream, String... titleName) throws IOException {
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    String[] titles = titleName == null ? new String[0] : titleName;

    // try-with-resources: the document holds an open package that must be released.
    try (MyXWPDocument xdoc = new MyXWPDocument(inputStream)) {
        List<IBodyElement> bodyElements = xdoc.getBodyElements();
        int count = bodyElements.size();

        int start = 0;      // index of the matched heading (inclusive)
        int end = count;    // index of the next top-level heading (exclusive)
        boolean found = false;
        for (int i = 0; i < count; i++) {
            IBodyElement bodyElement = bodyElements.get(i);
            if (bodyElement.getElementType() != BodyElementType.PARAGRAPH) {
                continue;
            }
            XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
            if (!"0".equals(getTitleLvl(xdoc, paragraph))) {
                continue;
            }
            if (found) {
                // Next top-level heading: the section ends right before it.
                end = i;
                break;
            }
            String text = paragraph.getText();
            for (String s : titles) {
                if (s != null && text.contains(s)) {
                    start = i;
                    found = true;
                    break;
                }
            }
        }
        log.info("#################################################################,开始分割开始标标记[{}],结束标记:[{}]", start, end);

        if (found) {
            // Remove back to front so the indices of the elements still to be removed stay valid.
            // Tail first: everything from the next heading to the end of the document.
            for (int i = count - 1; i >= end; i--) {
                xdoc.removeBodyElementAndSTD(i);
            }
            log.info("*****************");
            // Then everything in front of the matched heading.
            for (int i = start - 1; i >= 0; i--) {
                xdoc.removeBodyElementAndSTD(i);
            }
            xdoc.write(stream);
        }
    }

    return stream;
}

MyXWPDocument 是基于 XWPFDocument 实现的(XWPFDocument 对于可编辑的区域没有处理)

/**
 * {@link XWPFDocument} subclass that adds a body-element removal method which also
 * handles content controls (SDT blocks) in addition to tables and paragraphs.
 *
 * <p>Date: 2023/5/30
 *
 * @author hs
 * @version 1.0
 */
public class MyXWPDocument extends XWPFDocument {

    /**
     * Creates the document by passing the stream to the {@link XWPFDocument} constructor.
     *
     * @param inputStream source of the document content
     * @throws IOException propagated from the superclass constructor
     */
    public MyXWPDocument(InputStream inputStream) throws IOException {
        super(inputStream);
    }

    /**
     * Removes the body element at the given position, updating both the inherited
     * bookkeeping lists and the underlying document body.
     *
     * @param pos index into the body-element list
     * @return {@code true} if {@code pos} was in range and the element was removed from
     *         the body-element list, {@code false} if {@code pos} was out of range
     */
    public boolean removeBodyElementAndSTD(int pos) {
        // Guard clause: nothing to do for an out-of-range position.
        if (pos < 0 || pos >= bodyElements.size()) {
            return false;
        }

        BodyElementType kind = bodyElements.get(pos).getElementType();
        if (kind == BodyElementType.TABLE) {
            int tableIndex = getTablePos(pos);
            tables.remove(tableIndex);
            getDocument().getBody().removeTbl(tableIndex);
        } else if (kind == BodyElementType.PARAGRAPH) {
            int paragraphIndex = getParagraphPos(pos);
            paragraphs.remove(paragraphIndex);
            getDocument().getBody().removeP(paragraphIndex);
        } else if (kind == BodyElementType.CONTENTCONTROL) {
            // Locate the very same object (identity, not equality) among the content controls.
            int sdtIndex = -1;
            for (int k = 0; k < contentControls.size(); k++) {
                if (contentControls.get(k) == bodyElements.get(pos)) {
                    sdtIndex = k;
                    break;
                }
            }
            if (sdtIndex != -1) {
                getDocument().getBody().removeSdt(sdtIndex);
                contentControls.remove(sdtIndex);
            }
        }

        bodyElements.remove(pos);
        return true;
    }
}

大梦谁先觉?平生我自知,草堂春睡足,窗外日迟迟。

本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

基于 POI 根据标题截取 Word 文件 的相关文章

随机推荐