我的 page1->Resource -> Xobjects->Fm0、Fm1、Fm2 下有 FormXobject。
所以它不是直接内容流,在内容->内容流下不可用。所以我想将 Fm0->Contentstream 的内容流移动到 page1->contents->contentstream。
当我们像这样移动内容流时,我们必须同时将 Fm0 相关资源传输或复制到页面级资源。
1.内容流需要复制到页面级别的内容下。
2.色彩空间对象需要复制到page1->Resource->Colorspace下。
3.ExtGState对象需要复制到page1->Resource->ExtGState下。
4.properties需要复制到page1->Resource下(这里需要完全创建)
我尝试了一些代码
private static PDDocument parseFormXobject(PDDocument document, Integer pg_ind) throws IOException {
List<Object> tokens1 = (List<Object>) (getTokens(document, pg_ind)).get(pg_ind);
PDStream newContents = new PDStream(document);
OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE);
ContentStreamWriter writer = new ContentStreamWriter(out);
PDPage pageinner = document.getPage(pg_ind);
PDResources resources = pageinner.getResources();
PDResources new_resources = new PDResources();
new_resources = resources;
COSDictionary fntdict = new COSDictionary();
COSDictionary imgdict = new COSDictionary();
COSDictionary extgsdict = new COSDictionary();
COSDictionary colordict = new COSDictionary();
COSDictionary pattern = new COSDictionary();
int img_count = 0;
for (COSName xObjectName : resources.getXObjectNames()) {
PDXObject xObject = resources.getXObject(xObjectName);
if (xObject instanceof PDFormXObject
&& tokens1.toString().contains(xObjectName.toString()) ) {
PDFStreamParser parser = new PDFStreamParser(((PDFormXObject) xObject).getContentStream());
parser.parse();
List<Object> tokens3 = parser.getTokens();
int ind =0;
//isTextContains will check is there any Tj operators or there or not
if (isTextContains(tokens3)){
for (COSName colorname :((PDFormXObject) xObject).getResources().getColorSpaceNames())
{
COSName new_name = COSName.getPDFName(colorname.getName());
PDColorSpace pdcolor = ((PDFormXObject) xObject).getResources().getColorSpace(colorname);
colordict.setItem(new_name,pdcolor);
}
for (COSName fontName :((PDFormXObject) xObject).getResources().getFontNames() )
{
COSName new_name = COSName.getPDFName(fontName.getName());
PDFont font =((PDFormXObject) xObject).getResources().getFont(fontName);
font.getCOSObject().setItem(COSName.NAME, new_name);
fntdict.setItem(new_name,font);
}
for (COSName ExtGSName :((PDFormXObject) xObject).getResources().getExtGStateNames() )
{
COSName new_name = COSName.getPDFName(ExtGSName.getName());
PDExtendedGraphicsState ExtGState =((PDFormXObject) xObject).getResources().getExtGState(ExtGSName);
ExtGState.getCOSObject().setItem(COSName.NAME, new_name);
extgsdict.setItem(new_name,ExtGState);
}
imgdict.setItem(xObjectName, xObject);
for (COSName Imgname :((PDFormXObject) xObject).getResources().getXObjectNames() )
{
COSName new_name = COSName.getPDFName(Imgname.getName());
xObject.getCOSObject().setItem(COSName.NAME, new_name);
PDXObject img =((PDFormXObject) xObject).getResources().getXObject(Imgname);
imgdict.setItem(new_name, img);
}
for (COSName paternname :((PDFormXObject) xObject).getResources().getPatternNames() )
{
COSName new_name = COSName.getPDFName(paternname.getName());
PDAbstractPattern pat = ((PDFormXObject) xObject).getResources().getPattern(paternname);
pat.getCOSObject().setItem(COSName.NAME, new_name);
pattern.setItem(new_name,pat);
}
for (int k=0; k< tokens1.size(); k++) {
if ( ((tokens1.get(k) instanceof Operator) && ((Operator)tokens1.get(k)).getName().toString().equals("Do"))
&& ((COSName)tokens1.get(k-1)).getName().toString().equals(xObjectName.getName().toString()) ) {
tokens1.remove(k-1);
tokens1.remove(k-1);
tokens1.add(k-1, Operator.getOperator("q"));
if(((PDFormXObject) xObject).getMatrix() != null) {
tokens1.add(k, new COSFloat(((PDFormXObject) xObject).getMatrix().getScaleX()));
tokens1.add(k + 1, new COSFloat(((PDFormXObject) xObject).getMatrix().getShearY()));
tokens1.add(k + 2, new COSFloat(((PDFormXObject) xObject).getMatrix().getShearX()));
tokens1.add(k + 3, new COSFloat(((PDFormXObject) xObject).getMatrix().getScaleY()));
tokens1.add(k + 4, new COSFloat(((PDFormXObject) xObject).getMatrix().getTranslateX()));
tokens1.add(k + 5, new COSFloat(((PDFormXObject) xObject).getMatrix().getTranslateY()));
tokens1.add(k + 6, Operator.getOperator("cm"));
tokens1.add(k+7, Operator.getOperator("Q"));
ind =k+7;
}else{
tokens1.add(k, Operator.getOperator("Q"));
ind =k;
}
break;
}
}
for (int k=0; k< tokens3.size(); k++) {
if ( (tokens3.size() > k+1) && (tokens3.get(k+1) instanceof Operator) && (((Operator)tokens3.get(k+1)).getName().toString().equals("Do")
|| ((Operator)tokens3.get(k+1)).getName().toString().equals("gs")
|| ((Operator)tokens3.get(k+1)).getName().toString().equals("cs")
|| ((Operator)tokens3.get(k+1)).getName().toString().equals("CS")) ) {
COSName new_name = COSName.getPDFName( ((COSName) tokens3.get(k)).getName() );
tokens1.add(ind+k, new_name );
}else if ( (tokens3.size() > k+2) && (tokens3.get(k+2) instanceof Operator)
&& ((Operator)tokens3.get(k+2)).getName().toString().equals("Tf") ) {
COSName new_name = COSName.getPDFName( ((COSName) tokens3.get(k)).getName() );
tokens1.add(ind+k, new_name );
}
else
tokens1.add(ind+k,tokens3.get(k));
}
img_count +=1;
}else {
imgdict.setItem(xObjectName, xObject);
img_count +=1;
}
}else
imgdict.setItem(xObjectName, xObject);
}
for (COSName fontName :new_resources.getFontNames() )
{
PDFont font =new_resources.getFont(fontName);
fntdict.setItem(fontName,font);
}
for (COSName ExtGSName :new_resources.getExtGStateNames() )
{
PDExtendedGraphicsState extg =new_resources.getExtGState(ExtGSName);
extgsdict.setItem(ExtGSName,extg);
}
for (COSName colorname :new_resources.getColorSpaceNames() )
{
PDColorSpace color =new_resources.getColorSpace(colorname);
colordict.setItem(colorname,color);
}
for (COSName patern :new_resources.getPatternNames() )
{
PDAbstractPattern pat =new_resources.getPattern(patern);
pattern.setItem(patern,pat);
}
resources.getCOSObject().setItem(COSName.EXT_G_STATE,extgsdict);
resources.getCOSObject().setItem(COSName.FONT,fntdict);
resources.getCOSObject().setItem(COSName.XOBJECT,imgdict);
resources.getCOSObject().setItem(COSName.COLORSPACE, colordict);
resources.getCOSObject().setItem(COSName.PATTERN, pattern);
writer.writeTokens(tokens1);
out.close();
document.getPage(pg_ind).setContents(newContents);
document.getPage(pg_ind).setResources(resources);
return document;
}
private static JSONObject getTokens(PDDocument oldDocument, Integer pageIndex) throws IOException {
// TODO Auto- it will return the tokens of pdf
JSONObject oldDocumentTokens = new JSONObject();
PDPage pg = oldDocument.getPage(pageIndex);
PDFStreamParser parser = new PDFStreamParser(pg);
parser.parse();
List<Object> tokens = PDFUtils.removeTokens(parser.getTokens());
oldDocumentTokens.put(pageIndex, tokens);
return oldDocumentTokens;
}
private static boolean isTextContains(List<Object> tokens3) {
for (int k=0; k< tokens3.size(); k++) {
if (tokens3.get(k) instanceof Operator) {
Operator op = (Operator) tokens3.get(k);
if(op.getName().equals("BT"))
return true;
}
}
return false;
}
但我无法获得精确的页面图形。我正在失去一些东西。
输入pdf https://drive.google.com/file/d/1lT4pwGAk4I6vaekTHhWnx_hngcKCm0DK/view?usp=sharing
输出pdf https://drive.google.com/file/d/1RikX4E5WRN6fq_0Cv4PBYaMabJuHONq9/view?usp=sharing