如何在合并 PDF 时删除空白 [英] How To Remove Whitespace on Merge

查看:283
本文介绍了如何在合并删除空白的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有一段代码,它接收 3 个不同的 PDF 字节数组并将它们合并。这段代码运行良好。问题(部分用户遇到)在于:每个 PDF 都被视为完整的一页(打印时),即使上面只有例如 4 英寸的内容,也会在垂直方向上留下 7 英寸的空白。接着插入中间的文档,其末尾可能有、也可能没有垂直空白。然后页脚同样被单独放在一页上。

下面是code:

 字节[]字节= rv.LocalReport.Render(PDF,空,出Mime类型,出编码,扩展出去,出去StreamIDs,出警告);
清单<字节[]> MergeSets = //这是此code之前填写//附加任何其他网页此主信
如果(MergeSets.Count大于0){
  MemoryStream的毫秒=新的MemoryStream();
  文档的文档=新的文件();
  PdfCopy副本=新PdfCopy(文件,MS);
  document.Open();
  PdfImportedPage页面;
  PdfReader读卡器=新PdfReader(字节); //读取生成的主信
  诠释页= reader.NumberOfPages;  的for(int i = 0; I<页面;){
    页= copy.GetImportedPage(读者,我++);
    copy.AddPage(页);
  } //在求职信的页面的foreach  //现在追加合并套
  的foreach(在MergeSets的byte [] BA){
    读者=新PdfReader(BA);
    页= reader.NumberOfPages;    的for(int i = 0; I<页面;){
      页= copy.GetImportedPage(读者,我++);
      copy.AddPage(页);
    } //在当前合并集的页面的foreach
  } //该组数据的的foreach  document.Close();  ServerSaved = SaveGeneratedLetter(ms.GetBuffer(),DateTime.Now.Year,hl.LetterName,SAVENAME);
} //如果有任何合并

在合并每一页时,有没有办法裁剪/移除/擦除每个 PDF 末尾的垂直空白,使其看起来像一份无缝衔接的文档?


更新:
下面是我试图合并一些样本.pdf文件。

,的身体页脚


  

更新2:使用应答:


  
  

我已将 @mkl 的代码转换为 C#,如下所示。


  
  

工具类:


 公共类PdfVeryDenseMergeTool {  私人矩形每页;
  私人浮动TOPMARGIN;
  私人浮动BottomMargin;
  私人浮动差距;
  私人文件文档= NULL;
  私人PdfWriter作家=无效;
  私人浮动YPosition = 0;  公共PdfVeryDenseMergeTool(矩形的大小,浮顶,浮底,浮动间隙){
    this.PageSize =大小;
    this.TopMargin =顶部;
    this.BottomMargin =底部;
    this.Gap =差距;
  } // PdfVeryDenseMergeTool  公共无效合并(MemoryStream的OutputStream中,列表与LT; PdfReader>输入){
    尝试{
      this.OpenDocument(OutputStream的);      的foreach(在输入PdfReader读者){
        this.Merge(读卡器);
      } // PDF文件的的foreach合并
    } {最后
      this.CloseDocument();
    } //的try-catch-最后
  } //合并  公共无效OpenDocument格式(MemoryStream的OutputStream中){
    this.Document =新的文件(每页,36,36,this.TopMargin,this.BottomMargin);
    this.Writer = PdfWriter.GetInstance(文档,OutputStream的);    this.Document.Open();
    this.NewPage();
  } // OpenDocument格式  公共无效CloseDocument(){
    尝试{
      this.Document.Close();
    } {最后
      this.Document = NULL;
      this.Writer = NULL;
      this.YPosition = 0;
    } //尝试,终于
  } // CloseDocument  公共无效NewPage公司(){
    this.Document.NewPage();
    this.YPosition = PageSize.GetTop(this.TopMargin);
  } //合并  公共无效合并(PdfReader读者){
    PdfReaderContentParser分析器=新PdfReaderContentParser(读卡器);    对于(INT的PageIndex = 1;的PageIndex< = reader.NumberOfPages;的PageIndex ++){
      this.Merge(阅读器,分析器,PageIndex的);
    } //当前PDF的页面的foreach
  } //合并  公共无效合并(PdfReader读卡器,PdfReaderContentParser解析器,诠释的PageIndex){
    PdfImportedPage importedPage = Writer.GetImportedPage(读者的PageIndex);
    PdfContentByte directContent = Writer.DirectContent;    PageVerticalAnalyzer取景器= parser.ProcessContent(PageIndex的,新PageVerticalAnalyzer());    如果(finder.VerticalFlips.Count 2)
      返回;    矩形pageSizeToImport = reader.GetPageSize(PageIndex的);    INT startFlip = finder.VerticalFlips.Count - 1;
    布尔第一= TRUE;    而(startFlip大于0){
      如果(!第一)
        this.NewPage();      浮可用空间= this.YPosition - PageSize.GetBottom(BottomMargin);
      INT endFlip = startFlip + 1;      而((endFlip→1)及及(finder.VerticalFlips [startFlip] - finder.VerticalFlips [endFlip - 2]所述,可用空间))
        endFlip - = 2;      如果(endFlip< startFlip){
        浮动高度= finder.VerticalFlips [startFlip] - finder.VerticalFlips [endFlip]        directContent.SaveState();
        directContent.Rectangle(0,this.YPosition - 高度,pageSizeToImport.Width,高度);
        directContent.Clip();
        directContent.NewPath();        this.Writer.DirectContent.AddTemplate(importedPage,0,this.YPosition - (finder.VerticalFlips [startFlip] - pageSizeToImport.Bottom));        directContent.RestoreState();
        this.YPosition - =身高+ this.Gap;
        startFlip = endFlip - 1;
      }否则如果(!第一){
        抛出新的ArgumentException(的String.Format(第{0}的内容太大,的PageIndex));
      } //如果      第一= FALSE;
    } //而
  } //合并
} // PdfVeryDenseMergeTool


  

该RenderListener类:

  更新 3:修正了一行代码后即可正常工作:见代码中的注释


 公共类PageVerticalAnalyzer:IRenderListener {  公共PageVerticalAnalyzer(){}  公开名单<浮动> VerticalFlips =新的List<浮动>();  公共无效AddVerticalUseSection(从浮动,浮动){
    如果(至<从){
      浮TEMP =来;
      为从=;
      从=气温;
    }    INT I = 0;
    INT J = 0;    对于(i = 0; I< VerticalFlips.Count;我++){
      浮翻转= VerticalFlips [I]
      如果(翻转<从)
        继续;      为(J = I; J< VerticalFlips.Count; J ++){
        翻转= VerticalFlips [J]。
        如果(翻转<到)
          继续;
        打破;
      }
      打破;
    } //垂直翻转的的foreach    布尔fromOutsideInterval = I%2 == 0;
    布尔toOutsideInterval = j的2%== 0;    而(j-- I标记)
      VerticalFlips.RemoveAt(J); //这是问题符合刚卸下摆臂(J)
    如果(toOutsideInterval)
      VerticalFlips.Insert(i到);
    如果(fromOutsideInterval)
      VerticalFlips.Insert(ⅰ,从);
  } // AddVerticalUseSection  公共无效BeginTextBlock(){/ *什么也不做* /}  公共无效EndTextBlock(){/ *什么也不做* /}  公共无效RenderImage(ImageRenderInfo renderInfo){
    矩阵CTM = renderInfo.GetImageCTM();
    清单<浮动> YCoords =新的List<浮动>(4){0,0,0,0};    为(中间体X = 0; X 2; X ++){
      为(中间体Y = 0; Y 2; Y ++){
        矢量角落=新的向量(X,Y,1).Cross(CTM)。
        YCoords [2 * X + Y =角球[Vector.I2]
      }
    }    YCoords.Sort();
    AddVerticalUseSection(YCoords [0],YCoords [3]);
  } // RenderImage  公共无效RenderText(TextRenderInfo renderInfo){
    线段ascentLine = renderInfo.GetAscentLine();
    线段descentLine = renderInfo.GetDescentLine();
    清单<浮动> YCoords =新的List<浮动>(四){
      ascentLine.GetStartPoint()[Vector.I2]
      ascentLine.GetEndPoint()[Vector.I2]
      descentLine.GetStartPoint()[Vector.I2]
      descentLine.GetEndPoint()[Vector.I2]
    };    YCoords.Sort();
    AddVerticalUseSection(YCoords [0],YCoords [3]);
  } // RenderText
} // PageVericalAnalyzer


  

code,收集文件,并运行此工具:


 公共无效TestMergeDocuments(){
  PdfVeryDenseMergeTool工具=新PdfVeryDenseMergeTool(iTextSharp.text.PageSize.A4,18,18,10);
  清单<字节[]>文件=新的List<字节[]>();  // code来装载每个3个文件,我需要这个字节数组列表  使用(MemoryStream的毫秒=新的MemoryStream()){
    清单< PdfReader>文件=新的List< PdfReader>();    的foreach(在文件中的byte [] BA){
      files.Add(新PdfReader(BA));
    } //该组数据的的foreach    tool.Merge(MS,文件);    //保存使用文件:ms.GetBuffer()
  } //使用内存流
} // TestMergeDocuments


解决方案

下面的示例工具沿用了此答案(http://stackoverflow.com/a/28024276/1729265)中 PdfDenseMergeTool 工具的思路来实现,OP 曾评论说该答案"非常接近[他]所需要的"。与 PdfDenseMergeTool 一样,这个工具用 Java/iText 实现,因为我对它比对 C#/iTextSharp 更熟悉。既然 OP 已经把 PdfDenseMergeTool 翻译成了 C#/iTextSharp,那么翻译这个工具应该也不是太大的问题。

PdfVeryDenseMergeTool

与 PdfDenseMergeTool 类似,这个工具从若干 PdfReader 实例中取出各页面的内容,并尝试将它们紧凑地合并,即在空闲空间足够时把多个源页面的内容放到同一个目标页面上。与之前的工具不同的是,这个工具甚至会拆分源页面的内容,以实现更紧凑的合并。

与另一个工具一样,PdfVeryDenseMergeTool 不会考虑矢量图形,因为 iText(Sharp) 的解析 API 只转发文本和位图图像。

对于无法完整放入目标页面的源页面,PdfVeryDenseMergeTool 会沿一条不与文本字形或位图图像的边界框相交的水平线将其拆分。

工具类:


 公共类PdfVeryDenseMergeTool
{
    公共PdfVeryDenseMergeTool(矩形的大小,浮顶,浮底,浮动间隙)
    {
        this.pageSize =大小;
        this.topMargin =顶部;
        this.bottomMargin =底部;
        this.gap =差距;
    }    公共无效合并(OutputStream的OutputStream的,可迭代&LT; PdfReader&GT;输入)抛出DocumentException,IOException异常
    {
        尝试
        {
            使用openDocument(OutputStream的);
            对于(PdfReader读者:输入)
            {
                合并(读卡器);
            }
        }
        最后
        {
            closeDocument();
        }
    }    无效使用openDocument(OutputStream的OutputStream中)抛出DocumentException
    {
        最终文档文件=新的文件(的pageSize,36,36,TOPMARGIN,bottomMargin);
        最后PdfWriter作家= PdfWriter.getInstance(文件,OutputStream的);
        document.open();
        this.document =文件;
        this.writer =作家;
        NEWPAGE();
    }    无效closeDocument()
    {
        尝试
        {
            document.close();
        }
        最后
        {
            this.document = NULL;
            this.writer = NULL;
            this.yPosition = 0;
        }
    }    新页无效()
    {
        document.newPage();
        yPosition = pageSize.getTop(TOPMARGIN);
    }    无效合并(PdfReader阅读器)抛出IOException异常
    {
        PdfReaderContentParser分析器=新PdfReaderContentParser(读卡器);
        对于(INT页= 1;页&LT; = reader.getNumberOfPages();页++)
        {
            合并(阅读器,分析器,页);
        }
    }    无效合并(PdfReader读卡器,PdfReaderContentParser解析器,诠释页)抛出IOException异常
    {
        PdfImportedPage importedPage = writer.getImportedPage(读卡器,页);
        PdfContentByte directContent = writer.getDirectContent();        PageVerticalAnalyzer取景= parser.processContent(页面,新PageVerticalAnalyzer());
        如果(finder.verticalFlips.size()2)
            返回;
        矩形pageSizeToImport = reader.getPageSize(页);        INT startFlip = finder.verticalFlips.size() - 1;
        布尔第一= TRUE;
        而(startFlip大于0)
        {
            如果(!第一)
                NEWPAGE();            浮可用空间= yPosition - pageSize.getBottom(bottomMargin);
            INT endFlip = startFlip + 1;
            而((endFlip→1)及及(finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip - 2)&下; FREESPACE))
                endFlip - = 2;
            如果(endFlip&LT; startFlip)
            {
                浮球高度= finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip);                directContent.saveState();
                directContent.rectangle(0,yPosition - 高度,pageSizeToImport.getWidth(),高度);
                directContent.clip();
                directContent.newPath();                。writer.getDirectContent()addTemplate(importedPage,0,yPosition - (finder.verticalFlips.get(startFlip) - pageSizeToImport.getBottom()));                directContent.restoreState();
                yPosition - =身高+差距;
                startFlip = endFlip - 1;
            }
            否则,如果(!第一)
                抛出新抛出:IllegalArgumentException(的String.format(页%S含量的部分过大,页));
            第一= FALSE;
        }
    }    文献文件= NULL;
    PdfWriter作家=无效;
    浮yPosition = 0;    最后矩形的pageSize;
    最终浮动TOPMARGIN;
    最终浮动bottomMargin;
    最终浮动差距;
}

(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/main/java/mkl/testarea/itext5/merge/PdfVeryDenseMergeTool.java\">PdfVeryDenseMergeTool.java)


此工具可使用自定义的 RenderListener 用于与iText的解析器API使用:


 公共类PageVerticalAnalyzer实现RenderListener
{
    @覆盖
    公共无效beginTextBlock(){}
    @覆盖
    公共无效endTextBlock(){}    / *
     * @see RenderListener#renderText(TextRenderInfo)
     * /
    @覆盖
    公共无效renderText(TextRenderInfo renderInfo)
    {
        线段ascentLine = renderInfo.getAscentLine();
        线段descentLine = renderInfo.getDescentLine();
        浮动[] = yCoords新的浮动[] {
                ascentLine.getStartPoint()获得(Vector.I2)
                ascentLine.getEndPoint()获得(Vector.I2)
                descentLine.getStartPoint()获得(Vector.I2)
                descentLine.getEndPoint()获得(Vector.I2)
        };
        Arrays.sort(yCoords);
        addVerticalUseSection(yCoords [0],yCoords [3]);
    }    / *
     * @see RenderListener#renderImage(ImageRenderInfo)
     * /
    @覆盖
    公共无效renderImage(ImageRenderInfo renderInfo)
    {
        矩阵CTM = renderInfo.getImageCTM();
        浮动[] = yCoords新的浮动[4];
        为(中间体X = 0; X 2; X ++)
            为(中间体Y = 0; Y 2; Y +)
            {
                矢量角落=新的向量(X,Y,1).cross(CTM)。
                yCoords [2 * X + Y = corner.get(Vector.I2);
            }
        Arrays.sort(yCoords);
        addVerticalUseSection(yCoords [0],yCoords [3]);
    }    / **
     *由于使用该方法标志着给定区间。
     * /
    无效addVerticalUseSection(从浮动,浮动)
    {
        如果(至&lt;从)
        {
            浮TEMP =来;
            为从=;
            从=气温;
        }        INT I = 0,J = 0;
        对于(; I&LT; verticalFlips.size();我++)
        {
            浮翻转= verticalFlips.get(I)
            如果(翻转&LT;从)
                继续;            为(J = I; J&LT; verticalFlips.size(); J ++)
            {
                翻转= verticalFlips.get(J);
                如果(翻转&LT;到)
                    继续;
                打破;
            }
            打破;
        }
        布尔fromOutsideInterval = I%2 == 0;
        布尔toOutsideInterval = j的2%== 0;        而(j-- I标记)
            verticalFlips.remove(J);
        如果(toOutsideInterval)
            verticalFlips.add(i到);
        如果(fromOutsideInterval)
            verticalFlips.add(ⅰ,从);
    }    最终名单&LT;浮球GT; verticalFlips =新的ArrayList&LT;浮球GT;();
}

(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/main/java/mkl/testarea/itext5/merge/PageVerticalAnalyzer.java\">PageVerticalAnalyzer.java)


它用于这样的:


  PdfVeryDenseMergeTool工具=新PdfVeryDenseMergeTool(PageSize.A4,18,18,5);
tool.merge(输出,输入);

(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/test/java/mkl/testarea/itext5/merge/VeryDenseMerging.java\">VeryDenseMerging.java)


应用到OP的样本文件

Header.pdf

Body.pdf

Footer.pdf

它生成

如果将目标文档的页面大小定义为 A5 横向:


  PdfVeryDenseMergeTool工具=新PdfVeryDenseMergeTool(新RectangleReadOnly(595421),18,18,5);
tool.merge(输出,输入);

(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/test/java/mkl/testarea/itext5/merge/VeryDenseMerging.java\">VeryDenseMerging.java)


它生成的:

注意!这只是一个概念验证,并未考虑所有可能的情况。例如,源页面或目标页面带有非平凡 Rotate 值的情况没有得到正确处理。因此,它尚不能用于生产环境。


改善电流(5.5.6快照)iText的版本

面向 5.5.6 的当前 iText 开发版本增强了解析器功能,使其也能通知矢量图形。因此,我扩展了 PageVerticalAnalyzer 来利用这一点:


 公共类PageVerticalAnalyzer实现ExtRenderListener
{
    @覆盖
    公共无效beginTextBlock(){}
    @覆盖
    公共无效endTextBlock(){}
    @覆盖
    公共无效clipPath(INT规则){}
    ...
    静态类SubPathSection
    {
        公共SubPathSection(浮点X,浮法Y,矩阵m)
        {
            浮effectiveY = getTransformedY(X,Y,M);
            pathFromY = effectiveY;
            pathToY = effectiveY;
        }        无效extendTo(浮法X,浮法Y,矩阵m)
        {
            浮effectiveY = getTransformedY(X,Y,M);
            如果(effectiveY&LT; pathFromY)
                pathFromY = effectiveY;
            否则,如果(effectiveY&GT; pathToY)
                pathToY = effectiveY;
        }        浮getTransformedY(浮法X,浮法Y,矩阵m)
        {
            返回新的Vector(X,Y,1).cross(M)获得(Vector.I2);
        }        浮动getFromY()
        {
            返回pathFromY;
        }        浮动getToY()
        {
            返回pathToY;
        }        私人浮动pathFromY;
        私人浮动pathToY;
    }    / *
     *请注意:执行是不正确的,因为它包含曲线的控制点
     *它可以是远远超出实际曲线。
     *
     * @see ExtRenderListener#modifyPath(PathConstructionRenderInfo)
     * /
    @覆盖
    公共无效modifyPath(PathConstructionRenderInfo renderInfo)
    {
        矩阵CTM = renderInfo.getCtm();
        清单&LT;浮球GT; segmentData = renderInfo.getSegmentData();        开关(renderInfo.getOperation())
        {
        案例PathConstructionRenderInfo.MOVETO:
            子路径= NULL;
        案例PathConstructionRenderInfo.LINETO:
        案例PathConstructionRenderInfo.CURVE_123:
        案例PathConstructionRenderInfo.CURVE_13:
        案例PathConstructionRenderInfo.CURVE_23:
            的for(int i = 0; I&LT; segmentData.size() - 1; I + = 2)
            {
                如果(子路径== NULL)
                {
                    子路径=新SubPathSection(segmentData.get(ⅰ),segmentData.get第(i + 1),CTM);
                    path.add(子路径);
                }
                其他
                    subPath.extendTo(segmentData.get(ⅰ),segmentData.get第(i + 1),CTM);
            }
            打破;
        案例PathConstructionRenderInfo.RECT​​:
            浮X = segmentData.get(0);
            浮Y = segmentData.get(1);
            浮W = segmentData.get(2);
            浮动H = segmentData.get(3);
            SubPathSection节=新SubPathSection(X,Y,CTM)。
            section.extendTo(X + W,Y,CTM)。
            section.extendTo(X,Y + H,CTM)。
            section.extendTo(X + W,Y + H,CTM)。
            path.add(部分);
        案例PathConstructionRenderInfo.CLOSE:
            子路径= NULL;
            打破;
        默认:
        }
    }    / *
     * @see ExtRenderListener#renderPath(PathPaintingRenderInfo)
     * /
    @覆盖
    公共路径renderPath(PathPaintingRenderInfo renderInfo)
    {
        如果(renderInfo.getOperation()!= PathPaintingRenderInfo.NO_OP)
        {
            对于(SubPathSection部分:路径)
                addVerticalUseSection(section.getFromY(),section.getToY());
        }        path.clear();
        子路径= NULL;
        返回null;
    }    清单&LT; SubPathSection&GT;路径=新的ArrayList&LT; SubPathSection&GT;();
    SubPathSection子路径= NULL;
    ...
}

(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/main/java/mkl/testarea/itext5/merge/PageVerticalAnalyzer.java\">PageVerticalAnalyzer.java)


一个简单的测试(<一个href=\"https://github.com/mkl-public/testarea-itext5/blob/master/src/test/java/mkl/testarea/itext5/merge/VeryDenseMerging.java\">VeryDenseMerging.java方法 testMergeOnlyGraphics )合并这些文件

这个:

但请再次注意:这仅仅是一个概念验证。尤其是 modifyPath() 需要改进:其实现并不正确,因为它包含了曲线的控制点,而这些控制点可能远在实际曲线之外。

I have some code that takes 3 different PDF byte arrays and merges them. This code works great. The issue some people are having is that each PDF is considered to be a full page (if printed) even if there are only, say, 4 inches of content on it, thus leaving 7 inches of white space vertically. Then the middle document gets put in and may or may not have vertical white space at the end of it. Then the footer gets put on its own page as well.

Here is the code:

byte[] Bytes = rv.LocalReport.Render("PDF", null, out MimeType, out Encoding, out Extension, out StreamIDs, out Warnings);
List<byte[]> MergeSets = // This is filled prior to this code

// Append any other pages to this primary letter
if (MergeSets.Count > 0) {
  MemoryStream ms = new MemoryStream();
  Document document = new Document();
  PdfCopy copy = new PdfCopy(document, ms);
  document.Open();
  PdfImportedPage page;
  PdfReader reader = new PdfReader(Bytes); // read the generated primary Letter
  int pages = reader.NumberOfPages;

  // Pages are requested by number, 1 through NumberOfPages.
  for (int pageNumber = 1; pageNumber <= pages; pageNumber++) {
    page = copy.GetImportedPage(reader, pageNumber);
    copy.AddPage(page);
  } // foreach of the pages in the Cover Letter

  // Now append the merge sets
  foreach (byte[] ba in MergeSets) {
    reader = new PdfReader(ba);
    pages = reader.NumberOfPages;

    for (int pageNumber = 1; pageNumber <= pages; pageNumber++) {
      page = copy.GetImportedPage(reader, pageNumber);
      copy.AddPage(page);
    } // foreach of the pages in the current merge set
  } // foreach of the sets of data

  document.Close();

  // ToArray() copies exactly the bytes that were written. GetBuffer() would hand over
  // the stream's whole backing array, including unused capacity after the PDF data.
  ServerSaved = SaveGeneratedLetter(ms.ToArray(), DateTime.Now.Year, hl.LetterName, SaveName);
} // if there is anything to merge

Is there a way when I am merging each page to clip/remove/erase the vertical white space at the end of each pdf so it appears as one seamless document?

UPDATE: Here are some sample .pdf files I am trying to merge.

header, body, footer

UPDATE 2: USING THE ANSWER:

I have converted @mkl's code to C# and here it is.

The tool class:

/// <summary>
/// Merges the pages of several PDFs into one document, stacking the vertically
/// "used" sections of each source page (as reported by <see cref="PageVerticalAnalyzer"/>)
/// directly below each other and starting a new target page only when the next
/// section no longer fits.
/// </summary>
public class PdfVeryDenseMergeTool {

  // Target page geometry; set once in the constructor.
  private readonly Rectangle _pageSize;
  private readonly float _topMargin;
  private readonly float _bottomMargin;
  // Vertical distance left between two consecutive sections on the target page.
  private readonly float _gap;

  // State of the merge currently in progress; reset by CloseDocument.
  private Document _document = null;
  private PdfWriter _writer = null;
  // Y coordinate on the current target page below which the next section is placed.
  private float _yPosition = 0;

  /// <summary>Creates a merge tool for the given target page layout.</summary>
  /// <param name="size">Page size of the generated document.</param>
  /// <param name="top">Top margin of the generated pages.</param>
  /// <param name="bottom">Bottom margin of the generated pages.</param>
  /// <param name="gap">Vertical gap inserted after every placed section.</param>
  public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap) {
    _pageSize = size;
    _topMargin = top;
    _bottomMargin = bottom;
    _gap = gap;
  } // PdfVeryDenseMergeTool

  /// <summary>
  /// Merges all pages of all <paramref name="inputs"/> into a new PDF written to
  /// <paramref name="outputStream"/>. The document is closed even if merging fails.
  /// </summary>
  public void Merge(MemoryStream outputStream, List<PdfReader> inputs) {
    try {
      OpenDocument(outputStream);

      foreach (PdfReader reader in inputs) {
        Merge(reader);
      } // foreach of the PDF files to merge
    } finally {
      CloseDocument();
    } // try-finally
  } // Merge

  /// <summary>
  /// Creates and opens the target document on <paramref name="outputStream"/> and
  /// positions the cursor at the top of the first page.
  /// </summary>
  public void OpenDocument(MemoryStream outputStream) {
    // 36 is passed for the two margin arguments preceding top/bottom.
    _document = new Document(_pageSize, 36, 36, _topMargin, _bottomMargin);
    _writer = PdfWriter.GetInstance(_document, outputStream);

    _document.Open();
    NewPage();
  } // OpenDocument

  /// <summary>
  /// Closes the target document (if one is open) and resets the merge state.
  /// Safe to call when no document was opened and safe to call repeatedly.
  /// </summary>
  public void CloseDocument() {
    try {
      // Guard: without it a second call, or a call after a failed OpenDocument,
      // would throw NullReferenceException and hide the original error.
      if (_document != null)
        _document.Close();
    } finally {
      _document = null;
      _writer = null;
      _yPosition = 0;
    } // try-finally
  } // CloseDocument

  /// <summary>Starts a new target page and moves the cursor to its top margin.</summary>
  public void NewPage() {
    _document.NewPage();
    _yPosition = _pageSize.GetTop(_topMargin);
  } // NewPage

  /// <summary>Merges every page of <paramref name="reader"/> into the open document.</summary>
  public void Merge(PdfReader reader) {
    PdfReaderContentParser parser = new PdfReaderContentParser(reader);

    for (int pageIndex = 1; pageIndex <= reader.NumberOfPages; pageIndex++) {
      Merge(reader, parser, pageIndex);
    } // foreach of the pages of the current PDF
  } // Merge

  /// <summary>
  /// Places the used sections of one source page onto the target document, from the
  /// top of the source page downwards, splitting between sections where necessary.
  /// </summary>
  /// <param name="reader">Source PDF.</param>
  /// <param name="parser">Content parser created for <paramref name="reader"/>.</param>
  /// <param name="pageIndex">Number of the source page (the caller counts from 1).</param>
  /// <exception cref="ArgumentException">
  /// A section does not fit even at the top of a fresh target page.
  /// </exception>
  public void Merge(PdfReader reader, PdfReaderContentParser parser, int pageIndex) {
    PdfImportedPage importedPage = _writer.GetImportedPage(reader, pageIndex);
    PdfContentByte directContent = _writer.DirectContent;

    PageVerticalAnalyzer finder = parser.ProcessContent(pageIndex, new PageVerticalAnalyzer());
    List<float> flips = finder.VerticalFlips;

    // Fewer than two flips means the analyzer found no used section: nothing to place.
    if (flips.Count < 2)
      return;

    Rectangle pageSizeToImport = reader.GetPageSize(pageIndex);

    // startFlip indexes the upper edge of the highest section not yet placed.
    int startFlip = flips.Count - 1;
    bool first = true;

    while (startFlip > 0) {
      // After the first attempt, each further round starts on a fresh target page.
      if (!first)
        NewPage();

      float freeSpace = _yPosition - _pageSize.GetBottom(_bottomMargin);
      int endFlip = startFlip + 1;

      // Extend the chunk downwards, one section (two flips) at a time, while the
      // distance from its upper edge to the candidate lower edge still fits.
      while ((endFlip > 1) && (flips[startFlip] - flips[endFlip - 2] < freeSpace))
        endFlip -= 2;

      if (endFlip < startFlip) {
        float height = flips[startFlip] - flips[endFlip];

        // Clip to the chunk's rectangle so only this part of the imported page shows.
        directContent.SaveState();
        directContent.Rectangle(0, _yPosition - height, pageSizeToImport.Width, height);
        directContent.Clip();
        directContent.NewPath();

        // Shift the imported page so the chunk's upper edge lands on the cursor.
        _writer.DirectContent.AddTemplate(importedPage, 0, _yPosition - (flips[startFlip] - pageSizeToImport.Bottom));

        directContent.RestoreState();
        _yPosition -= height + _gap;
        startFlip = endFlip - 1;
      } else if (!first) {
        // Nothing fitted although this round started on a new page.
        throw new ArgumentException(string.Format("Page {0} content too large", pageIndex));
      } // if

      first = false;
    } // while
  } // Merge
} // PdfVeryDenseMergeTool

The RenderListener class:
UPDATE 3: FIXED 1 LINE OF CODE AND IT WORKS: See comment in code

/// <summary>
/// Render listener that records which vertical ranges of a page are covered by
/// text or images. The ranges are kept in <see cref="VerticalFlips"/> as a list of
/// boundary values: each call to <see cref="AddVerticalUseSection"/> merges one more
/// range into that list.
/// </summary>
public class PageVerticalAnalyzer : IRenderListener {

  /// <summary>Boundary values of the recorded ranges, maintained by AddVerticalUseSection.</summary>
  public List<float> VerticalFlips = new List<float>();

  public PageVerticalAnalyzer() { }

  /// <summary>
  /// Marks the range between <paramref name="from"/> and <paramref name="to"/> as used,
  /// merging it with any ranges already recorded. The arguments may be given in either order.
  /// </summary>
  public void AddVerticalUseSection(float from, float to) {
    // Normalize so that low is the smaller bound.
    float low = from;
    float high = to;
    if (high < low) {
      low = to;
      high = from;
    }

    int total = VerticalFlips.Count;

    // Index of the first recorded flip that is not below the lower bound.
    int first = 0;
    while (first < total && VerticalFlips[first] < low) {
      first++;
    }

    // Index of the first flip, at or after 'first', that is not below the upper bound.
    // Stays 0 when every recorded flip lies below the lower bound.
    int last = 0;
    if (first < total) {
      last = first;
      while (last < total && VerticalFlips[last] < high) {
        last++;
      }
    }

    // An even index means the bound falls outside every recorded range.
    bool lowIsOutside = (first % 2) == 0;
    bool highIsOutside = (last % 2) == 0;

    // Drop all flips swallowed by the new range.
    if (last > first) {
      VerticalFlips.RemoveRange(first, last - first);
    }

    // Insert the upper bound first so the lower bound ends up in front of it.
    if (highIsOutside) {
      VerticalFlips.Insert(first, high);
    }
    if (lowIsOutside) {
      VerticalFlips.Insert(first, low);
    }
  } // AddVerticalUseSection

  public void BeginTextBlock() { /* Do nothing */  }

  public void EndTextBlock() { /* Do nothing */ }

  /// <summary>Records the vertical extent of an image from its transformed unit-square corners.</summary>
  public void RenderImage(ImageRenderInfo renderInfo) {
    Matrix ctm = renderInfo.GetImageCTM();
    List<float> cornerYs = new List<float>(4);

    // Corners are visited in the order (0,0), (0,1), (1,0), (1,1).
    for (int x = 0; x < 2; x++) {
      for (int y = 0; y < 2; y++) {
        cornerYs.Add(new Vector(x, y, 1).Cross(ctm)[Vector.I2]);
      }
    }

    cornerYs.Sort();
    AddVerticalUseSection(cornerYs[0], cornerYs[3]);
  } // RenderImage

  /// <summary>Records the vertical extent of a text chunk from its ascent and descent lines.</summary>
  public void RenderText(TextRenderInfo renderInfo) {
    LineSegment ascent = renderInfo.GetAscentLine();
    LineSegment descent = renderInfo.GetDescentLine();

    List<float> lineYs = new List<float>(4);
    lineYs.Add(ascent.GetStartPoint()[Vector.I2]);
    lineYs.Add(ascent.GetEndPoint()[Vector.I2]);
    lineYs.Add(descent.GetStartPoint()[Vector.I2]);
    lineYs.Add(descent.GetEndPoint()[Vector.I2]);

    lineYs.Sort();
    AddVerticalUseSection(lineYs[0], lineYs[3]);
  } // RenderText
} // PageVerticalAnalyzer

Code to gather files and run the tool:

// Builds a PdfReader for every source PDF, merges them densely into an in-memory
// PDF, and releases the readers afterwards.
public void TestMergeDocuments() {
  PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(iTextSharp.text.PageSize.A4, 18, 18, 10);
  List<byte[]> Files = new List<byte[]>();

  // Code to load each of the 3 files I need into this byte array list

  using (MemoryStream ms = new MemoryStream()) {
    List<PdfReader> files = new List<PdfReader>();

    try {
      foreach (byte[] ba in Files) {
        files.Add(new PdfReader(ba));
      } // foreach of the sets of data

      tool.Merge(ms, files);
    } finally {
      // Close every reader that was created, even if loading or merging failed.
      foreach (PdfReader reader in files) {
        reader.Close();
      }
    } // try-finally

    // Save the file using: ms.ToArray()
    // (GetBuffer() would also return the unused capacity after the PDF data.)
  } // using the memory stream
} // TestMergeDocuments

解决方案

The following sample tool has been implemented along the ideas of the tool PdfDenseMergeTool from this answer which the OP has commented to be SO close to what [he] NEEDs. Just like PdfDenseMergeTool this tool here is implemented in Java/iText which I'm more at home with than C#/iTextSharp. As the OP has already translated PdfDenseMergeTool to C#/iTextSharp, translating this tool here also should not be too great a problem.

PdfVeryDenseMergeTool

This tool similarly to PdfDenseMergeTool takes the page contents of pages from a number of PdfReader instances and tries to merge them densely, i.e. putting contents of multiple source pages onto a single target page if there is enough free space to do so. In contrast to that earlier tool, this tool even splits source page contents to allow for an even denser merge.

Just like that other tool, the PdfVeryDenseMergeTool does not take vector graphics into account because the iText(Sharp) parsing API only forwards text and bitmap images.

The PdfVeryDenseMergeTool splits source pages which do not completely fit onto a target page at a horizontal line which is not intersected by the bounding boxes of text glyphs or bitmap graphics.

The tool class:

/**
 * Merges the pages of several PDFs into one document, stacking the vertically
 * "used" sections of each source page (as reported by {@link PageVerticalAnalyzer})
 * directly below each other and starting a new target page only when the next
 * section no longer fits.
 */
public class PdfVeryDenseMergeTool
{
    /**
     * @param size   page size of the generated document
     * @param top    top margin of the generated pages
     * @param bottom bottom margin of the generated pages
     * @param gap    vertical gap inserted after every placed section
     */
    public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap)
    {
        this.pageSize = size;
        this.topMargin = top;
        this.bottomMargin = bottom;
        this.gap = gap;
    }

    /**
     * Merges all pages of all inputs into a new PDF written to the given stream.
     * The target document is closed even if merging fails.
     */
    public void merge(OutputStream outputStream, Iterable<PdfReader> inputs) throws DocumentException, IOException
    {
        try
        {
            openDocument(outputStream);
            for (PdfReader reader: inputs)
            {
                merge(reader);
            }
        }
        finally
        {
            closeDocument();
        }
    }

    /**
     * Creates and opens the target document and positions the cursor at the top
     * of the first page. The fields are assigned only after opening succeeded.
     */
    void openDocument(OutputStream outputStream) throws DocumentException
    {
        // 36 is passed for the two margin arguments preceding top/bottom.
        final Document document = new Document(pageSize, 36, 36, topMargin, bottomMargin);
        final PdfWriter writer = PdfWriter.getInstance(document, outputStream);
        document.open();
        this.document = document;
        this.writer = writer;
        newPage();
    }

    /**
     * Closes the target document, if there is one, and resets the merge state.
     */
    void closeDocument()
    {
        try
        {
            // this.document is still null when openDocument failed before assigning
            // it. Without this guard the NullPointerException thrown here (from the
            // finally block of merge) would replace the original exception.
            if (document != null)
                document.close();
        }
        finally
        {
            this.document = null;
            this.writer = null;
            this.yPosition = 0;
        }
    }

    /** Starts a new target page and moves the cursor to its top margin. */
    void newPage()
    {
        document.newPage();
        yPosition = pageSize.getTop(topMargin);
    }

    /** Merges every page of the given reader into the open document. */
    void merge(PdfReader reader) throws IOException
    {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        for (int page = 1; page <= reader.getNumberOfPages(); page++)
        {
            merge(reader, parser, page);
        }
    }

    /**
     * Places the used sections of one source page onto the target document, from
     * the top of the source page downwards, splitting between sections where
     * necessary.
     *
     * @throws IllegalArgumentException if a section does not fit even at the top
     *         of a fresh target page
     */
    void merge(PdfReader reader, PdfReaderContentParser parser, int page) throws IOException
    {
        PdfImportedPage importedPage = writer.getImportedPage(reader, page);
        PdfContentByte directContent = writer.getDirectContent();

        PageVerticalAnalyzer finder = parser.processContent(page, new PageVerticalAnalyzer());
        // Fewer than two flips means the analyzer found no used section.
        if (finder.verticalFlips.size() < 2)
            return;
        Rectangle pageSizeToImport = reader.getPageSize(page);

        // startFlip indexes the upper edge of the highest section not yet placed.
        int startFlip = finder.verticalFlips.size() - 1;
        boolean first = true;
        while (startFlip > 0)
        {
            // After the first attempt, each further round starts on a fresh page.
            if (!first)
                newPage();

            float freeSpace = yPosition - pageSize.getBottom(bottomMargin);
            int endFlip = startFlip + 1;
            // Extend the chunk downwards, one section (two flips) at a time, while
            // the distance from its upper edge to the candidate lower edge fits.
            while ((endFlip > 1) && (finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip - 2) < freeSpace))
                endFlip -=2;
            if (endFlip < startFlip)
            {
                float height = finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip);

                // Clip to the chunk's rectangle so only this part of the page shows.
                directContent.saveState();
                directContent.rectangle(0, yPosition - height, pageSizeToImport.getWidth(), height);
                directContent.clip();
                directContent.newPath();

                // Shift the imported page so the chunk's upper edge lands on the cursor.
                writer.getDirectContent().addTemplate(importedPage, 0, yPosition - (finder.verticalFlips.get(startFlip) - pageSizeToImport.getBottom()));

                directContent.restoreState();
                yPosition -= height + gap;
                startFlip = endFlip - 1;
            }
            else if (!first)
                // Nothing fitted although this round started on a new page.
                throw new IllegalArgumentException(String.format("Page %s content sections too large.", page));
            first = false;
        }
    }

    // State of the merge currently in progress; reset by closeDocument().
    Document document = null;
    PdfWriter writer = null;
    // Y coordinate on the current target page below which the next section goes.
    float yPosition = 0;

    // Target page layout; fixed at construction.
    final Rectangle pageSize;
    final float topMargin;
    final float bottomMargin;
    final float gap;
}

(PdfVeryDenseMergeTool.java)

This tool makes use of a custom RenderListener for use with the iText parser API:

/**
 * Render listener that records which vertical ranges of a page are covered by
 * text or bitmap images. The ranges are kept in {@link #verticalFlips} as a list
 * of boundary values; every reported text chunk or image merges one more range
 * into that list.
 */
public class PageVerticalAnalyzer implements RenderListener
{
    /** Boundary values of the recorded ranges, maintained by addVerticalUseSection. */
    final List<Float> verticalFlips = new ArrayList<Float>();

    @Override
    public void beginTextBlock() { }

    @Override
    public void endTextBlock() { }

    /*
     * Records the vertical extent spanned by the chunk's ascent and descent lines.
     *
     * @see RenderListener#renderText(TextRenderInfo)
     */
    @Override
    public void renderText(TextRenderInfo renderInfo)
    {
        LineSegment ascent = renderInfo.getAscentLine();
        LineSegment descent = renderInfo.getDescentLine();
        float[] lineYs = new float[4];
        lineYs[0] = ascent.getStartPoint().get(Vector.I2);
        lineYs[1] = ascent.getEndPoint().get(Vector.I2);
        lineYs[2] = descent.getStartPoint().get(Vector.I2);
        lineYs[3] = descent.getEndPoint().get(Vector.I2);
        addSpanOf(lineYs);
    }

    /*
     * Records the vertical extent of the image's transformed unit-square corners.
     *
     * @see RenderListener#renderImage(ImageRenderInfo)
     */
    @Override
    public void renderImage(ImageRenderInfo renderInfo)
    {
        Matrix ctm = renderInfo.getImageCTM();
        float[] cornerYs = new float[4];
        // corner 0..3 enumerates (x,y) = (0,0), (0,1), (1,0), (1,1).
        for (int corner = 0; corner < cornerYs.length; corner++)
        {
            int x = corner / 2;
            int y = corner % 2;
            cornerYs[corner] = new Vector(x, y, 1).cross(ctm).get(Vector.I2);
        }
        addSpanOf(cornerYs);
    }

    /** Sorts the four y values and marks the range from the first to the last as used. */
    private void addSpanOf(float[] ys)
    {
        Arrays.sort(ys);
        addVerticalUseSection(ys[0], ys[ys.length - 1]);
    }

    /**
     * Marks the range between the two values as used, merging it with the ranges
     * recorded so far. The bounds may be given in either order.
     */
    void addVerticalUseSection(float from, float to)
    {
        // Normalize so that low is the smaller bound.
        float low = from;
        float high = to;
        if (high < low)
        {
            low = to;
            high = from;
        }

        final int total = verticalFlips.size();

        // Index of the first recorded flip that is not below the lower bound.
        int first = 0;
        while (first < total && verticalFlips.get(first) < low)
            first++;

        // Index of the first flip, at or after 'first', not below the upper bound.
        // Stays 0 when every recorded flip lies below the lower bound.
        int last = 0;
        if (first < total)
        {
            last = first;
            while (last < total && verticalFlips.get(last) < high)
                last++;
        }

        // An even index means the bound falls outside every recorded range.
        boolean lowIsOutside = (first % 2) == 0;
        boolean highIsOutside = (last % 2) == 0;

        // Drop all flips swallowed by the new range (guarded: last may be below first).
        if (last > first)
            verticalFlips.subList(first, last).clear();

        // Insert the upper bound first so the lower bound ends up in front of it.
        if (highIsOutside)
            verticalFlips.add(first, high);
        if (lowIsOutside)
            verticalFlips.add(first, low);
    }
}

(PageVerticalAnalyzer.java)

It is used like this:

// Merge onto A4 pages, using 18 for the top and bottom margins and 5 for the gap.
final float verticalMargin = 18;
final float sectionGap = 5;
PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(PageSize.A4, verticalMargin, verticalMargin, sectionGap);
tool.merge(output, inputs);

(VeryDenseMerging.java)

Applied to the OP's sample documents

Header.pdf

Body.pdf

Footer.pdf

it generates

If one defines the target document page size to be A5 landscape:

// Same merge, but onto a 595 x 421 target page (described above as A5 landscape).
RectangleReadOnly a5Landscape = new RectangleReadOnly(595, 421);
PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(a5Landscape, 18, 18, 5);
tool.merge(output, inputs);

(VeryDenseMerging.java)

it generates this:

Beware! This is only a proof of concept and it does not consider all possibilities. E.g. the case of source or target pages with a non-trivial Rotate value is not properly handled. Thus, it is not ready for production use yet.


Improvement in current (5.5.6 SNAPSHOT) iText version

The current iText development version towards 5.5.6 enhances the parser functionality to also signal vector graphics. Thus, I extended the PageVerticalAnalyzer to make use of this:

public class PageVerticalAnalyzer implements ExtRenderListener
{
    /** No-op: this listener takes no action when a text block begins. */
    @Override
    public void beginTextBlock()
    {
        // intentionally empty
    }

    /** No-op: this listener takes no action when a text block ends. */
    @Override
    public void endTextBlock()
    {
        // intentionally empty
    }

    /** No-op: the clipping rule passed in is ignored. */
    @Override
    public void clipPath(int rule)
    {
        // intentionally empty
    }
    ...
    /**
     * Records the range of transformed y values covered by the points of one sub-path.
     * The range starts out as the single transformed y value of the first point and
     * grows as further points are added via {@link #extendTo(float, float, Matrix)}.
     */
    static class SubPathSection
    {
        /** Smallest transformed y value seen so far. */
        private float pathFromY;
        /** Largest transformed y value seen so far. */
        private float pathToY;

        /**
         * Starts a section consisting of the single point (x, y) transformed by m.
         */
        public SubPathSection(float x, float y, Matrix m)
        {
            pathFromY = pathToY = getTransformedY(x, y, m);
        }

        /**
         * Widens the section, if necessary, so that it also covers the point (x, y)
         * transformed by m.
         */
        void extendTo(float x, float y, Matrix m)
        {
            final float candidate = getTransformedY(x, y, m);
            if (candidate < pathFromY)
            {
                pathFromY = candidate;
                return;
            }
            if (candidate > pathToY)
                pathToY = candidate;
        }

        /**
         * Multiplies the vector (x, y, 1) with the matrix m and returns the
         * {@code Vector.I2} component of the result.
         */
        float getTransformedY(float x, float y, Matrix m)
        {
            final Vector transformed = new Vector(x, y, 1).cross(m);
            return transformed.get(Vector.I2);
        }

        /** @return the lower end of the recorded range */
        float getFromY()
        {
            return pathFromY;
        }

        /** @return the upper end of the recorded range */
        float getToY()
        {
            return pathToY;
        }
    }

    /*
     * Collects the vertical extent of the path under construction: every point of a
     * segment extends the section of the current sub-path, and every rectangle
     * contributes a section of its own.
     *
     * Beware: The implementation is not correct as it feeds all segment data points,
     * including the control points of curves, into the section, and control points
     * may be far outside the actual curve.
     *
     * @see ExtRenderListener#modifyPath(PathConstructionRenderInfo)
     */
    @Override
    public void modifyPath(PathConstructionRenderInfo renderInfo)
    {
        final Matrix ctm = renderInfo.getCtm();
        final List<Float> coordinates = renderInfo.getSegmentData();

        switch (renderInfo.getOperation())
        {
        case PathConstructionRenderInfo.RECT:
        {
            // A rectangle is a complete sub-path: record the extent of its four corners.
            final float left = coordinates.get(0);
            final float bottom = coordinates.get(1);
            final float width = coordinates.get(2);
            final float height = coordinates.get(3);

            final SubPathSection rectangle = new SubPathSection(left, bottom, ctm);
            rectangle.extendTo(left + width, bottom, ctm);
            rectangle.extendTo(left, bottom + height, ctm);
            rectangle.extendTo(left + width, bottom + height, ctm);
            path.add(rectangle);

            // Like CLOSE, a rectangle ends the current sub-path.
            subPath = null;
            break;
        }

        case PathConstructionRenderInfo.CLOSE:
            subPath = null;
            break;

        case PathConstructionRenderInfo.MOVETO:
            // A move starts a new sub-path; its target point is then handled like
            // the points of any other segment (deliberate fall-through).
            subPath = null;
        case PathConstructionRenderInfo.LINETO:
        case PathConstructionRenderInfo.CURVE_123:
        case PathConstructionRenderInfo.CURVE_13:
        case PathConstructionRenderInfo.CURVE_23:
        {
            // The segment data is consumed as consecutive (x, y) pairs.
            final int count = coordinates.size();
            for (int index = 0; index + 1 < count; index += 2)
            {
                final float pointX = coordinates.get(index);
                final float pointY = coordinates.get(index + 1);

                if (subPath != null)
                {
                    subPath.extendTo(pointX, pointY, ctm);
                    continue;
                }

                // First point after a MOVETO, CLOSE, RECT or rendered path.
                subPath = new SubPathSection(pointX, pointY, ctm);
                path.add(subPath);
            }
            break;
        }

        default:
            // Any other operation is ignored.
            break;
        }
    }

    /*
     * Marks the vertical extent of every collected sub-path section as used, unless the
     * painting operation is NO_OP, and then resets the collected path state.
     * Always returns null.
     *
     * @see ExtRenderListener#renderPath(PathPaintingRenderInfo)
     */
    @Override
    public Path renderPath(PathPaintingRenderInfo renderInfo)
    {
        final boolean painted = renderInfo.getOperation() != PathPaintingRenderInfo.NO_OP;
        if (painted)
        {
            for (int index = 0; index < path.size(); index++)
            {
                final SubPathSection current = path.get(index);
                addVerticalUseSection(current.getFromY(), current.getToY());
            }
        }

        // The path has been consumed either way; start over with an empty one.
        subPath = null;
        path.clear();
        return null;
    }

    // Sections of the sub-paths collected by modifyPath since the last renderPath call;
    // renderPath empties this list again.
    List<SubPathSection> path = new ArrayList<SubPathSection>();
    // Section of the sub-path modifyPath is currently extending, or null if the next
    // segment point has to start a new section.
    SubPathSection subPath = null;
    ...
}

(PageVerticalAnalyzer.java)

A simple test (VeryDenseMerging.java method testMergeOnlyGraphics) merges these files

into this:

But once again, beware: this is a mere proof of concept. In particular, modifyPath() needs to be improved: the implementation is not correct because it includes the control points of curves, which may lie far outside the actual curve.

这篇关于如何在合并删除空白的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆