搜索PDF文本，在获取坐标后通过绘制矩形突出显示找到的单词保存PDF并突出显示文本 [英] search PDF text, highlight found words by drawing rectangle after getting their coordinates save PDF with text highlighted

查看：147 发布时间：2018/11/16 17:31:13 c# pdf itextsharp

本文介绍了搜索PDF文本，在获取坐标后通过绘制矩形突出显示找到的单词保存PDF并突出显示文本的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

任何人都可以提供如何获取文本坐标的帮助？这有可能吗？因为我只想要一个Windows窗体应用程序，用户在文本框中键入一个单词，应用程序使用iTextSharp读取现有PDF，突出显示匹配的单词（如果找到），并使用突出显示的文本保存PDF。到目前为止我几乎已经完成了所有工作，包括绘制黄色矩形，但缺少的是如何获取匹配模式的文本坐标以突出显示它们，提前感谢:(顺便说一句：sb是搜索文本框，tb是一个富文本框，其中展示了PDF文本）

Can anyone help with how to get text coordinates? Is this possible? I want a Windows Forms app where the user types a word in a text box, and the app reads an existing PDF using iTextSharp, highlights the matched words if found, and saves the PDF with the highlighted text. So far I have almost everything done, including the drawing of a yellow rectangle, but what is lacking is how to get the text coordinates of the matched patterns in order to highlight them. Thanks in advance. (By the way: sb is the search text box, and tb is a rich text box where the PDF text is displayed.)

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using iTextSharp.text;
using System.Text.RegularExpressions;

namespace manipulatePDF
{
    /// <summary>
    /// Form that loads the text of a PDF into a rich text box, highlights a search term
    /// in that text box, and writes a copy of the PDF with a highlight rectangle on every
    /// page whose extracted text contains the search term.
    /// </summary>
    public partial class Form1 : Form
    {
        // Fill opacity used for the highlight overlay. PDF opacity values must lie in 0..1.
        private const float HighlightOpacity = 0.5f;

        // Placeholder highlight geometry. The code does not yet know where a match is
        // located on the page, so a fixed rectangle is drawn instead.
        // TODO: replace with per-match rectangles once text coordinates are collected
        // (for example from a custom text extraction strategy/render listener).
        private const float PlaceholderOffsetFromRight = 500f;
        private const float PlaceholderBottom = 600f;
        private const float PlaceholderWidth = 100f;
        private const float PlaceholderHeight = 100f;

        // Path of the PDF chosen in open_Click; null until a file has been opened.
        string oldFile;

        // Text extracted from the PDF that is currently shown in tb.
        StringBuilder text = new StringBuilder();

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Lets the user pick a PDF, extracts the text of every page and shows it in tb.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (forwarded to reset_Click).</param>
        private void open_Click(object sender, EventArgs e)
        {
            reset_Click(sender, e);

            openFileDialog1.Filter = "PDF Files (.pdf)|*.pdf";
            openFileDialog1.FilterIndex = 1;

            if (openFileDialog1.ShowDialog() != System.Windows.Forms.DialogResult.OK)
            {
                return;
            }

            string chosenFile = openFileDialog1.FileName;

            PdfReader reader = new PdfReader(chosenFile);
            try
            {
                for (int cPage = 1; cPage <= reader.NumberOfPages; cPage++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // The extracted string is used as-is. Re-encoding it through
                    // Encoding.Default would replace every character that the ANSI
                    // code page cannot represent with '?'.
                    text.Append(PdfTextExtractor.GetTextFromPage(reader, cPage, strategy));
                }
            }
            finally
            {
                // Close once, after all pages were read (not inside the page loop).
                reader.Close();
            }

            // Only remember the file once it has been read successfully.
            oldFile = chosenFile;
            label1.Text = "File Location: " + chosenFile;
            tb.Text = text.ToString();
        }

        /// <summary>
        /// Highlights the search term in tb and saves a copy of the opened PDF in which
        /// every page containing the search term carries a highlight rectangle.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void save_Click(object sender, EventArgs e)
        {
            if (string.IsNullOrEmpty(oldFile))
            {
                MessageBox.Show("Open a PDF file before saving.");
                return;
            }

            saveFileDialog1.InitialDirectory = @"C:\";
            saveFileDialog1.Title = "Save the PDF File";
            saveFileDialog1.Filter = "PDF files (*.pdf)|*.pdf";

            if (saveFileDialog1.ShowDialog() != System.Windows.Forms.DialogResult.OK)
            {
                return;
            }

            string textToSearch = sb.Text;
            HighlightMatchesInTextBox(textToSearch);

            PdfReader reader = new PdfReader(oldFile);
            try
            {
                using (FileStream fs = new FileStream(saveFileDialog1.FileName, FileMode.Create, FileAccess.Write))
                {
                    WriteHighlightedCopy(reader, fs, textToSearch);
                }
            }
            finally
            {
                reader.Close();
            }
        }

        /// <summary>
        /// Clears the displayed text and the extracted-text buffer.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void reset_Click(object sender, EventArgs e)
        {
            tb.Text = "";

            // Without this, text from a previously opened file would be prepended
            // to the text of the next one.
            text = new StringBuilder();
        }

        /// <summary>
        /// Gives every occurrence of <paramref name="searchText"/> in tb a gold background.
        /// Uses a case-sensitive search when "contain" is checked and a whole-word search
        /// when "exact" is checked; does nothing when neither is checked.
        /// </summary>
        /// <param name="searchText">Text to look for; null or empty means nothing to highlight.</param>
        private void HighlightMatchesInTextBox(string searchText)
        {
            if (string.IsNullOrEmpty(searchText))
            {
                return;
            }

            RichTextBoxFinds options;
            if (contain.Checked)
            {
                options = RichTextBoxFinds.MatchCase;
            }
            else if (exact.Checked)
            {
                options = RichTextBoxFinds.WholeWord;
            }
            else
            {
                // No search mode selected: nothing to do (and no way to advance the search).
                return;
            }

            int start = 0;
            while (start < tb.TextLength)
            {
                // Find selects the match it returns; -1 means there is no further match.
                int found = tb.Find(searchText, start, tb.TextLength, options);
                if (found < 0)
                {
                    break;
                }

                tb.SelectionBackColor = Color.Gold;

                // Advance by one character so overlapping matches are found as well.
                start = found + 1;
            }
        }

        /// <summary>
        /// Copies every page of <paramref name="reader"/> into a new PDF written to
        /// <paramref name="output"/>, overlaying a highlight rectangle on each page whose
        /// extracted text contains <paramref name="textToSearch"/>, and reports the running
        /// match count in label2 when "contain" is checked.
        /// </summary>
        /// <param name="reader">Open reader for the source PDF.</param>
        /// <param name="output">Stream that receives the new PDF; closed together with the document.</param>
        /// <param name="textToSearch">Search term; null or empty yields no highlights.</param>
        private void WriteHighlightedCopy(PdfReader reader, Stream output, string textToSearch)
        {
            // A fresh Document per save: a Document that has been closed cannot be reused.
            Document document = new Document(reader.GetPageSizeWithRotation(1));
            PdfWriter writer = PdfWriter.GetInstance(document, output);
            document.Open();

            PdfContentByte cb = writer.DirectContent;

            PdfGState highlightState = new PdfGState();
            highlightState.FillOpacity = HighlightOpacity;

            int count = 0; // running number of matches over all pages processed so far
            for (int cPage = 1; cPage <= reader.NumberOfPages; cPage++)
            {
                if (cPage > 1)
                {
                    // Size each output page like its source page before starting it.
                    document.SetPageSize(reader.GetPageSizeWithRotation(cPage));
                    document.NewPage();
                }

                ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                string currentText = PdfTextExtractor.GetTextFromPage(reader, cPage, strategy);

                int pageMatches = CountOccurrences(currentText, textToSearch);
                count += pageMatches;

                // Copy the original page content.
                PdfImportedPage page = writer.GetImportedPage(reader, cPage);
                cb.AddTemplate(page, 0, 0);

                if (pageMatches > 0)
                {
                    // Draw the highlight on top of the page. The opacity is scoped with
                    // SaveState/RestoreState so it affects only the rectangle.
                    // The placeholder is drawn once per page: several translucent fills
                    // at the same position would add up and hide the text underneath.
                    cb.SaveState();
                    cb.SetGState(highlightState);
                    cb.SetColorFill(new CMYKColor(0f, 0f, 1f, 0f));
                    cb.Rectangle(
                        document.PageSize.Width - PlaceholderOffsetFromRight,
                        PlaceholderBottom,
                        PlaceholderWidth,
                        PlaceholderHeight);
                    cb.Fill();
                    cb.RestoreState();
                }

                if (count != 0 && contain.Checked)
                {
                    label2.Text = "Number of pages: " + cPage + " - " + textToSearch + " found " + count + " times. \n";
                }
                // TODO ("exact" mode): count only matches bounded by a space or a dot
                // and report that number instead.
            }

            // Closed on the success path only: closing a document that has no pages yet
            // raises its own error and would hide the original exception. On failure the
            // caller still disposes the output stream.
            document.Close();
        }

        /// <summary>
        /// Counts the (possibly overlapping) occurrences of <paramref name="needle"/> in
        /// <paramref name="haystack"/> using a culture-sensitive, case-sensitive comparison.
        /// </summary>
        /// <param name="haystack">Text to search in.</param>
        /// <param name="needle">Text to search for.</param>
        /// <returns>Number of occurrences; 0 when either argument is null or empty.</returns>
        private static int CountOccurrences(string haystack, string needle)
        {
            if (string.IsNullOrEmpty(haystack) || string.IsNullOrEmpty(needle))
            {
                return 0;
            }

            int total = 0;
            int at = haystack.IndexOf(needle, 0, StringComparison.CurrentCulture);
            while (at != -1)
            {
                total++;

                if (at + 1 > haystack.Length)
                {
                    // Defensive: never pass a start index beyond the end of the string.
                    break;
                }

                at = haystack.IndexOf(needle, at + 1, StringComparison.CurrentCulture);
            }

            return total;
        }
    }
}

搜索PDF文本，在获取坐标后通过绘制矩形突出显示找到的单词保存PDF并突出显示文本 [英] search PDF text, highlight found words by drawing rectangle after getting their coordinates save PDF with text highlighted

问题描述

推荐答案

相关文章

C#/.NET最新文章

热门教程

热门工具

登录关闭

搜索PDF文本，在获取坐标后通过绘制矩形突出显示找到的单词保存PDF并突出显示文本 [英] search PDF text, highlight found words by drawing rectangle after getting their coordinates save PDF with text highlighted

问题描述

推荐答案

相关文章

C#/.NET最新文章

热门教程

热门工具

登录 关闭

登录关闭