Excel VBA可以搜索PDF中的文本并提取和命名页 [英] Excel VBA to Search for Text in PDF and Extract and Name Pages

查看：691 发布时间：2020/5/25 3:58:08 excel vba pdf acrobat

本文介绍了Excel VBA可以搜索PDF中的文本并提取和命名页的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我有以下代码，该代码查看电子表格的A列中的每个单元格，在指定的PDF中搜索它在那里找到的文本，然后提取找到该文本为PDF的页面，并用电子表格单元格中的值.该代码有效，但速度很慢，我可能需要在PDF中搜索多达200个单词，而这些单词可能长达600页.有没有办法使代码更快?当前，它在每个单元格中循环搜索，在每个页面中循环搜索每个单词，直到在该单元格中找到单词为止.

I have the following code, which looks at each cell in column A of my spreadsheet, searches for the text it finds there in the specified PDF and then extracts the page where it finds the text as a PDF, naming it with the value in the cell of the spreadsheet. The code works but is rather slow, I may need to search for as many as 200 words in a PDF which could be as long as 600 pages. Is there a way to make the code faster? Currently it loops through each cell searches through each page looping through each word until it finds the word in the cell.

    Sub test_with_PDF()

    Dim objApp As Object
    Dim objPDDoc As Object
    Dim objjso As Object
    Dim wordsCount As Long
    Dim page As Long
    Dim i As Long
    Dim strData As String
    Dim strFileName As String
    Dim lastrow As Long, c As Range
    Dim PageNos As Integer
    Dim newPDF As Acrobat.CAcroPDDoc
    Dim NewName As String
    Dim Folder As String
    lastrow = Sheets("Sheet1").Cells(Rows.Count, "A").End(xlUp).Row

    strFileName = selectFile()
    Folder = GetFolder()

    Set objApp = CreateObject("AcroExch.App")
    Set objPDDoc = CreateObject("AcroExch.PDDoc")
    'AD.1 open file, if =false file is damage
    If objPDDoc.Open(strFileName) Then
        Set objjso = objPDDoc.GetJSObject

 PageNos = 0
 For Each c In Sheets("Sheet1").Range("A2:A" & lastrow)

        For page = 0 To objPDDoc.GetNumPages - 1
            wordsCount = objjso.GetPageNumWords(page)
            For i = 0 To wordsCount

                If InStr(1, c.Value, ", ") = 0 Then

                    If objjso.getPageNthWord(page, i) = c.Value Then
                        PageNos = PageNos + 1
                        If FileExist(Folder & "\" & c.Offset(0, 4) & ".pdf") Then

                                Set newPDF = CreateObject("AcroExch.pdDoc")
                                NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
                                newPDF.Open (NewName)
                                newPDF.InsertPages lngPages, objPDDoc, page, 1, 0
                                newPDF.Save 1, NewName
                                newPDF.Close
                                Set newPDF = Nothing
                                Exit For
                         Else
                                Set newPDF = CreateObject("AcroExch.PDDoc")
                                newPDF.Create
                                NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
                                newPDF.InsertPages -1, objPDDoc, page, 1, 0
                                newPDF.Save 1, NewName
                                newPDF.Close
                                Set newPDF = Nothing
                                Exit For

                        End If
                    End If
                Else

                If objjso.getPageNthWord(page, i) = c.Offset(0, 1).Value Then
                    If objjso.getPageNthWord(page, i + 1) = c.Offset(0, 2).Value Then
                        PageNos = PageNos + 1
                         If FileExist(Folder & "\" & c.Offset(0, 4) & ".pdf") Then

                                Set newPDF = CreateObject("AcroExch.pdDoc")
                                NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
                                newPDF.Open (NewName)
                                newPDF.InsertPages lngPages, objPDDoc, page, 1, 0
                                newPDF.Save 1, NewName
                                newPDF.Close
                                Set newPDF = Nothing
                                Exit For
                         Else
                                Set newPDF = CreateObject("AcroExch.PDDoc")
                                newPDF.Create
                                NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
                                newPDF.InsertPages -1, objPDDoc, page, 1, 0
                                newPDF.Save 1, NewName
                                newPDF.Close
                                Set newPDF = Nothing
                                Exit For

                        End If
                        Exit For
                    End If
                End If
            End If
            Next i
        Next page
        c.Offset(0, 3).Value = PageNos
        PageNos = 0
    Next c
    MsgBox "Done"
    Else
        MsgBox "error!"
    End If
End Sub

Function FileExist(path As String) As Boolean
    If Dir(path) <> vbNullString Then FileExist = True
End Function
Function selectFile()
Dim fd As FileDialog, fileName As String

On Error GoTo ErrorHandler

Set fd = Application.FileDialog(msoFileDialogFilePicker)

fd.AllowMultiSelect = False

If fd.Show = True Then
    If fd.SelectedItems(1) <> vbNullString Then
        fileName = fd.SelectedItems(1)
    End If
Else
    'Exit code if no file is selected
    End
End If

'Return Selected FileName
selectFile = fileName

Set fd = Nothing

Exit Function

ErrorHandler:
Set fd = Nothing
MsgBox "Error " & Err & ": " & Error(Err)

End Function
Function GetFolder() As String
    Dim fldr As FileDialog
    Dim sItem As String
    Set fldr = Application.FileDialog(msoFileDialogFolderPicker)
    With fldr
        .Title = "Select the Folder where you want you new PDFs to go"
        .AllowMultiSelect = False
        .InitialFileName = Application.DefaultFilePath
        If .Show <> -1 Then GoTo NextCode
        sItem = .SelectedItems(1)
    End With
NextCode:
    GetFolder = sItem
    Set fldr = Nothing
End Function

非常感谢.

Excel VBA可以搜索PDF中的文本并提取和命名页 [英] Excel VBA to Search for Text in PDF and Extract and Name Pages

问题描述

推荐答案

相关文章

其他开发最新文章

热门教程

热门工具

登录关闭

Excel VBA可以搜索PDF中的文本并提取和命名页 [英] Excel VBA to Search for Text in PDF and Extract and Name Pages

问题描述

推荐答案

相关文章

其他开发最新文章

热门教程

热门工具

登录 关闭

登录关闭