在Excel中使用正则表达式的通用UDF [英] General Purpose UDFs for using Regular Expressions in Excel

查看:88
本文介绍了在Excel中使用正则表达式的通用UDF的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我需要每周解析和总结几千条文本行。 Excel通配符不够灵活,我想删除额外的步骤,粘贴到记事本++中以处理或馈送到脚本。

I need to parse and summarize and batches of several thousand text lines on a weekly basis. Excel wildcards weren't flexible enough, and I wanted to remove the extra step of either pasting into Notepad++ for processing or feeding to a script.

这里是我来的工具与。他们还是有点慢 - 可能在公司的笔记本电脑上每秒3000行,但是它们很方便。

Here are the tools I came up with. They're still a bit slow -- perhaps 3000 lines per second on a company laptop -- but they are handy.

RXMatch - 返回第一个匹配,返回一个子组的选项。

RXMatch -- return first match, option to return a subgroup.

=RXMatch("Apple","A(..)",1) -> "pp"

RXCount - 计数匹配数量

RXCount -- count number of matches

=RXCount("Apple","p") -> 2

RXPrint - 将第一个匹配和/或子组嵌入到模板中字符串

RXPrint -- embed first match and/or subgroups into a template string

=RXPrint("Apple","(\S)\S+","\1 is for \0") -> "A is for Apple"

RXPrintAll - 将每个匹配嵌入到模板字符串,加入结果

RXPrintAll -- embed each match into a template string, join the results

=RXPrintAll("Apple Banana","(\S)\S+","\1 is for \0") -> "A is for Apple, B is for Banana"

RXMatches 返回一个垂直的匹配数组,返回一个子组的选项

RXMatches -- return a vertical array of matches, option to return a subgroup

=RXMatches("Apple Banana","\S+") -> {"Apple";"Banana"}


推荐答案

RXMatch

Public Function RXMatch(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns the matching text
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Pattern = Pattern

    Set Matches = RE.Execute(Text)

    If (Matches.Count > 0) Then
        If (Group > 0) Then
            retval = Matches(0).submatches(Group - 1)
        Else
            retval = Matches(0)
        End If
    Else
        retval = ""
    End If

    RXMatch = retval
End Function

RXCount

Public Function RXCount(Text As String, Pattern As String, Optional IgnoreCase As Boolean = True) As Integer
    Dim retval As Integer
    ' Counts the number of matches
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Global = True

    RE.Pattern = Pattern
    Set Matches = RE.Execute(Text)

    retval = Matches.Count

    RXCount = retval
End Function

RXP rint

Public Function RXPrint(Text As String, Pattern As String, Optional Template As String = "\0", Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns a new string formatted according to the given template, using the first match found
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim REText, RETemplate As Object
    Dim MatchesText, MatchesTemplate As Object

    Set REText = CreateObject("vbscript.regexp")
    REText.IgnoreCase = IgnoreCase
    REText.Pattern = Pattern

    Set MatchesText = REText.Execute(Text)

    Set RETemplate = CreateObject("vbscript.regexp")
    RETemplate.Global = True
    RETemplate.Pattern = "(?:\\(.))|([^\\]+)"

    Set MatchesTemplate = RETemplate.Execute(Template)

    If (MatchesText.Count > 0) Then
        ReDim retArray(0 To MatchesTemplate.Count - 1) As String
        Dim escaped As String
        Dim plaintext As String
        For i = 0 To MatchesTemplate.Count - 1
            escaped = MatchesTemplate(i).submatches(0)
            plaintext = MatchesTemplate(i).submatches(1)
            If (Len(escaped) > 0) Then
                If (IsNumeric(escaped)) Then
                    Dim groupnum As Integer
                    groupnum = CInt(escaped)
                    If groupnum = 0 Then
                        retArray(i) = MatchesText(0)
                    ElseIf (groupnum > MatchesText(0).submatches.Count) Then
                        retArray(i) = "?"
                    Else
                        retArray(i) = MatchesText(0).submatches(groupnum - 1)
                    End If
                Else
                    retArray(i) = escaped
                End If
            Else
                retArray(i) = plaintext
            End If
        Next i
        retval = Join(retArray, "")
    Else
        retval = ""
    End If

    RXPrint = retval
End Function

RXPrintAll

RXPrintAll

Public Function RXPrintAll(Text As String, Pattern As String, Optional Template As String = "\0", Optional Delimiter As String = ", ", Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns a new string formatted according to the given template, repeated for each match
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
    ' Delimiter (optional) specified how the results will be joined
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim REText, RETemplate As Object
    Dim MatchesText, MatchesTemplate As Object

    Set REText = CreateObject("vbscript.regexp")
    REText.IgnoreCase = IgnoreCase
    REText.Global = True
    REText.Pattern = Pattern

    Set MatchesText = REText.Execute(Text)

    Set RETemplate = CreateObject("vbscript.regexp")
    RETemplate.Global = True
    RETemplate.Pattern = "(?:\\(.))|([^\\]+)"

    Set MatchesTemplate = RETemplate.Execute(Template)

    If (MatchesText.Count > 0) Then
        ReDim retArrays(0 To MatchesText.Count - 1)
        For j = 0 To MatchesText.Count - 1
            ReDim retArray(0 To MatchesTemplate.Count - 1) As String
            Dim escaped As String
            Dim plaintext As String
            For i = 0 To MatchesTemplate.Count - 1
                escaped = MatchesTemplate(i).submatches(0)
                plaintext = MatchesTemplate(i).submatches(1)
                If (Len(escaped) > 0) Then
                    If (IsNumeric(escaped)) Then
                        Dim groupnum As Integer
                        groupnum = CInt(escaped)
                        If groupnum = 0 Then
                            retArray(i) = MatchesText(j)
                        ElseIf (groupnum > MatchesText(j).submatches.Count) Then
                            retArray(i) = "?"
                        Else
                            retArray(i) = MatchesText(j).submatches(groupnum - 1)
                        End If
                    Else
                        retArray(i) = escaped
                    End If
                Else
                    retArray(i) = plaintext
                End If
            Next i
            retArrays(j) = Join(retArray, "")
        Next j
        retval = Join(retArrays, Delimiter)
    Else
        retval = ""
    End If
    RXPrintAll = retval
End Function

RXMatches

RXMatches

Public Function RXMatches(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As Variant
    Dim retval() As String
    ' Takes a string and returns all matches in a vertical array
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Global = True
    RE.Pattern = Pattern

    Set Matches = RE.Execute(Text)

    If (Matches.Count > 0) Then
        ReDim retval(0 To Matches.Count - 1)
        For i = 0 To Matches.Count - 1
            If (Group > 0) Then
                retval(i) = Matches(i).submatches(Group - 1)
            Else
                retval(i) = Matches(i)
            End If
        Next i
    Else
        ReDim retval(1)
        retval(0) = ""
    End If

    RXMatches = Application.Transpose(retval)
End Function

这篇关于在Excel中使用正则表达式的通用UDF的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆