在Excel中使用正则表达式的通用UDF [英] General Purpose UDFs for using Regular Expressions in Excel
问题描述
我需要每周解析和总结几千条文本行。 Excel通配符不够灵活,我想删除额外的步骤,粘贴到记事本++中以处理或馈送到脚本。
I need to parse and summarize and batches of several thousand text lines on a weekly basis. Excel wildcards weren't flexible enough, and I wanted to remove the extra step of either pasting into Notepad++ for processing or feeding to a script.
这里是我来的工具与。他们还是有点慢 - 可能在公司的笔记本电脑上每秒3000行,但是它们很方便。
Here are the tools I came up with. They're still a bit slow -- perhaps 3000 lines per second on a company laptop -- but they are handy.
RXMatch - 返回第一个匹配,返回一个子组的选项。
RXMatch -- return first match, option to return a subgroup.
=RXMatch("Apple","A(..)",1) -> "pp"
RXCount - 计数匹配数量
RXCount -- count number of matches
=RXCount("Apple","p") -> 2
RXPrint - 将第一个匹配和/或子组嵌入到模板中字符串
RXPrint -- embed first match and/or subgroups into a template string
=RXPrint("Apple","(\S)\S+","\1 is for \0") -> "A is for Apple"
RXPrintAll - 将每个匹配嵌入到模板字符串,加入结果
RXPrintAll -- embed each match into a template string, join the results
=RXPrintAll("Apple Banana","(\S)\S+","\1 is for \0") -> "A is for Apple, B is for Banana"
RXMatches 返回一个垂直的匹配数组,返回一个子组的选项
RXMatches -- return a vertical array of matches, option to return a subgroup
=RXMatches("Apple Banana","\S+") -> {"Apple";"Banana"}
推荐答案
RXMatch
Public Function RXMatch(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As String
Dim retval As String
' Takes a string and returns the matching text
' Text is the string to be searched
' Pattern is the regex pattern
' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
' IgnoreCase (optional) set to False for a case-sensitive search
Dim RE As Object
Dim Matches As Object
Set RE = CreateObject("vbscript.regexp")
RE.IgnoreCase = IgnoreCase
RE.Pattern = Pattern
Set Matches = RE.Execute(Text)
If (Matches.Count > 0) Then
If (Group > 0) Then
retval = Matches(0).submatches(Group - 1)
Else
retval = Matches(0)
End If
Else
retval = ""
End If
RXMatch = retval
End Function
RXCount
Public Function RXCount(Text As String, Pattern As String, Optional IgnoreCase As Boolean = True) As Integer
Dim retval As Integer
' Counts the number of matches
' Text is the string to be searched
' Pattern is the regex pattern
' IgnoreCase (optional) set to False for a case-sensitive search
Dim RE As Object
Dim Matches As Object
Set RE = CreateObject("vbscript.regexp")
RE.IgnoreCase = IgnoreCase
RE.Global = True
RE.Pattern = Pattern
Set Matches = RE.Execute(Text)
retval = Matches.Count
RXCount = retval
End Function
RXP rint
Public Function RXPrint(Text As String, Pattern As String, Optional Template As String = "\0", Optional IgnoreCase As Boolean = True) As String
Dim retval As String
' Takes a string and returns a new string formatted according to the given template, using the first match found
' Text is the string to be searched
' Pattern is the regex pattern
' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
' IgnoreCase (optional) set to False for a case-sensitive search
Dim REText, RETemplate As Object
Dim MatchesText, MatchesTemplate As Object
Set REText = CreateObject("vbscript.regexp")
REText.IgnoreCase = IgnoreCase
REText.Pattern = Pattern
Set MatchesText = REText.Execute(Text)
Set RETemplate = CreateObject("vbscript.regexp")
RETemplate.Global = True
RETemplate.Pattern = "(?:\\(.))|([^\\]+)"
Set MatchesTemplate = RETemplate.Execute(Template)
If (MatchesText.Count > 0) Then
ReDim retArray(0 To MatchesTemplate.Count - 1) As String
Dim escaped As String
Dim plaintext As String
For i = 0 To MatchesTemplate.Count - 1
escaped = MatchesTemplate(i).submatches(0)
plaintext = MatchesTemplate(i).submatches(1)
If (Len(escaped) > 0) Then
If (IsNumeric(escaped)) Then
Dim groupnum As Integer
groupnum = CInt(escaped)
If groupnum = 0 Then
retArray(i) = MatchesText(0)
ElseIf (groupnum > MatchesText(0).submatches.Count) Then
retArray(i) = "?"
Else
retArray(i) = MatchesText(0).submatches(groupnum - 1)
End If
Else
retArray(i) = escaped
End If
Else
retArray(i) = plaintext
End If
Next i
retval = Join(retArray, "")
Else
retval = ""
End If
RXPrint = retval
End Function
RXPrintAll
RXPrintAll
Public Function RXPrintAll(Text As String, Pattern As String, Optional Template As String = "\0", Optional Delimiter As String = ", ", Optional IgnoreCase As Boolean = True) As String
Dim retval As String
' Takes a string and returns a new string formatted according to the given template, repeated for each match
' Text is the string to be searched
' Pattern is the regex pattern
' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
' Delimiter (optional) specified how the results will be joined
' IgnoreCase (optional) set to False for a case-sensitive search
Dim REText, RETemplate As Object
Dim MatchesText, MatchesTemplate As Object
Set REText = CreateObject("vbscript.regexp")
REText.IgnoreCase = IgnoreCase
REText.Global = True
REText.Pattern = Pattern
Set MatchesText = REText.Execute(Text)
Set RETemplate = CreateObject("vbscript.regexp")
RETemplate.Global = True
RETemplate.Pattern = "(?:\\(.))|([^\\]+)"
Set MatchesTemplate = RETemplate.Execute(Template)
If (MatchesText.Count > 0) Then
ReDim retArrays(0 To MatchesText.Count - 1)
For j = 0 To MatchesText.Count - 1
ReDim retArray(0 To MatchesTemplate.Count - 1) As String
Dim escaped As String
Dim plaintext As String
For i = 0 To MatchesTemplate.Count - 1
escaped = MatchesTemplate(i).submatches(0)
plaintext = MatchesTemplate(i).submatches(1)
If (Len(escaped) > 0) Then
If (IsNumeric(escaped)) Then
Dim groupnum As Integer
groupnum = CInt(escaped)
If groupnum = 0 Then
retArray(i) = MatchesText(j)
ElseIf (groupnum > MatchesText(j).submatches.Count) Then
retArray(i) = "?"
Else
retArray(i) = MatchesText(j).submatches(groupnum - 1)
End If
Else
retArray(i) = escaped
End If
Else
retArray(i) = plaintext
End If
Next i
retArrays(j) = Join(retArray, "")
Next j
retval = Join(retArrays, Delimiter)
Else
retval = ""
End If
RXPrintAll = retval
End Function
RXMatches
RXMatches
Public Function RXMatches(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As Variant
Dim retval() As String
' Takes a string and returns all matches in a vertical array
' Text is the string to be searched
' Pattern is the regex pattern
' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
' IgnoreCase (optional) set to False for a case-sensitive search
Dim RE As Object
Dim Matches As Object
Set RE = CreateObject("vbscript.regexp")
RE.IgnoreCase = IgnoreCase
RE.Global = True
RE.Pattern = Pattern
Set Matches = RE.Execute(Text)
If (Matches.Count > 0) Then
ReDim retval(0 To Matches.Count - 1)
For i = 0 To Matches.Count - 1
If (Group > 0) Then
retval(i) = Matches(i).submatches(Group - 1)
Else
retval(i) = Matches(i)
End If
Next i
Else
ReDim retval(1)
retval(0) = ""
End If
RXMatches = Application.Transpose(retval)
End Function
这篇关于在Excel中使用正则表达式的通用UDF的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!