常规模块

常规模块

下面的 VBA 代码从给定的数据中提取最常见的重复对和三元组,需要 6 列数据(A1、B1、C1、D1、E1、F1),并显示它们的出现次数。我希望此代码从包含 22 列数据(A1、B1、C1、....U1、V1)的数据集中仅提取四元组,并以类似的方式显示它们的出现次数。我不确定这是否是 Excel 无法处理的太多数据。

Sub MostCommonPairAndTriplet()
Dim rng As Range
Dim c As Range
Dim strPair As String
Dim strTriplet As String
Dim wsResult As Worksheet
Dim lRow As Long
Dim lRow2 As Long
Dim i As Integer
Dim j As Integer

Application.ScreenUpdating = False
Application.DisplayAlerts = False

Set rng = Intersect(ActiveSheet.UsedRange, ActiveSheet.Range("A:F"))

If Not rng Is Nothing Then

'Get the result worksheet
On Error Resume Next
Set wsResult = ActiveWorkbook.Worksheets("Results")
If wsResult Is Nothing Then
Set wsResult = ActiveWorkbook.Worksheets.Add
wsResult.Name = "Results"
Else
wsResult.UsedRange.Delete
End If
'column labels
With wsResult
.Range("B1").Value = "Value1"
.Range("C1").Value = "Value2"
.Range("D1").Value = "Count"
.Range("F1").Value = "Value1"
.Range("G1").Value = "Value2"
.Range("H1").Value = "Value3"
.Range("I1").Value = "Count"
End With
On Error GoTo 0

'Find Pairs
lRow = 2
For Each c In rng
    If c.Column <= 5 Then
        For i = 1 To 6 - c.Column
            strPair = c.Value & "_" & c.Offset(0, i).Value

            On Error Resume Next
            lRow2 = Application.WorksheetFunction.Match(strPair, wsResult.Range("A:A"), False)
            If Err.Number > 0 Then
                wsResult.Range("A" & lRow).Value = strPair
                wsResult.Range("B" & lRow).Value = c.Value
                wsResult.Range("C" & lRow).Value = c.Offset(0, i).Value
                wsResult.Range("D" & lRow).Value = 1
                lRow = lRow + 1
            Else
                wsResult.Range("D" & lRow2).Value = wsResult.Range("D" & lRow2).Value + 1
            End If
            On Error GoTo 0
        Next i
    End If
Next c

'Find Triplets
lRow = 2
For Each c In rng
    If c.Column <= 5 Then
        For i = 1 To 6 - c.Column
            For j = 1 To 6 - c.Offset(0, i).Column
                strTriplet = c.Value & "_" & c.Offset(0, i).Value & "_" & c.Offset(0, i + j).Value

                On Error Resume Next
                lRow2 = Application.WorksheetFunction.Match(strTriplet, wsResult.Range("E:E"), False)
                If Err.Number > 0 Then
                    wsResult.Range("E" & lRow).Value = strTriplet
                    wsResult.Range("F" & lRow).Value = c.Value
                    wsResult.Range("G" & lRow).Value = c.Offset(0, i).Value
                    wsResult.Range("H" & lRow).Value = c.Offset(0, i + j).Value
                    wsResult.Range("I" & lRow).Value = 1
                    lRow = lRow + 1
                Else
                    wsResult.Range("I" & lRow2).Value = wsResult.Range("I" & lRow2).Value + 1
                End If
                On Error GoTo 0
            Next j
        Next i
    End If
Next c
End If

wsResult.Columns("E").Clear
wsResult.Columns("A").Delete

'Sort the pairs
With wsResult
.Columns("A:C").Sort Key1:=.Range("C2"), Order1:=xlDescending
.Columns("E:H").Sort Key1:=.Range("H2"), Order1:=xlDescending
End With


Application.DisplayAlerts = True
Application.ScreenUpdating = True

子目录结束

答案1

由于处理每行 22 个单元格时可能出现大量四元组,因此我建议采用与双元组和三元组不同的方法。

我将创建一个用户定义对象(类)来包含有关四边形的内容和计数的信息。(我还引入了一个方法来创建四边形项目的数组)。然后,我将把四边形收集到一个字典对象中——在下面的代码中,我使用了早期绑定(设置一个引用,在 下Tools --> ReferencesMicrosoft Scripting Runtime但如果要分发,您可能应该更改为后期绑定。

当收集第一行之后的四边形时,我们可以使用字典来测试四边形是否已经存在;如果存在,我们将计数加一;如果不存在,我们将其存储为一个新的四边形。

通过查找 A 列中的最后一行和第 1 行中的最后一列来确定范围的大小。它假设您的数据从 A1 开始(如您在工作表上所示),并且没有标题行。如果不是这种情况,您可能需要进行一些调整。

它还假设每行中的条目已排序。如果不是这样,则需要在生成 Quad 之前添加排序例程。

编辑请注意,由于 Excel 的行数限制,如果您所需的输出包含超过 2^20 个四边形,则例程将崩溃(出现 1004 错误)。至少有两种方法可以处理此问题:

  • 增加阈值,以便仅输出计数为 2、3 或适合单个列集所需的任何数量的四边形(可能是最简单的方法)

  • 改变输出例程以便将输出分布到多组列上。

类模块

确保将其重命名为 cQuad

Option Explicit
'Rename cQuad
Private pQ1 As Long
Private pQ2 As Long
Private pQ3 As Long
Private pQ4 As Long
Private pCnt As Long
Private pArr As Variant

Public Property Get Q1() As Long
    Q1 = pQ1
End Property
Public Property Let Q1(Value As Long)
    pQ1 = Value
End Property

Public Property Get Q2() As Long
    Q2 = pQ2
End Property
Public Property Let Q2(Value As Long)
    pQ2 = Value
End Property

Public Property Get Q3() As Long
    Q3 = pQ3
End Property
Public Property Let Q3(Value As Long)
    pQ3 = Value
End Property

Public Property Get Q4() As Long
    Q4 = pQ4
End Property
Public Property Let Q4(Value As Long)
    pQ4 = Value
End Property

Public Property Get Arr() As Variant
    Dim V(1 To 4)
        V(1) = Me.Q1
        V(2) = Me.Q2
        V(3) = Me.Q3
        V(4) = Me.Q4
    Arr = V
End Property

Public Property Get Cnt() As Long
    Cnt = pCnt
End Property
Public Property Let Cnt(Value As Long)
    pCnt = Value
End Property

常规模块

Option Explicit
'Set Reference to Microsoft Scripting Runtime
Sub CheckForQuads()
    Dim cQ As cQuad, dQ As Dictionary
    Dim vSrc As Variant, vRes As Variant
    Dim I As Long, J As Long
    Dim wsData As Worksheet, wsRes As Worksheet, rRes As Range
    Dim V, W
    Dim sKey As String

Set wsData = Worksheets("Data")
Set wsRes = Worksheets("Results")
    Set rRes = wsRes.Cells(1, 10)

With wsData
    I = .Cells(.Rows.Count, 1).End(xlUp).Row 'Last Row
    J = .Cells(1, .Columns.Count).End(xlToLeft).Column 'Last Column
    vSrc = .Range(.Cells(1, 1), .Cells(I, J))
End With

Set dQ = New Dictionary
For I = 1 To UBound(vSrc, 1)

    'Size array for number of combos in each row
    V = Combos(Application.WorksheetFunction.Index(vSrc, I, 0))

    'create an object for each Quad, including each member, and the count
    For J = 1 To UBound(V, 1)
    Set cQ = New cQuad
        With cQ
            .Q1 = V(J, 1)
            .Q2 = V(J, 2)
            .Q3 = V(J, 3)
            .Q4 = V(J, 4)
            .Cnt = 1
            sKey = Join(.Arr, Chr(1))

            'Add one to the count if Quad already exists
            If Not dQ.Exists(sKey) Then
                dQ.Add sKey, cQ
            Else
                dQ(sKey).Cnt = dQ(sKey).Cnt + 1
            End If

        End With
    Next J
Next I

'Output the results
'set a threshold
Const TH As Long = 1

'Size the output array
I = 0
For Each V In dQ.Keys
    If dQ(V).Cnt >= TH Then I = I + 1
Next V
ReDim vRes(0 To I, 1 To 5)

'Headers
vRes(0, 1) = "Value 1"
vRes(0, 2) = "Value 2"
vRes(0, 3) = "Value 3"
vRes(0, 4) = "Value 4"
vRes(0, 5) = "Count"

'Output the data
I = 0
For Each V In dQ.Keys
    Set cQ = dQ(V)
    With cQ
        If .Cnt >= TH Then
            I = I + 1
            vRes(I, 1) = .Q1
            vRes(I, 2) = .Q2
            vRes(I, 3) = .Q3
            vRes(I, 4) = .Q4
            vRes(I, 5) = .Cnt
        End If
    End With
Next V

'Output the data
Set rRes = rRes.Resize(UBound(vRes, 1) + 1, UBound(vRes, 2))
With rRes
    .EntireColumn.Clear
    .Value = vRes
    With .Rows(1)
        .Font.Bold = True
        .HorizontalAlignment = xlCenter
    End With
    .EntireColumn.AutoFit
    .Sort key1:=.Columns(.Columns.Count), _
        order1:=xlDescending, Header:=xlYes, MatchCase:=False
End With
End Sub

Function Combos(Vals)
    Dim I As Long, J As Long, K As Long, L As Long, M As Long
    Dim V

ReDim V(1 To WorksheetFunction.Combin(UBound(Vals), 4), 1 To 4)
M = 0
For I = 1 To UBound(Vals) - 3
    For J = I + 1 To UBound(Vals) - 2
        For K = J + 1 To UBound(Vals) - 1
            For L = K + 1 To UBound(Vals)
                M = M + 1
                V(M, 1) = Vals(I)
                V(M, 2) = Vals(J)
                V(M, 3) = Vals(K)
                V(M, 4) = Vals(L)
            Next L
        Next K
    Next J
Next I

Combos = V

End Function

相关内容