I am using the below code to:
Delete the similar rows, keeping only one and combine cells values in the range "N", separated by vbLf
it works ,but with big range (e.g. 30 thousands rows) the macro takes a very long time to finish.
After debugging the code, I find out that using union causes macro to takes a very long time to finish.
Set rngDel = Union(rngDel, ws.Range("A" & i + m))
So with the below code , How to adapt a faster method to delete that range of rows other that using union?
In advance, grateful for any helpful comments and answers.
Sub DeleteSimilarRows_combine_Last_Column_N()
Dim LastRow As Long, ws As Worksheet, arrWork, rngDel As Range, i As Long, j As Long, k As Long
Dim strVal As String, m As Long
Set ws = ActiveSheet: LastRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row
arrWork = ws.Range("A1:A" & LastRow).Value2 'Place the range in an array to make iteration faster
Application.DisplayAlerts = False: Application.ScreenUpdating = False
For i = 2 To UBound(arrWork) - 1 'Iterate between the array elements:
If arrWork(i, 1) = arrWork(i + 1, 1) Then
'Determine how many consecutive similar rows exist:______
For k = 1 To LastRow
If i + k + 1 >= UBound(arrWork) Then Exit For
If arrWork(i, 1) <> arrWork(i + k + 1, 1) Then Exit For
Next k '__
For j = 14 To 14 'Build the concatenated string of cells in range "N":
strVal = ws.Cells(i, j).Value
For m = 1 To k
strVal = strVal & vbLf & ws.Cells(i + m, j).Value
Next m
ws.Cells(i, j).Value = strVal: strVal = ""
Next j
For m = 1 To k 'Place the cells for rows to be deleted in a Union range, to delete at the end, at once
If rngDel Is Nothing Then
Set rngDel = ws.Range("A" & i + m)
Else
Set rngDel = Union(rngDel, ws.Range("A" & i + m)) 'This line causes macro takes very long time to finish.
End If
Next m
i = i + k: If i >= UBound(arrWork) - 1 Then Exit For 'Increment the i variable and exiting if the resulted value exits the array size
End If
Next i
If Not rngDel Is Nothing Then rngDel.EntireRow.Delete 'Delete the not necessary rows
Application.DisplayAlerts = True: Application.ScreenUpdating = True
End Sub
Union gets progressively slower as you add more cells/areas to the range (see numbers here: https://stackoverflow.com/a/56573408/478884). If you were working "bottom up" you could delete rngDel every (eg) 500 rows, but you can't take that approach since you're working top-down.
Here's a different approach - adding cells to a Collection and then processing the collection "bottom-up" at the end, using a batch-delete process.
Sub TestRowDeletion()
Dim rngRows As Range, data, rngDel As Range, i As Long
Dim t, nRows As Long, colCells As New Collection
Set rngRows = Range("A1:A10000") '10k rows for testing
'Approach #1 - your existing method
DummyData rngRows 'populate some dummy data
data = rngRows.Value
t = Timer
For i = 1 To UBound(data, 1)
'removing ~25% of cells...
If data(i, 1) > 0.75 Then BuildRange rngDel, rngRows.Cells(i)
Next i
If Not rngDel Is Nothing Then rngDel.EntireRow.Delete
Debug.Print "Regular single delete", Timer - t
'Approach #2 - batch-deleting rows
DummyData rngRows 'reset data
data = rngRows.Value
t = Timer
For i = 1 To UBound(data, 1)
If data(i, 1) > 0.75 Then colCells.Add rngRows.Cells(i)
Next i
RemoveRows colCells
Debug.Print "Batch-deleted", Timer - t
'Approach #3 - array of "delete" flags plus SpecialCells()
DummyData rngRows 'reset data
data = rngRows.Value
t = Timer
ReDim flags(1 To UBound(data, 1), 1 To UBound(data, 2))
For i = 1 To UBound(data, 1)
If data(i, 1) > 0.75 Then
flags(i, 1) = "x"
bDelete = True 'flag we have rows to delete
End If
Next i
If bDelete Then
With rngRows.Offset(0, 10) 'use an empty column....
.Value = flags 'populate with flags for deletion
.SpecialCells(xlCellTypeConstants).EntireRow.Delete
End With
End If
Debug.Print "Specialcells", Timer - t
End Sub
'Delete the row for any cell in `col`
' cells were added to `col` in a "top down" order
Sub RemoveRows(col As Collection)
Dim rngDel As Range, n As Long
For n = col.Count To 1 Step -1 'working from the bottom up...
BuildRange rngDel, col(n)
If n Mod 250 = 0 Then
rngDel.EntireRow.Delete
Set rngDel = Nothing
End If
Next n
If Not rngDel Is Nothing Then rngDel.EntireRow.Delete
End Sub
Sub DummyData(rng As Range)
With rng
.Formula = "=RAND()"
.Value = .Value
End With
End Sub
Sub BuildRange(ByRef rngTot As Range, rngAdd As Range)
If rngTot Is Nothing Then
Set rngTot = rngAdd
Else
Set rngTot = Application.Union(rngTot, rngAdd)
End If
End Sub
Times (sec) - note how differently the single-delete and batch-delete approaches scale as more rows are added.
# of rows deleted ~2.5k/10k ~5k/20k ~7.5k/30k
------------------------------------------------------------
1. Regular single delete 10.01 65.9 226
2. Batch-deleted 2.2 4.7 7.8
3. SpecialCells 1.6 3.1 4.7
You could also consider populating a "delete" flag in your dataset, then using the autofilter/delete visible rows approach (EDIT: added as method #3)
Posting this as a working (but faster) version of your actual use case, since my other answer is really just about timing the different approaches.
Sub DeleteSimilarRowsCombineColumnN()
Const SEP As String = ","
Dim arrKeys, arrVals, arrFlags, rngRows As Range, rngVals As Range, i As Long, key, currKey, s As String
Dim ws As Worksheet, ub As Long, t, n As Long
t = Timer
Set ws = ActiveSheet
Set ws = ActiveSheet
Set rngRows = ws.Range("A1:A" & ws.Cells(Rows.Count, 1).End(xlUp).Row)
Set rngVals = rngRows.EntireRow.Columns("N")
arrKeys = rngRows.Value
ub = UBound(arrKeys, 1)
arrVals = rngVals.Value
ReDim arrFlags(1 To UBound(arrKeys, 1), 1 To 1)
currKey = Chr(0) 'non-existing key...
For i = ub To 1 Step -1 'looping from bottom up
key = arrKeys(i, 1) 'this row's key
If key <> currKey Then 'different key from row below?
If i < ub Then arrVals(i + 1, 1) = s 'populate the collected info for any previous key
s = arrVals(i, 1) 'collect this row's "N" value
currKey = key 'set as current key
Else
If i < ub Then
arrFlags(i + 1, 1) = "x" 'flag for deletion
n = n + 1
End If
s = arrVals(i, 1) & SEP & s 'concatenate the "N" value
End If
Next i
arrVals(1, 1) = s 'populate the last (first) row...
rngVals.Value = arrVals 'drop the concatenated values
If n > 0 Then 'any rows to delete?
Debug.Print "About to delete " & n & " of " & ub & " rows", Timer - t
With rngRows.Offset(0, 100) 'use any empty column
.Value = arrFlags
.SpecialCells(xlCellTypeConstants).EntireRow.Delete
End With
Debug.Print "Done deleting in " & Round(Timer - t, 2) & " sec"
End If
End Sub
Related
I am currently reading a range into an array to perform a few calculations before outputting into another worksheet. My reason for using the array is speed as I am often dealing with thousands of rows.
I have one particular calculation that I am struggling with for some reason.
This is the part I am struggling with (rest of sample of this code is further down):
For i = non_rev_rows To 2 Step -1.
**' Remove Blank Rows from array
If data_range(i, 2) = "No WBS/CC" Then
If Application.WorksheetFunction.CountA(Range("C" & i & ":M" & i)) = 0 Then
Rows(i).Delete
End If
So basically when a row in column 2 is equal to "No WBS/CC" then I need to run a CountA or any other method you can recommend to calcuate the total value of columns C to M on that row. I am essentially looking for any row that = "No WBS/CC" and where columns C:M have no value. If so, then delete the entire row. If there is a value in columns C:M then I would not wish to delete the row.
'Row Count
With Sheets("array")
non_rev_rows = .Range("E" & .Rows.Count).End(xlUp).Row
End With
' Remove Blank Rows from array
' Replace "NO WBS/CC" with Co Code Over-Ride if supplied
' Set Debit / Credit
' Round to 2 decimal places
Set data = array_sheet.Range("A1:M" & non_rev_rows)
data_range = data.Value
For i = non_rev_rows To 2 Step -1.
**' Remove Blank Rows from array
If data_range(i, 2) = "No WBS/CC" Then
If Application.WorksheetFunction.CountA(Range("C" & i & ":M" & i)) = 0 Then
Rows(i).Delete
End If
' Replace "NO WBS/CC" with Co Code Over-Ride if supplied
If data_range(i, 13) <> 0 Then
data_range(i, 2) = data_range(i, 13)
End If
End If**
' Set Debit / Credit
data_range(i, 3) = Replace(data_range(i, 3), "Debit", 41)
data_range(i, 3) = Replace(data_range(i, 3), "Credit", 51)
' Round to 2 decimal places
data_range(i, 5) = WorksheetFunction.Round(data_range(i, 5), 2)
' If data_range(i, 3) = "Debit" Then
' data_range(i, 3).Value = 41
' ElseIf data_range(i, 3) = "Credit" Then
' data_range(i, 3).Value = 51
' End If
'data_range(i, 5).Value = Application.WorksheetFunction.Round(Range(data_range(i, 5)).Value, 2)
'Range("E" & i).Value = Application.WorksheetFunction.Round(Range("E" & i).Value, 2)
Next i
**' Remove Blank Rows from array
If data_range(i, 2) = "No WBS/CC" Then
If Application.WorksheetFunction.CountA(Range("C" & i & ":M" & i)) = 0 Then
Rows(i).Delete
End If
This code does not result in an error but it also does not have the desired impact. I have several rows in my test data that contain "No WBS/CC" in column 2 and zero values in columns C:M but the code is not deleting those rows.
If you want learning/understanding how an array row can be deleted (adapted for your case), please test the next way. It will return the array without deleted rows starting from "O2" of the same sheet, so the range after M:M column must be empty. You can easily adapt last code line to return wherever you need (in other sheet, other workbook...):
Sub DeleteArrayRows()
Dim array_sheet As Worksheet, non_rev_rows As Long, Data As Range, count2 As Long, data_range, arrRow, i As Long
Set array_sheet = ActiveSheet 'worksheets("array")
non_rev_rows = array_sheet.Range("E" & array_sheet.rows.count).End(xlUp).row
Set Data = array_sheet.Range("A1:M" & non_rev_rows)
data_range = Data.Value
For i = 1 To UBound(data_range)
count2 = 0
If data_range(i, 2) = "No WBS/CC" Then
With Application
arrRow = .Transpose(.Transpose(.Index(data_range, i, 0))) 'extract a slice of the row array
End With
Debug.Print Join(arrRow, ""): Stop 'just to see the joinned respecitve slice In Immediate Window
'comment it after seeing what it represents and press F5
If data_range(i, 1) <> "" Then count2 = Len(data_range(i, 1))
If Len(Join(arrRow, "")) - count2 = Len(data_range(i, 2)) Then
data_range = DeleteArrayRow_(data_range, i): i = i - 1
End If
End If
If i = UBound(data_range) Then Exit For
Next i
'drop the array (without deleted rows) in a range:
array_sheet.Range("O1").Resize(UBound(data_range), UBound(data_range, 2)).Value = data_range
End Sub
Private Function DeleteArrayRow_(arr As Variant, RowToDelete As Long) As Variant 'interesting...
'It does not work to eliminate the first array row...
Dim Rws As Long, cols As String
Rws = UBound(arr) - LBound(arr)
cols = "A:" & Split(Columns(UBound(arr, 2) - LBound(arr, 2) + 1).address(, 0), ":")(0)
DeleteArrayRow_ = Application.Index(arr, Application.Transpose(Split(Join(Application.Transpose(Evaluate("Row(1:" & _
(RowToDelete - 1) & ")"))) & " " & Join(Application.Transpose(Evaluate("Row(" & _
(RowToDelete + 1) & ":" & UBound(arr) & ")"))))), Evaluate("COLUMN(" & cols & ")"))
End Function
It is not extremely fast, I tried showing it only for didactic purpose. To see that it is and how it is possible...
Note: I did not pay attention to all at the code lines after deletion. It can be easily adapted to include that part...
You can do both tests on the array rather than partially in array and partially in the worksheet.
Only delete the row in the worksheet when you find a full match.
Public Sub Test2()
Dim data_range As Variant
Dim lRows As Long
Dim lColumns As Long
Dim lCounter As Long
data_range = Sheet1.Range("A1:M6")
' Add the data to an array
For lRows = UBound(data_range) To LBound(data_range) Step -1
'Step through the array in reverse
If data_range(lRows, 2) = "No WBS/CC" Then
'Check for the "No WBS/CC" value in the second column of the array
lCounter = 0
'Reset the counter
For lColumns = 3 To 13
If Not IsEmpty(data_range(lRows, lColumns)) Then
lCounter = lCounter + 1
End If
Next lColumns
'Check columns in the array row to see if they have data
'Add to the counter for each cell having value
If lCounter = 0 Then
Sheet1.Rows(lRows).EntireRow.Delete
End If
'If the counter is zero delete the current row in the Workbook
End If
Next lRows
End Sub
Sample data before the macro is run. The row we expected to be removed highlighted in green.
Sample data after the macro is run. The expected row has been removed.
An alternate option is to write the valid rows to a new array.
Clear the data on the worksheet, then write the new array to the worksheet.
Remove Rows
Sub DoStuff()
Dim wb As Workbook: Set wb = ThisWorkbook
Dim ws As Worksheet: Set ws = wb.Worksheets("Array")
Dim LastRow As Long: LastRow = ws.Cells(ws.Rows.Count, "E").End(xlUp).Row
Dim rg As Range: Set rg = ws.Range("A2", ws.Cells(LastRow, "M"))
Dim rCount As Long: rCount = rg.Rows.Count
Dim cCount As Long: cCount = rg.Columns.Count
Dim Data() As Variant: Data = rg.Value
Dim sr As Long
Dim dr As Long
Dim c As Long
For sr = 1 To rCount
If Not IsRowBlank(Data, sr, 3, 13) Then ' is not blank
' Replace "NO WBS/CC" with Co Code Over-Ride if supplied
If CStr(Data(sr, 1)) = "No WBS/CC" Then
If Data(sr, 13) <> 0 Then
Data(sr, 2) = Data(sr, 13)
End If
End If
' Set Debit / Credit
Data(sr, 3) = Replace(Data(sr, 3), "Debit", 41)
Data(sr, 3) = Replace(Data(sr, 3), "Credit", 51)
' Round to 2 decimal places
Data(sr, 5) = Application.Round(Data(sr, 5), 2)
' Copy source row to destination row.
dr = dr + 1
For c = 1 To cCount
Data(dr, c) = Data(sr, c)
Next c
'Else ' is blank; do nothing
End If
Next sr
' Clear bottom source data.
If dr < rCount Then
For sr = dr + 1 To rCount
For c = 1 To cCount
Data(sr, c) = Empty
Next c
Next sr
End If
rg.Value = dData
End Sub
Function IsRowBlank( _
Data() As Variant, _
ByVal DataRow As Long, _
ByVal StartColumn As Long, _
ByVal EndColumn As Long) _
As Boolean
Dim c As Long
For c = StartColumn To EndColumn
If Len(CStr(Data(DataRow, c))) > 0 Then Exit For
Next c
IsRowBlank = c > EndColumn
End Function
I have a code that matches a cell value in Column C on Sheet1 to a pivot table on Sheet3 and then copies certain columns over.
Code will check how many entries there are on Sheet1 that need to be checked
Loop 2: For every value in Column C/Sheet1 with a match in Column A on Sheet 2 it will then copy over the corresponding data from Column B,C,D,E.
Since there are multiple matches possible by value/Sheet I am limiting the data pull to three matches (three loops in the code). To achieve that I am increasing i +1 or i+2 to get the next row in the pivot table.
The table on Sheet 2 is sometimes 10,000+ rows and excel crashes.
Does anyone have an idea how to speed up the loop codes (Loop2,3,4 are the same) to make it less work intensive e.g. array possibly? They are causing the lock up since I think the code keeps running up and down column A.
Set sheet3 = Sheets("OrbitPivotTable")
CellChanged = Sheet1.Range("A1").Value + 1
LastRow = sheet3.Cells(Rows.Count, "A").End(xlUp).Row
LastData = Sheet1.Cells(Rows.Count, "C").End(xlUp).Row
'Loop1
For i = 1 To LastRow
If Sheet1.Range("C" & CellChanged).Value = "" Then GoTo Nextstep2
If Sheet1.Range("C" & CellChanged).Value = sheet3.Range("A" & i) Then
Sheet1.Range("H" & CellChanged).Value = sheet3.Range("B" & i).Value 'Customer
Sheet1.Range("I" & CellChanged).Value = sheet3.Range("C" & i).Value 'Rate Val start
Sheet1.Range("J" & CellChanged).Value = sheet3.Range("D" & i).Value 'ATA All in
Sheet1.Range("K" & CellChanged).Value = sheet3.Range("E" & i).Value 'Special Remarks
Found = True
End If
If Found = True Or i = LastRow Then
If CellChanged = LastData Then
Exit For
End If
If Found = True Then
Found = False
Nextstep2:
CellChanged = CellChanged + 1
End If
i = 0
End If
Next i
'Loop2
etc....
Excel File
I might have misunderstood the process in the file you shared, but this should be faster (and much less code overall).
I put the pivot table lookup in a loop, switched to Match(), and reduced the number of read/writes using arrays where possible.
EDITED to fix an embarrassing bug where I forgot to adjust the Match() result m to account for the starting row of the range I run match() against...
Sub HB_IPT_Rate_Check()
Dim wsReport As Worksheet, wsCPK As Worksheet, wsOrbitPivot As Worksheet
Dim c As Range, rwReport As Range, lastPivotRow As Long
Dim ata, m, numMatches As Long, matchFrom As Long, matchRow As Long
Set wsReport = ThisWorkbook.Worksheets("Comparison Report")
Set wsCPK = ThisWorkbook.Worksheets("CPK")
Set wsOrbitPivot = ThisWorkbook.Worksheets("OrbitPivotTable")
'loop over the rows in the report sheet
For Each c In wsReport.Range("C3", wsReport.Cells(Rows.Count, "C").End(xlUp)).Cells
ata = c.Value 'read this once....
Set rwReport = c.EntireRow
'1st Database Match "CPK"
m = Application.Match(ata, wsCPK.Columns("A"), 0)
If Not IsError(m) Then
With wsCPK.Rows(m)
rwReport.Columns("D").Resize(1, 4).Value = _
Array(.Columns("B").Value, .Columns("C").Value, _
.Columns("F").Value, .Columns("H").Value)
'Sum of HB CWGT (KG),Sum of MB CWGT (KG),Achiev CPK,Density
End With
Else
'no match...
End If
'2nd Database Match "Orbit"
lastPivotRow = wsOrbitPivot.Cells(Rows.Count, "A").End(xlUp).Row
numMatches = 0 'reset match count
matchFrom = 2
m = Application.Match(ata, wsOrbitPivot.Range("A" & matchFrom & ":A" & lastPivotRow), 0)
'keep going while we still have a match and we've not reached the max result count
Do While Not IsError(m) And numMatches < 3
numMatches = numMatches + 1
matchRow = matchFrom + (m - 1) 'adjust the matched row index according to where we started looking...
'sanity check
Debug.Print "Matched " & ata & " on row " & matchRow
rwReport.Columns("H").Offset(0, (numMatches - 1) * 4).Resize(1, 4).Value = _
wsOrbitPivot.Cells(matchRow, "B").Resize(1, 4).Value
'find the next match if any, starting below the last match
matchFrom = matchRow + 1
m = Application.Match(ata, wsOrbitPivot.Range("A" & matchFrom & ":A" & lastPivotRow), 0)
Loop
Next c 'next report row
End Sub
Use Dictionary to set row and column number.
Data is assigned to fit rows and columns in a virtual array.
Sub test()
Dim Ws(1 To 4) As Worksheet
Dim DicR As Object ' Dictionary
Dim DicC As Object ' Dictionary
Dim vDB, arr()
Dim s As String
Dim i As Long, n As Long, j As Integer
Dim r As Long, c As Integer
Set Ws(1) = Sheets("Comparison Report")
Set Ws(2) = Sheets("CPK")
Set Ws(3) = Sheets("OrbitPivotTable")
Set Ws(4) = Sheets("Orbit")
'Row index dictionary
Set DicR = CreateObject("Scripting.Dictionary") 'New Scripting.Dictionary
'Column index dictionary
Set DicC = CreateObject("Scripting.Dictionary") ' New Scripting.Dictionary
vDB = Ws(1).UsedRange
For i = 3 To UBound(vDB, 1)
s = vDB(i, 3)
If s <> "" Then
If DicR.Exists(s) Then
'DicC(s) = DicC(s) + 1
Else
n = n + 1
DicR.Add s, n 'row index
DicC.Add s, 0 'column index
End If
End If
Next i
'Create an array of virtual tables based on the number of dictionaries.
'Since the number of columns cannot be predicted, a specific number of 1000 was entered.
'in my test, number 100 is too small
ReDim arr(1 To DicR.Count, 1 To 1000)
For j = 2 To 4
vDB = Ws(j).Range("a1").CurrentRegion
For i = 2 To UBound(vDB, 1)
s = vDB(i, 1)
If DicR.Exists(s) Then
r = DicR(s)
c = DicC(s) * 4 + 1
DicC(s) = DicC(s) + 1
arr(r, c) = vDB(i, 2)
arr(r, c + 1) = vDB(i, 3)
arr(r, c + 2) = vDB(i, 4)
arr(r, c + 3) = vDB(i, 5)
End If
Next i
Next j
With Ws(1)
.Range("d3").Resize(UBound(arr, 1), UBound(arr, 2)) = arr
End With
End Sub
Result image
I use the code hereunder to calculate max values as described in this post (vba max value of group of values). The code works great but once I have more than 65k lines I get a data type mismatch when trying to pase the array:
sht.Range(Cells(1, lColumn), Cells(last.Row, lColumn)).Value = Application.Index(groupsArray, , lColumn)
Could somebody help me to slice the array in chunks. I have tried to get it working myself but without any luck.
Sub FillGroupsMax()
Dim lColumn As Long
Dim sht As Worksheet
Dim groupsArray As Variant 'array with all group infomation
Dim groupsSeen As Variant 'array with group infomation already seen
Application.ScreenUpdating = False 'stop screen updating makes vba perform better
Set sht = ThisWorkbook.Worksheets("import")
Set last = sht.Range("A:A").Find("*", Cells(1, 1), searchdirection:=xlPrevious) 'last cell with value in column A
lColumn = sht.Cells(1, Columns.Count).End(xlToLeft).Column
groupsArray = sht.Range(Cells(1, 1), Cells(last.Row, lColumn))
'collect all the information on the Sheet into an array
'Improves performance by not visiting the sheet
For dRow = 2 To last.Row 'for each of the rows skipping header
'check if group as already been seen
If inArrayValue(Cells(dRow, 1).Value, groupsSeen) > 0 Then
'if it has been seen/calculated attribute value
'Cells(dRow, 4).Value = inArrayValue(Cells(dRow, 1).Value, groupsSeen)
groupsArray(dRow, lColumn) = inArrayValue(Cells(dRow, 1).Value, groupsSeen)
Else
'if it hasn't been seen then find max
'Cells(dRow, 4).Value = getMax(Cells(dRow, 1).Value, groupsArray)
groupsArray(dRow, lColumn) = getMax(Cells(dRow, 1).Value, groupsArray, lColumn)
'array construction from empty
If IsEmpty(groupsSeen) Then
ReDim groupsSeen(0)
'groupsSeen(0) = Array(Cells(dRow, 1).Value, Cells(dRow, 4).Value)
groupsSeen(0) = Array(groupsArray(dRow, 1), groupsArray(dRow, lColumn))
'attribute value to array
Else
ReDim Preserve groupsSeen(0 To UBound(groupsSeen) + 1)
groupsSeen(UBound(groupsSeen)) = Array(groupsArray(dRow, 1), groupsArray(dRow, lColumn))
End If
End If
Next
sht.Range(Cells(1, lColumn), Cells(last.Row, lColumn)).Value = Application.Index(groupsArray, , lColumn)
'reactivate Screen updating
Application.ScreenUpdating = True
End Sub
Function getMax(group As String, groupsArray As Variant, lColumn As Long) As Double
'for each in array
For n = 1 To UBound(groupsArray)
'if its the same group the Max we seen so far the record
If groupsArray(n, 1) = group And groupsArray(n, lColumn - 1) > maxSoFar Then
maxSoFar = groupsArray(n, lColumn - 1)
End If
Next
'set function value
getMax = maxSoFar
End Function
Function inArrayValue(group As String, groupsSeen As Variant) As Double
'set function value
inArrayValue = 0
'if array is empty then exit
If IsEmpty(groupsSeen) Then Exit Function
'for each in array
For n = 0 To UBound(groupsSeen)
'if we find the group
If groupsSeen(n)(0) = group Then
'set function value to the Max value already seen
inArrayValue = groupsSeen(n)(1)
'exit function earlier
Exit Function
End If
Next
End Function
You can write a helper function to use instead of Application.Index
Bonus - it will be much faster than using Index (>5x)
Sub Tester()
Dim arr, arrCol
arr = Range("A2:J80000").Value
arrCol = GetColumn(arr, 5) '<< get the fifth column
Range("L2").Resize(UBound(arrCol, 1), 1).Value = arrCol
End Sub
'extract a single column from a 1-based 2-D array
Function GetColumn(arr, colNumber)
Dim arrRet, i As Long
ReDim arrRet(1 To UBound(arr, 1), 1 To 1)
For i = 1 To UBound(arr, 1)
arrRet(i, 1) = arr(i, colNumber)
Next i
GetColumn = arrRet
End Function
EDIT - since QHarr asked about timing here's a basic example
Sub Tester()
Dim arr, arrCol, t, i as long
arr = Range("A2:J80000").Value
t = Timer
For i = 1 to 100
arrCol = GetColumn(arr, 5) '<< get the fifth column
Next i
Debug.print Timer - t '<<# of seconds for execution
End Sub
Below, whilst not as tidy as could be, is a way to process an array in chunks and Index to access a column and write out to the sheet.
I populated two columns (A:B) with data. Both had 132,000 rows, populated incrementally, with values from 1 to 132,000 in each column for my test run.
You can fiddle with cutOff to get the chunk size just below the point where the fail happens.
The code below is simply to demonstrate the principle of looping in batches, upto the set cutoff in each batch, until all rows have been processed.
Option Explicit
Public Sub WriteArrayToSheet()
Dim wb As Workbook
Dim ws As Worksheet
Set wb = ThisWorkbook
Set sht = wb.Worksheets("Sheet1") 'change as appropriate
Dim myArr() 'dynamic array
myArr = sht.Range("A1").CurrentRegion.Value 'you may want a more robust method
Dim cutOff As Long 'the max value - what ever it is before error occurs
cutOff = 1000
Dim totalRows As Long 'total rows in array read in from sheet
totalRows = UBound(myArr, 1)
Dim totalArraysNeeded As Long
'Determine how many lots of cutOff chunks there are in the total number of array rows
totalArraysNeeded = Application.WorksheetFunction.Ceiling(totalRows / cutOff, 1)
Dim rotations As Long 'number of times to loop original array to handle all rows
Dim rowCountTotal As Long
Dim rowCount As Long
Dim tempArr() 'this will hold the chunk of the original array
Dim rowCounter As Long
Dim lastRow As Long
Dim nextRow As Long
Dim i As Long
Dim j As Long
Dim numRows As Long
rotations = 1
Do While rotations < totalArraysNeeded
If rotations < totalArraysNeeded - 1 Then
ReDim tempArr(1 To cutOff, 1 To UBound(myArr, 2)) 'size chunk array
numRows = cutOff
Else
numRows = totalRows - rowCountTotal
ReDim tempArr(1 To numRows, 1 To UBound(myArr, 2)) 'size chunk array
End If
For i = 1 To numRows
rowCount = 1 'rows in this chunk looped
rowCountTotal = rowCountTotal + 1 'rows in original array looped
For j = LBound(myArr, 2) To UBound(myArr, 2)
tempArr(i, j) = myArr(rowCountTotal, j)
Next j
rowCount = rowCount + 1
Next i
With sht
lastRow = .Cells(.Rows.Count, "E").End(xlUp).Row 'Column where I am writing the sliced column out to
End With
If lastRow = 1 Then
nextRow = 1
Else
nextRow = lastRow + 1
End If
sht.Range("E" & nextRow).Resize(UBound(tempArr, 1), 1) = Application.Index(tempArr, , 1) 'write out to sheet
rotations = rotations + 1
Loop
End Sub
As #Tim suggested, the best way to slice a large array is use a loop to copy the column.
Though in your case, most of the processing time is spent on computing the maximum since your code is using a nested loop.
If you want to reduce significantly the processing time, then use a dictionary:
Sub Usage
GetMaxByGroupTo _
sourceGroups := ThisWorkbook.Range("Sheet1!A2:A100"), _
sourceValues := ThisWorkbook.Range("Sheet1!B2:B100"), _
target := ThisWorkbook.Range("Sheet1!C2")
End Sub
Sub GetMaxByGroupTo(sourceGroups As Range, sourceValues As Range, target As Range)
Dim dict As Object, groups(), values(), r As Long, max
Set dict = CreateObject("Scripting.Dictionary")
groups = sourceGroups.Value2
values = sourceValues.Value2
' store the maximum value of each group in a dictionary for an efficient lookup '
For r = Lbound(groups) to Ubound(groups)
max = dict(groups(r, 1))
If VarType(max) And values(r, 1) <= max Then Else dict(groups(r, 1)) = values(r, 1)
Next
' build and copy the result array to the sheet '
For r = Lbound(groups) to Ubound(groups)
values(r, 1) = dict(groups(r, 1))
Next
target.Resize(Ubound(groups), 1).Value2 = values
End Sub
I have a giant dataset that looks like this
I am trying to go down the list of different companies and grab 3 per company and combine them. Based on the photo above, I would have 2 different lists with 3 companies each (except TH Repair which will have 2 in the final list).
My real dataset contains hundreds of different companies, each with dozens/hundreds of entries so I would finish with dozens of lists (each potentially hundreds long).
I tried to record a macro and ended up with this code
Sub Loop1()
'
' Loop1 Macro
'
'
Range("A4:E6").Select
Selection.Copy
Sheets("Sheet3").Select
Range("A18").Select
ActiveSheet.Paste
Sheets("Sheet2").Select
Range("A11:E13").Select
Application.CutCopyMode = False
Selection.Copy
Sheets("Sheet3").Select
Range("A21").Select
ActiveSheet.Paste
Sheets("Sheet2").Select
Range("A17:E19").Select
Application.CutCopyMode = False
Selection.Copy
Sheets("Sheet3").Select
Range("A24").Select
ActiveSheet.Paste
End Sub
However, this turned out to be WAY more complicated then I expected.
I am looking for the end result to look like this
See if something like this works for you. I only ran one scenario through it so you will want to test it more.
This makes the assumption that the data is sorted by column B on the original sheet
This procedure makes an assumption that there is either headers or no data on row 1.
You will need to change the "Sheet1" in this line Set ws1 = ActiveWorkbook.Worksheets("Sheet1") to the name of the sheet you are starting with.
Option Explicit
Public Sub MoveData()
Dim ws1 As Worksheet
Set ws1 = ActiveWorkbook.Worksheets("Sheet1")
Dim ws2 As Worksheet
Set ws2 = ActiveWorkbook.Worksheets.Add()
Dim rw As Long
Dim match_count As Integer
Dim list_multiplier As Integer
list_multiplier = 7
Dim list_row() As Long
ReDim list_row(0)
list_row(0) = 2
For rw = 2 To ws1.Range("A" & ws1.Rows.Count).End(xlUp).Row
If ws1.Range("B" & rw).Value <> ws1.Range("B" & rw).Offset(-1, 0).Value Then
match_count = 0
Else
match_count = match_count + 1
End If
Dim list_num As Integer
list_num = match_count \ 3
If list_num > UBound(list_row, 1) Then
ReDim Preserve list_row(list_num)
list_row(list_num) = 2
End If
ws2.Cells(list_row(list_num), 1 + list_multiplier * list_num).Value = ws1.Range("A" & rw).Value
ws2.Cells(list_row(list_num), 2 + list_multiplier * list_num).Value = ws1.Range("B" & rw).Value
ws2.Cells(list_row(list_num), 3 + list_multiplier * list_num).Value = ws1.Range("C" & rw).Value
ws2.Cells(list_row(list_num), 4 + list_multiplier * list_num).Value = ws1.Range("D" & rw).Value
ws2.Cells(list_row(list_num), 5 + list_multiplier * list_num).Value = ws1.Range("E" & rw).Value
list_row(list_num) = list_row(list_num) + 1
Next rw
End Sub
When you record your macro, ensure that "Use Relative References" on the Developer Ribbon tab is enabled, :)
assuming row 3 has your data headers, you could try this:
Option Explicit
Sub main()
Dim nLists As Long, iList As Long
Dim data As Variant
Dim dataToDelete As Range
With Range("F3", Cells(Rows.Count, 1).End(xlUp))
data = .Value
nLists = WorksheetFunction.Max(.Resize(,1))
nLists = nLists \ 3 + IIf(nLists - 3 * (nLists \ 3) = 0, -1, 0)
End With
With Range("A3").Resize(, 6)
For iList = 0 To nLists
Set dataToDelete = Nothing
With .Offset(, iList * 6).Resize(UBound(data))
.Value = data
.AutoFilter Field:=1, Criteria1:="<=" & iList * 3, Criteria2:=">" & (iList + 1) * 3, Operator:=xlOr
If Application.WorksheetFunction.Subtotal(103, .Cells) > 1 Then Set dataToDelete = .Offset(1).Resize(.Rows.Count - 1).SpecialCells(xlCellTypeVisible)
.Parent.AutoFilterMode = False
If Not dataToDelete Is Nothing Then dataToDelete.Delete xlShiftUp
End With
Next
End With
End Sub
Your task is actually slightly trickier than your online advice suggests. Basically, you have to do the following:
Find out how many unique 'keys' (ie unique items in column B) you have. This will tell you the total number of rows you need (ie number of unique keys * 3)
Count the number of items for each 'key'. This will tell you how many columns you need (ie max item count / 3 * number of columns in array [A:E = 5])
Loop through each line of data and it put on appropriate row for that 'key'. Once three has been reached, jump the column for that key 6 columns to the right, and continue.
If you were to use a Class object and Collection type of object, this could be really quite concise code, but judging by your post you are at the beginning of your programming journey in VBA. Therefore, I've broken down each task into separate chunks of code so you will hopefully see how arrays can work for you. Once you practise with arrays a little, perhaps you could have a go at making this code more efficient by combining some of the loops:
Public Sub RunMe()
Dim data As Variant
Dim r As Long, c As Long, i As Long, dataRows As Long, dataCols As Long, keyLen As Long, maxCount As Long
Dim keys As String
Dim k As Variant
Dim keyArray() As String
Dim keyCount() As Long, threeCount() As Long, rowNum() As Long, colNum() As Long
Dim output() As Variant
'Read the data - change "Sheet1" to your sheet name.
'Shows how to write range values into a variant to
'create an array of variants.
data = ThisWorkbook.Worksheets("Sheet1").UsedRange.Value2
dataRows = UBound(data, 1)
dataCols = UBound(data, 2)
'Create a list of unique keys.
'Note: not the most efficient way, but shows how to
'create an array from a value-separated string.
For r = 1 To dataRows
If InStr(keys, CStr(data(r, 2))) = 0 Then
If Len(keys) > 0 Then keys = keys & "|"
keys = keys & CStr(data(r, 2))
End If
Next
keyArray = Split(keys, "|")
keyLen = UBound(keyArray)
'Initialise the row and column numbers for each key.
'Shows how to iterate an array using For Each loop.
ReDim rowNum(keyLen)
ReDim colNum(keyLen)
r = 1
i = 0
For Each k In keyArray
rowNum(i) = r
colNum(i) = 1
r = r + 3
i = i + 1
Next
'Count the number of items for each key.
'Shows how to iterate an array using For [index] loop.
ReDim keyCount(keyLen)
For r = 1 To dataRows
i = IndexOfKey(keyArray, CStr(data(r, 2)))
keyCount(i) = keyCount(i) + 1
If keyCount(i) > maxCount Then maxCount = keyCount(i)
Next
'Size the output array.
c = WorksheetFunction.Ceiling(maxCount / 3, 1)
ReDim output(1 To (keyLen + 1) * 3, 1 To c * dataCols + c - 1)
'Populate the output array.
ReDim threeCount(keyLen)
For r = 1 To dataRows
i = IndexOfKey(keyArray, CStr(data(r, 2)))
'Copy the columns for this row.
For c = 1 To dataCols
output(rowNum(i), colNum(i) + c - 1) = data(r, c)
Next
'Increment the count and if it's equals 3 then
'reset the row num and increase the column number.
threeCount(i) = threeCount(i) + 1
rowNum(i) = rowNum(i) + 1
If threeCount(i) = 3 Then
rowNum(i) = rowNum(i) - 3
colNum(i) = colNum(i) + dataCols + 1
threeCount(i) = 0
End If
Next
'Write the data - change "Sheet2" to your sheet name.
'Shows how to write an array to a Range.
ThisWorkbook.Worksheets("Sheet2").Range("A3") _
.Resize(UBound(output, 1), UBound(output, 2)).Value = output
End Sub
Private Function IndexOfKey(list() As String, key As String) As Long
Dim i As Long
Dim k As Variant
'Helper function to find index position of key in array.
For Each k In list
If key = k Then
IndexOfKey = i
Exit Function
End If
i = i + 1
Next
IndexOfKey = -1
End Function
Please look at my sample data and code to understand what I'm trying to do.
I need to use the value of Cells(, 3) to define a range to populate a Trialnumber(18) array. I need the array to iterate through a For loop, to count filled cells in column H for each trial and print the count to column T in the last row of each trial. I will also need the array for further data analysis in future(Unless someone can come up with a better solution).
At the moment I am experimenting with 3 modules of code, trying to get the desired solution.
Module 2 is the only one with no errors, and prints the value in the right cell, but it is printing the total filled cell count (562), rather than per trial (expected value = 1 or 2).
Module 1 is as follows:
Sub dotcountanalysis()
Dim startpoint As Long
startpoint = 1
Dim lastrow As Long
lastrow = Cells(Rows.Count, 3).End(xlUp).Row
Dim i As Long
With Worksheets("full test")
For i = 1 To 18
For n = startpoint To lastrow + 1
If Cells(n, 3).Value <> "Trial, " & CStr(i) Then
Dim nMinusOne As Long
nMinusOne = n - 1
Dim trialCount As Long
'Set Trialnumber(i-1) = Range(cells(startpoint, 3), cells(n-1, 3))
trialCount = Application.WorksheetFunction.CountA(Range("H" & CStr(startpoint) & ":" & "H" & CStr(nMinusOne)))
Range("T" & CStr(startpoint) & ":" & "T" & CStr(nMinusOne)).Value = trialCount
startpoint = n
Exit For
End If
Next n
Next i
End With
End Sub
It returns a "method _range of object _global falied" error on line: trialCount = Application.WorksheetFunction.CountA(Range("H" & CStr(startpoint) & ":" & "H" & CStr(nMinusOne)))
Module 3 is as follows:
Sub dotcountanalysis3()
Dim pressedCount As Long
Dim myCell As Range
Dim pressedRange As Range
'create trials array
Dim t(18) As Range
'set range for trialnumber (t)
Dim startpoint As Long
startpoint = 1
Dim lastrow As Long
lastrow = Cells(Rows.Count, 3).End(xlUp).Row
For i = 1 To 18
For n = startpoint To lastrow
startpoint = 7
If Cells(n, 3).Value <> "Trial, " & CStr(i) Then
Set t(i - 1) = Range(Cells(startpoint, 3), Cells(n, 3))
n = n + 1
startpoint = n
Exit For
End If
Next n
Next i
'count presses in each trial
With Worksheets("full test")
For i = 0 To 17
pressedCount = Application.WorksheetFunction.CountA _
(.Range(.Cells(t(), "H"), .Cells(.Rows.Count, "H")))
If pressedCount = 0 Then Exit Sub
'make sure there are cells or else the next line will fail
Set pressedRange = .Columns("H").SpecialCells(xlCellTypeConstants)
For Each myCell In pressedRange.Cells
'only loop through the cells containing something
.Cells(myCell.Row, "T").Value = pressedCount
Next myCell
Next i
End With
End Sub
It returns a run-time "type mismatch" error on line: pressedCount = Application.WorksheetFunction.CountA _
(.Range(.Cells(t(), "H"), .Cells(.Rows.Count, "H")))
Edit: I have updated code in mod 3 and updated error.
When counting things I like to use a dictionary object, and arrays are faster than going row by row on the sheet.
This will count unique combinations of Block+Trial: to count only by trial you would just use k = d(r, COL_TRIAL)
Dim dBT As Object 'global dictionary
Sub dotcountanalysis()
'constants for column positions
Const COL_BLOCK As Long = 1
Const COL_TRIAL As Long = 2
Const COL_ACT As Long = 7
Dim rng As Range, lastrow As Long, sht As Worksheet
Dim d, r As Long, k, resBT()
Set sht = Worksheets("full test")
lastrow = Cells(Rows.Count, 3).End(xlUp).Row
Set dBT = CreateObject("scripting.dictionary")
Set rng = sht.Range("B7:H" & lastrow)
d = rng.Value 'get the data into an array
ReDim resBT(1 To UBound(d), 1 To 1) 'resize the array which will
' be placed in ColT
'get unique combinations of Block and Trial and counts for each
For r = 1 To UBound(d, 1)
k = d(r, COL_BLOCK) & "|" & d(r, COL_TRIAL) 'create key
dBT(k) = dBT(k) + IIf(d(r, COL_ACT) <> "", 1, 0)
Next r
'populate array with appropriate counts for each row
For r = 1 To UBound(d, 1)
k = d(r, 1) & "|" & d(r, 2) 'create key
resBT(r, 1) = dBT(k) 'get the count
Next r
'place array to sheet
sht.Range("T7").Resize(UBound(resBT, 1), 1) = resBT
'show the counts in the Immediate pane (for debugging)
For Each k In dBT
Debug.Print k, dBT(k)
Next k
End Sub