Excel Log Function is Log(Base10)
VBA Log Function is Natural Log
Change this line as shown
New result =Code:K_Array(CR) = Log(Split(FileLines(CR), ",")(F)) / Log(10#)
table_aapl_test.csv 0.50476314127199400
Printable View
Excel Log Function is Log(Base10)
VBA Log Function is Natural Log
Change this line as shown
New result =Code:K_Array(CR) = Log(Split(FileLines(CR), ",")(F)) / Log(10#)
table_aapl_test.csv 0.50476314127199400
What I learned:
Filter cannot return a value by location in an empty array. It raises a "Run-time error '9': Subscript out of range"
Math functions in VBA are not the same as the same function in Excel!
The final (Oh, I so hope,) product:
Code:Option Explicit
Sub SamT_4()
Dim Filename As String
Dim NameLength As Long
Dim FileNames As Variant
Dim FileLines As Variant
Const F As Long = 5 'CSV field number counting from zero
Dim K_Array As Variant
Dim L_Array As Variant
Dim Sum_L As Double
Dim Fn As Long 'Fn = Index number for FileNames
Dim CR As Long 'CR = Column Arrays Index number. Same As Column Row number 'j
Const FolderPath As String = "C:\TestFolder\" '<<<<<<<<<< include ending \
'''' Put all the file names in the path in Array
FileNames = Filter(Split(CreateObject("wscript.shell").exec("cmd /c Dir " & _
FolderPath & "*.csv /b /s").stdout.readall, vbCrLf), ".")
'''' Open one file at a time
With CreateObject("scripting.filesystemobject")
For Fn = 0 To UBound(FileNames)
''''Put all lines from one file in Arrays
FileLines = Split(.opentextfile(FileNames(Fn)).readall, vbLf)
'Compensate for extra vbLf's in FileLines
Do While FileLines(UBound(FileLines)) = ""
ReDim Preserve FileLines(UBound(FileLines) - 1)
Loop
ReDim K_Array(UBound(FileLines))
ReDim L_Array(UBound(FileLines) + 1)
''''Log(F) into Column K
For CR = 0 To UBound(FileLines)
K_Array(CR) = Log(Split(FileLines(CR), ",")(F)) / Log(10#) '/Log(10) to match Excel Log Function
Next CR
''''Calculate Formula on Column K, put in Column L
For CR = 0 To UBound(K_Array) - 1 '-1 to compensate for column formula offsets
L_Array(CR + 1) = (100 * (K_Array(CR + 1) - K_Array(CR))) ^ 2
Next CR
''''Sum of Column L
For CR = 1 To UBound(L_Array)
Sum_L = Sum_L + L_Array(CR)
Next CR
'''' Put results in sheet
'Get FileName
NameLength = Len(FileNames(Fn)) - InStrRev(FileNames(Fn), "\")
Filename = Right(FileNames(Fn), NameLength)
'Place result
With ActiveSheet.Rows(Fn + 1)
.Columns(1) = Filename
.Columns(2) = Sum_L
End With
Next Fn 'Work on next File
End With
End Sub
Hi Sam, it is weird, this code works and the calculation in the first row (for table_aapl.csv) is correct, however to confirm the correctness of the calculation in the code I tried manually calculate next two files (table_abbv.csv and table_abt.csv) and they give me different results than the code:
table_abbv.csv 3.086788296 table_abt.csv 4.311690684
manual calculation:
table_abbv_test.xlsx 2.582025
table_abt_test.xlsx 1.224902
I send the xlsx files so you may see the equation I used.
Attachment 13670Attachment 13671
Send me the original CSV files.. Don't Zip them, just change the Extensions to "txt"
I changed the extension to txt and tried to upload it, but "Invalid file" occured, thus I upload it via czech data share website:
http://ulozto.cz/x9ifNzX8/table-aapl-txt
http://ulozto.cz/xdejuKhn/table-abbv-txt
http://ulozto.cz/xHjaJgZv/table-abt-txt
just pres "Sthánout", it means download.
If it helps you, I can share the whole dataset via ftp (cca 220 MB).
I have the files. Check back in a couple of hours. Maybe as much as 8 hours. I am busy today.
Those can not be downloaded by some. Zip your file(s) and then attach.
Here is how I found it. Left no possibilities to chance! First I performs all calculations by Sheet Formulas, then ran the code until I got a wrong value. Of course it was at the very end. A stupid omission. :banghead::banghead::banghead:Code:With Sheets("Sheet1").Rows(Fn + 1)
.Columns(1) = Filename
.Columns(2) = Sum_L
Sum_L = 0 '<<<<<<<<<<<<<<<Add this Line
End With
Code:Option Explicit
Sub SamT_4()
Dim Filename As String
Dim NameLength As Long
Dim FileNames As Variant
Dim FileLines As Variant
Const F As Long = 5 'CSV field number counting from zero
Dim F_Array() As Double
Dim K_Array() As Double
Dim L_Array() As Double
Dim Sum_L As Double
Dim FName As String
Dim X
Dim Fn As Long 'Fn = Index number for FileNames
Dim CR As Long 'CR = Column Arrays Index number. Same As Column Row number 'j
Const FolderPath As String = "C:\TestFolder\" '<<<<<<<<<< include ending \
'''' Put all the file names in the path in Array
FileNames = Filter(Split(CreateObject("wscript.shell").exec("cmd /c Dir " & _
FolderPath & "*.csv /b /s").stdout.readall, vbCrLf), ".")
'''' Open one file at a time
With CreateObject("scripting.filesystemobject")
For Fn = 0 To UBound(FileNames)
FName = Mid(FileNames(Fn), 17, Len(FileNames(Fn)) - 20)
''''Put all lines from one file in Arrays
FileLines = Split(.opentextfile(FileNames(Fn)).readall, vbLf)
'Compensate for extra vbLf's in FileLines
Do While FileLines(UBound(FileLines)) = ""
ReDim Preserve FileLines(UBound(FileLines) - 1)
Loop
ReDim F_Array(UBound(FileLines))
ReDim K_Array(UBound(FileLines))
ReDim L_Array(UBound(FileLines) + 1)
''''Value F into F_Array
For CR = 0 To UBound(FileLines)
F_Array(CR) = Split(FileLines(CR), ",")(F)
X = Sheets(FName).Cells(CR + 1, 6).Value
If CDbl(F_Array(CR)) <> X Then
MsgBox FName & " Cell F" & CR + 1 & " Error."
Exit Sub
End If
Next CR
''''Log(F) into Column K
For CR = 0 To UBound(FileLines)
K_Array(CR) = Log(F_Array(CR)) / Log(10#) '/Log(10) to match Excel Log Function
X = Sheets(FName).Cells(CR + 1, 11).Value
If CDbl(K_Array(CR)) <> X Then
MsgBox FName & " Cell K" & CR + 1 & " Error."
Exit Sub
End If
Next CR
''''Calculate Formula on Column K, put in Column L
For CR = 0 To UBound(K_Array) - 1 '-1 to compensate for column formula offsets
L_Array(CR + 1) = (100 * (K_Array(CR + 1) - K_Array(CR))) ^ 2
X = Sheets(FName).Cells(CR + 2, 12).Value
If CDbl(L_Array(CR + 1)) <> X Then
MsgBox FName & " Cell K" & CR + 1 & " Error."
Exit Sub
End If
Next CR
''''Sum of Column L
For CR = 0 To UBound(L_Array)
Sum_L = Sum_L + L_Array(CR)
X = Sheets(FName).Cells(CR + 2, 13).Value
If CDbl(Sum_L) <> X Then
MsgBox FName & " Cell M" & CR + 1 & " Error" .'<<<<<<< Found Error Here
Exit Sub
End If
Next CR
'''' Put results in sheet
'Get FileName
NameLength = Len(FileNames(Fn)) - InStrRev(FileNames(Fn), "\")
Filename = Right(FileNames(Fn), NameLength)
'Place result
'With ActiveSheet.Rows(Fn + 1)
With Sheets("Sheet1").Rows(Fn + 1)
.Columns(1) = Filename
.Columns(2) = Sum_L
Sum_L = 0 '<<<<<<<<Add this Line
End With
Next Fn 'Work on next File
End With
End Sub
Hi Sam, it works!! you are great..thank you very much!!
and finally, as I wrote at the very beginning, I need to get the result of the second equation into the column C
the second equation is similar to first one, I attach the excel file with my manual calculation and pdf file to see the equation in real (see third equation BV_t),
just notice please that the number 94 that I use in the last step of the manual calculation stays for number of rows that are used for the calculation in given csv file,
thus it will be different in every csv file (it is not a constant), I believe there is a code in VBA that generates that number
btw I am writting to Jacob about the contribution, thanks again
Attachment 13730
http://ulozto.cz/x1LUbSuS/bv-calculation-pdf
Sorry Max, that PDF might as well be in Czech for all I know.
I asked you a long time before if there was only one equation to use on the CSV files. Now you have another. Maybe there are more equations.
take this attachment and put in all equations you need just like I put in this one and the first one. Use all the columns you need. Yellow cells show the formulas and Green shows the final result formula and all the notes.
Hi Sam, pdf is in English ofc, I uploaded it via Czech sharing website as it was not possible to upload it via "Manage attachments"
I wrote already in #5 that there will be two equations and subsequently I will perform some calculation directly in final excel file between columns B and C, , no other equations :)
It is exactly as you describe it in the attachment in #92, I will need only these two equations and put results in columns B and C
Maybe only it would be helpful if in column D there was this number of the rows that enter into the second calculation (for table_aabv_test.xlsx it is 94), but that's all :))
Maxxino
I knew the PDF was English, but it was Advanced Mathematics, which I forgot 40 years ago.
I am trying humor.
BTW, you will owe me a new Tatra, Left hand drive, 2 axles and Air-conditioned, please. :D
SamT -- you really earned your money on this one:beerchug:
Maxxino,
Here ya go.
Be sure and change the FolderPath Constant.
See Wiki article: Numeric precision in Microsoft Excel
Basically, Excel is limited to an arbitrary number of significant digits. When you multiply two decimal numbers the result has as many more significant digits than the first two. Both formulas start with Logs, which are very likely to have many significant digits.
The first formula merely adds the square of the logs, so probably only gains 1 or 2 significant digits and will probably be accurate to at least 10 digits.
The second formula performs chain-multiplication, X = (((((L1*L2)*L3)*L4)*L5),,,). The result of each of those operations is rounded at an arbitrary digit location. After a hundred or so operations the cumulative error starts creeping up towards a more significant location in the result.
Contrary to the Wiki article, I did not see an increase in presision when using Variant type variables instead of Long types. YMMV
Minor observations
1. I'd put Sub Subfolder_File_Processing() in a standard module and not in the sheet module
2. If precision is important (I haven't been following this too closely), maybe a Decimal data sub-type of a Variant would work. It's a bit more work to use CDec() but might be idea
Quote:
Decimal variables are stored as 96-bit (12-byte) signed integers scaled by a variable power of 10. The power of 10 scaling factor specifies the number of digits to the right of the decimal point, and ranges from 0 to 28. With a scale of 0 (no decimal places), the largest possible value is +/-79,228,162,514,264,337,593,543,950,335. With a 28 decimal places, the largest value is +/-7.9228162514264337593543950335 and the smallest, non-zero value is +/-0.0000000000000000000000000001.
Note
At this time the Decimal data type can only be used within a Variant, that is, you cannot declare a variable to be of type Decimal. You can, however, create a Variant whose subtype is Decimal using the CDec function.
I'm not sure that this would work any better (or at all)Code:
Option Explicit
' Thanks to all @ http://www.vbaexpress.com/forum/show...bFolders/page3
Sub Subfolder_File_Processing()
Const F As Long = 5 'CSV field number counting from zero
Const FolderPath As String = "C:\TestFolder\" 'include ending \
Dim Filename As String
Dim NameLength As Long
Dim FileNames As Variant
Dim FileLines As Variant
Dim F_Array() As Variant '-----------------------
Dim Sum_L As Variant '-----------------------
Dim Q_Array() As Variant '-----------------------
Dim Sum_Q As Variant '-----------------------
Dim Pie As Double
Pie = Application.WorksheetFunction.Pi() * 1# '----------------------- why?
Dim Fn As Long 'Fn = Index number for FileNames
Dim CR As Long 'CR = FileLines Index number
Dim NumRows As Long
' Put all the file names in the path in Array
FileNames = Filter(Split(CreateObject("wscript.shell").exec("cmd /c Dir " & _
FolderPath & "*.csv /b /s").stdout.readall, vbCrLf), ".")
' Open one file at a time
With CreateObject("scripting.filesystemobject")
For Fn = 0 To UBound(FileNames)
'Put all lines from one file in Arrays
FileLines = Split(.opentextfile(FileNames(Fn)).readall, vbLf)
'Compensate for extra vbLf's in FileLines
Do While FileLines(UBound(FileLines)) = ""
ReDim Preserve FileLines(UBound(FileLines) - 1)
Loop
ReDim F_Array(UBound(FileLines))
ReDim Q_Array(UBound(FileLines))
'Initialize
NumRows = UBound(FileLines) + 1
Sum_L = CDec(0) '-----------------------
Sum_Q = CDec(0) '-----------------------
'Calcuate first result for one file
For CR = 0 To UBound(FileLines)
'Replace file line with Log of 6th value. Split(BlahBlah)(5)
F_Array(CR) = CDec(0) '-----------------------
F_Array(CR) = Log(Split(FileLines(CR), ",")(F)) / Log(10#) '-----------------------
'After the first line
If CR > 0 Then
Q_Array(CR) = CDec(0) '-----------------------
Sum_L = Sum_L + ((F_Array(CR) - F_Array(CR - 1)) * 100#) ^ 2 '-----------------------
Q_Array(CR) = Abs((F_Array(CR) - F_Array(CR - 1)) * 100#) '-----------------------
Sum_Q = Sum_Q + (Q_Array(CR) * Q_Array(CR - 1))
End If
Next CR
' Put results in sheet
'Get FileName
NameLength = Len(FileNames(Fn)) - InStrRev(FileNames(Fn), "\")
Filename = Right(FileNames(Fn), NameLength)
'Place result
With Sheets("Sheet1").Rows(Fn + 1)
.Columns(1) = Filename
.Columns(2) = Sum_L 'Column B
.Columns(3) = (Sum_Q * Pie * (94 / (94 - 1))) / 2
.Columns(4) = NumRows
End With
Next Fn 'Work on next File
End With
End Sub
Variants are 16 bytes. I ran it with all variants, only converting to Decimal when placing the results on the sheet and without even that conversion and saws no difference.
After converting all numeric variable to Variants and using these calculations the results were identical to the 10th digit with the other methods. Note that I kept the Magic number 100. It made a difference when I subtituted the Variant C = CDec(100) for the Magic number
I believe that the VBA method is that much more precise than the Excel On-Sheet method, which is limited to 15 significant places.Code:''''Calcuate results for one file
For CR = 0 To UBound(FileLines)
'Replace file line with Log of 6th value. Split(BlahBlah)(5)
F_Array(CR) = CDec(Log(Split(FileLines(CR), ",")(F)) / Log(10#)) '<<<<<<<<<<
'After the first line
If CR > 0 Then
Sum_L = Sum_L + ((F_Array(CR) - F_Array(CR - 1)) * 100) ^ 2
Q_Array(CR) = Abs((F_Array(CR) - F_Array(CR - 1)) * 100)
Sum_Q = Sum_Q + (Q_Array(CR) * Q_Array(CR - 1))
End If
Next CR
I need to go back and edit my "Accuracy" post above.
With 100 to 150 lines, Excel is accurate to +-3 in the third
With With Double Type numerical variables, VBA is accurate to 11 places
Assuming that using all decimals is the most precise.
1st Formula Results table_abbv.csv table_abt.csv table_aapl.csv Number of lines 94 106 151 Excel 2.5820251543270400 1.2249023888699800 0.5047631412720350 Doubles 2.5820251543270400 1.2249023888699900 0.5047631412719940 Decimal Variables 2.5820251543248600 1.2249023888700300 0.5047631412714680 2nd Formula Results Excel 1.7327113499938000 0.8586603953426530 0.4404276525780680 Doubles 1.7327113499937800 0.8597056301514100 0.4422153315780190 Decimal Variables 1.7327113499905900 0.8597056301520180 0.4422153315780510
Most precise version to data. (IMO)
Code:Option Explicit
Sub Fast_Subfolder_File_Processing()
'8-Byte accuracy
' Thanks to all @ http://www.vbaexpress.com/forum/showthread.php?52649-Process-All-CSV-Files-In-SubFolders/page3
Dim Filename As String
Dim NameLength As Long
Dim FileNames As Variant
Dim FileLines As Variant
Const F As Long = 5 'CSV field number counting from zero
Dim F_Array() As Variant
Dim Sum_L As Variant
Dim Q_Array() As Variant
Dim Sum_Q As Variant
Dim C As Variant
Dim Pie As Variant
Pie = CDec(Application.WorksheetFunction.Pi())
Dim Fn As Long 'Fn = Index number for FileNames
Dim CR As Long 'CR = FileLines Index number
Dim NumRows As Long
Const FolderPath As String = "C:\TestFolder\" 'include ending \
'''' Put all the file names in the path in Array
FileNames = Filter(Split(CreateObject("wscript.shell").exec("cmd /c Dir " & _
FolderPath & "*.csv /b /s").stdout.readall, vbCrLf), ".")
'''' Open one file at a time
With CreateObject("scripting.filesystemobject")
For Fn = 0 To UBound(FileNames)
''''Put all lines from one file in Arrays
FileLines = Split(.opentextfile(FileNames(Fn)).readall, vbLf)
'Compensate for extra vbLf's in FileLines
Do While FileLines(UBound(FileLines)) = ""
ReDim Preserve FileLines(UBound(FileLines) - 1)
Loop
ReDim F_Array(UBound(FileLines))
ReDim Q_Array(UBound(FileLines))
'Initialize
NumRows = UBound(FileLines) + 1
Sum_L = 0
Sum_Q = 0
''''Calcuate first result for one file
For CR = 0 To UBound(FileLines)
'Replace file line with Log of 6th value. Split(BlahBlah)(5)
F_Array(CR) = CDec(Log(Split(FileLines(CR), ",")(F)) / Log(10#))
'After the first line
If CR > 0 Then
Sum_L = Sum_L + ((F_Array(CR) - F_Array(CR - 1)) * 100) ^ 2
Q_Array(CR) = Abs((F_Array(CR) - F_Array(CR - 1)) * 100)
Sum_Q = Sum_Q + (Q_Array(CR) * Q_Array(CR - 1))
End If
Next CR
'''' Put results in sheet
'Get FileName
NameLength = Len(FileNames(Fn)) - InStrRev(FileNames(Fn), "\")
Filename = Right(FileNames(Fn), NameLength)
'Place result
With Sheets("Sheet1").Rows(Fn + 1)
.Columns(1) = Filename
.Columns(2) = Sum_L 'Column B
.Columns(3) = (Sum_Q * Pie * (94 / (94 - 1))) / 2
.Columns(4) = NumRows
End With
Next Fn 'Work on next File
End With
End Sub
Sub Precise_Subfolder_File_Processing()
'16-Byte accuracy
' Thanks to all @ http://www.vbaexpress.com/forum/showthread.php?52649-Process-All-CSV-Files-In-SubFolders/page3
Dim Filename As String
Dim NameLength As Long
Dim FileNames As Variant
Dim FileLines As Variant
Const F As Long = 5 'CSV field number counting from zero
Dim F_Array() As Variant
Dim Sum_L As Variant
Dim Q_Array() As Variant
Dim Sum_Q As Variant
Dim Pie As Double
Pie = Application.WorksheetFunction.Pi() * 1
Dim Fn As Long 'Fn = Index number for FileNames
Dim CR As Long 'CR = FileLines Index number
Dim NumRows As Long
Const FolderPath As String = "C:\TestFolder\" 'include ending \
'''' Put all the file names in the path in Array
FileNames = Filter(Split(CreateObject("wscript.shell").exec("cmd /c Dir " & _
FolderPath & "*.csv /b /s").stdout.readall, vbCrLf), ".")
'''' Open one file at a time
With CreateObject("scripting.filesystemobject")
For Fn = 0 To UBound(FileNames)
''''Put all lines from one file in Arrays
FileLines = Split(.opentextfile(FileNames(Fn)).readall, vbLf)
'Compensate for extra vbLf's in FileLines
Do While FileLines(UBound(FileLines)) = ""
ReDim Preserve FileLines(UBound(FileLines) - 1)
Loop
ReDim F_Array(UBound(FileLines))
ReDim Q_Array(UBound(FileLines))
'Initialize
NumRows = UBound(FileLines) + 1
Sum_L = 0
Sum_Q = 0
''''Calcuate first result for one file
For CR = 0 To UBound(FileLines)
'Replace file line with Log of 6th value. Split(BlahBlah)(5)
F_Array(CR) = Log(Split(FileLines(CR), ",")(F)) / Log(10#)
'After the first line
If CR > 0 Then
Sum_L = Sum_L + ((F_Array(CR) - F_Array(CR - 1)) * 100) ^ 2
Q_Array(CR) = Abs((F_Array(CR) - F_Array(CR - 1)) * 100)
Sum_Q = Sum_Q + (Q_Array(CR) * Q_Array(CR - 1))
End If
Next CR
'''' Put results in sheet
'Get FileName
NameLength = Len(FileNames(Fn)) - InStrRev(FileNames(Fn), "\")
Filename = Right(FileNames(Fn), NameLength)
'Place result
With Sheets("Sheet1").Rows(Fn + 1)
.Columns(1) = Filename
.Columns(2) = (Sum_L)
.Columns(3) = ((Sum_Q * Pie * (94 / (94 - 1))) / 2)
.Columns(4) = NumRows
End With
Next Fn 'Work on next File
End With
End Sub