Consulting

Results 1 to 8 of 8

Thread: Check the file encode type?

  1. #1
    VBAX Newbie
    Joined
    Dec 2008
    Posts
    4
    Location

    Question Check the file encode type?

    Is there any way to check the file encode type? (ANSI or Unicode / UTF=8 etc)

    This can be simply checked by open the file with NOTEPAD and click SAVEAS, then check in the "ENCODE" field.

    Just don't know how to check it with vba, I have tried "ADODB.Stream.Charset" but it always return "Unicode" for all files with different encoder. And "Scripting.FileSystemObject" also can't do that.

    Any idea?

  2. #2
    Great question, here is a rudimentary way to do this:
    Option Explicit
     
    Public Enum abCharsets
        abError = 0
        abANSI = 1
        abUnicode = 2
        abUnicodeBigEndian = 3
        abUTF8 = 4
        ebUnknown = 5
    End Enum
     
    Public Sub Example()
        Dim objStream As ADODB.Stream
        Dim strPath As String
        strPath = "C:\test\Charset_Test\UTF8.txt"
        Set objStream = New ADODB.Stream
        With objStream
            .Open
            .Charset = CharsetToString(ReturnCharset(strPath))
            .LoadFromFile "C:\test\Charset_Test\UTF8.txt"
            MsgBox .ReadText
        End With
    End Sub
     
    Public Function ReturnCharset(ByVal filePath As String, Optional verifyANSI As Boolean = True) As abCharsets
        Const bytByte0Unicode_c As Byte = 255
        Const bytByte1Unicode_c As Byte = 254
        Const bytByte0UnicodeBigEndian_c As Byte = 254
        Const bytByte1UnicodeBigEndian_c As Byte = 255
        Const bytByte0UTF8_c As Byte = 239
        Const bytByte1UTF8_c As Byte = 187
        Const bytByte2UTF8_c As Byte = 191
        Const lngByte0 As Long = 0
        Const lngByte1 As Long = 1
        Const lngByte2 As Long = 2
        Dim bytHeader() As Byte
        Dim eRtnVal As abCharsets
        On Error GoTo Err_Hnd
        bytHeader() = GetFileBytes(filePath, lngByte2)
        Select Case bytHeader(lngByte0)
        Case bytByte0Unicode_c
            If bytHeader(lngByte1) = bytByte1Unicode_c Then
                eRtnVal = abCharsets.abUnicode
            End If
        Case bytByte0UnicodeBigEndian_c
            If bytHeader(lngByte1) = bytByte1UnicodeBigEndian_c Then
                eRtnVal = abCharsets.abUnicodeBigEndian
            End If
        Case bytByte0UTF8_c
            If bytHeader(lngByte1) = bytByte1UTF8_c Then
                If bytHeader(lngByte2) = bytByte2UTF8_c Then
                    eRtnVal = abCharsets.abUTF8
                End If
            End If
        End Select
        If Not CBool(eRtnVal) Then
            If verifyANSI Then
                If IsANSI(filePath) Then
                    eRtnVal = abCharsets.abANSI
                Else
                    eRtnVal = abCharsets.ebUnknown
                End If
            Else
                eRtnVal = abCharsets.abANSI
            End If
        End If
    Exit_Proc:
        On Error Resume Next
        Erase bytHeader
        ReturnCharset = eRtnVal
        Exit Function
    Err_Hnd:
        eRtnVal = abCharsets.abError
        Resume Exit_Proc
    End Function
     
    Private Function IsANSI(ByVal filePath As String) As Boolean
        Const lngKeyCodeNullChar_c As Long = 0
        Dim bytFile() As Byte
        Dim lngIndx As Long
        Dim lngUprBnd As Long
        bytFile = GetFileBytes(filePath)
        lngUprBnd = UBound(bytFile)
        For lngIndx = 0 To lngUprBnd
            If bytFile(lngIndx) = lngKeyCodeNullChar_c Then
                Exit For
            End If
        Next
        Erase bytFile
        IsANSI = (lngIndx > lngUprBnd)
    End Function
     
    Public Function GetFileBytes(ByVal path As String, Optional ByVal truncateToByte As Long = -1) As Byte()
        Dim lngFileNum As Long
        Dim bytRtnVal() As Byte
        If truncateToByte < 0 Then
            truncateToByte = FileLen(path) - 1
        End If
        lngFileNum = FreeFile
        If FileExists(path) Then
            Open path For Binary Access Read As lngFileNum
            ReDim bytRtnVal(truncateToByte) As Byte
            Get lngFileNum, , bytRtnVal
            Close lngFileNum
        End If
        GetFileBytes = bytRtnVal
        Erase bytRtnVal
    End Function
     
    Public Function FileExists(ByVal filePath As String) As Boolean
        FileExists = CBool(LenB(Dir(filePath, vbHidden + vbNormal + vbSystem + vbReadOnly + vbArchive)))
    End Function
    Public Function CharsetToString(ByVal value As abCharsets) As String
        Dim strRtnVal As String
        Select Case value
            Case abCharsets.abANSI
                strRtnVal = "us-ascii"
            Case abCharsets.abUTF8
                strRtnVal = "utf-8"
            Case Else
                strRtnVal = "Unicode"
        End Select
        CharsetToString = strRtnVal
    End Function
    Cordially,
    Aaron



    Keep Our Board Clean!
    • Please Mark your thread "Solved" if you get an acceptable response (under thread tools).
    • Enclose your code in VBA tags then it will be formatted as per the VBIDE to improve readability.

  3. #3
    VBAX Newbie
    Joined
    Dec 2008
    Posts
    4
    Location
    Thanks you so much, I've tested with the files and here below's the result:

    XML File:
    ANSI / Unicode: 5


    TEXT File:
    ANSI: 5
    Unicode: 2
    UTF-8: 4

    According to the result code definetion, the code works well with TXT file (Except ANSI) but not XML file. And if strPath is pointing to a file doesn't exist, a file with same name and zero size will be created (This can be solved by "(Dir(strPath) > "")" check at beginning)

    abANSI = 1
    abUnicode = 2
    abUnicodeBigEndian = 3
    abUTF8 = 4
    ebUnknown = 5

  4. #4
    Good catches. Fixed
    Cordially,
    Aaron



    Keep Our Board Clean!
    • Please Mark your thread "Solved" if you get an acceptable response (under thread tools).
    • Enclose your code in VBA tags then it will be formatted as per the VBIDE to improve readability.

  5. #5
    VBAX Newbie
    Joined
    Dec 2008
    Posts
    4
    Location
    Thanks so much! It really works for XML and TXT files now.

    I also find some problems are from my XML files which are auto-generated and zipped from SUN Microsystems. I have to unzip, open and save (without change) to make VBA return correct charset.


    I am sure if I should open a new post: (I will if you suggest)

    (1) Just quick open and save files:
    Use ADODB.Stream right? or any better suggestion?

    (2) Save with another encoder
    Is "ADODB.Stream.Charset" the best way to do it?

  6. #6
    I've updated the code in above to include an example of how to make it all work with an ado stream. As far as "better" goes, it depends on the requirements. The ado stream object should be fine for most of your needs. Unless you find plan on optimizing for speed, I'd stick with it But ado should be fast enough for the majority of tasks I could forsee.
    Cordially,
    Aaron



    Keep Our Board Clean!
    • Please Mark your thread "Solved" if you get an acceptable response (under thread tools).
    • Enclose your code in VBA tags then it will be formatted as per the VBIDE to improve readability.

  7. #7
    VBAX Newbie
    Joined
    Dec 2008
    Posts
    4
    Location
    Great works !

  8. #8
    Thanks for some great code!

    I only needed one change.

    I guess in europe this line:
    strRtnVal = "us-ascii"
    should be:
    strRtnVal = "windows-1252"

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •