高分求 VB 读取文本乱码问题,文本编码为UTF-8

   更新日期:2024.05.02
由于系统的需要,要国际化,但是由于那些字符串和翻译都写在EXCEL表格里面,如果一个一个的复制出来,那是相当麻烦的.所以老大让我写一个转换器,从EXECEL表格中导出数据,然后写到.PO扩展名的文本文件,要求UTF-8的编码格式的.

UTF-8是UNICODE编码格式的一种特殊情况.对于汉字的处理,它是采用了三个字节.对于它的具体情况,在这就不具体介绍,网上有相当多的资料.在VB中,有很多API可以提供,比如连接EXCEL、连接数据库、ADODB数据流等。这些API都可以很方便的引用。

在写的过程中,让我有点苦恼的是,用UTF-8编码编写的文本文件,都会产生一个BOM,这使得转换出来的文件,在系统编译中编译不过去,必须要把BOM去掉。接下来通过代码讲讲怎么实现。

Dim app As Excel.Application
Dim eworkbook As Workbook
Dim eworksheet As Worksheet
Dim eworksheet_count As Integer
Dim sheetName As String
Dim obj As Object
Dim FileNum

Dim file_path as String
Dim j as Integer
Dim filepath_save as String

filepath_save = "d:\"

Set app = New Excel.Application //连接EXCEL
Set eworkbook = app.Workbooks.Open(file_path)
eworkbook_count = eworkbook.Worksheets.count

For j = 1 To eworkbook_count

filepath_path = filepath_save & j & ".txt"
Set eworksheet = eworkbook.Sheets(j)
sheetName = eworksheet.Name
Set obj = New ADODB.Stream //设置ADODB流

With obj
.Open
.Charset = "UTF-8"
.Position = .Size
.WriteText "helloworld", 1
.SaveToFile filepath_save
.Close
End With

Set obj = Nothing

Open filepath_save For Input As #1 //消除UTF-8的BOM
Line Input #1, str
mm = Replace(str, str, "msgid """"")
Close #1
Open filepath_save For Binary As #FileNum
Put #FileNum, , mm
Close #FileNum
Next j

Set eworksheet = Nothing
eworkbook.Close
Set eworkbook = Nothing
app.Quit
Set app = Nothing
这是其中的一部分关键的代码,如果没有设置UTF-8的编码格式的话,一般NOTEPADE的写入格式都是默认为ANSI。好久没用VB写代码了,不过这次用起来还是感觉比较好的。所以在学校多学点是好的。

根据反馈,代码已作修改并调试通过:

分二步:
一、建立一个模块,复制下面代码
Option Explicit
Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Const CP_UTF8 = 65001

'读文件至变量
Private Function GetFile(FileName As String) As String
Dim i As Integer, BB() As Byte
If Dir(FileName) = "" Then Exit Function
i = FreeFile
ReDim BB(FileLen(FileName) - 1)
Open FileName For Binary As #i
Get #i, , BB
Close #i
GetFile = BB
End Function

'功能: 把Utf8字符转化成ANSI字符
Public Function UTF8_Decode(FileName As String) As String
Dim sUTF8 As String
Dim lngUtf8Size As Long
Dim strBuffer As String
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
Dim n As Long
sUTF8 = GetFile(FileName)
If LenB(sUTF8) = 0 Then Exit Function
On Error GoTo EndFunction
bytUtf8 = sUTF8
lngUtf8Size = UBound(bytUtf8) + 1
lngBufferSize = lngUtf8Size * 2
strBuffer = String$(lngBufferSize, vbNullChar)
lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _
lngUtf8Size, StrPtr(strBuffer), lngBufferSize)
If lngResult Then
UTF8_Decode = Left(strBuffer, lngResult)
End If
EndFunction:

End Function

二、调用举例:
如果你想把一个"c:\1.txt"的UTF-8文件转换为ANSI编码,可这样调用
dim s as string
s=UTF8_Decode("c:\1.txt") '文件名请根据实际修改
此时,s存放的就是ANSI格式编码了,不会出现乱码问题

用VB进行 Utf8 格式字符的转换
'添加控件:Command1、Text1
'在属性窗口将 text1 的属性 MultiLine 设置为 True,属性 ScrollBars 设置为 3
Private Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long
Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Const CP_UTF8 = 65001
Private Sub Command1_Click()
Dim F As String
F = "C:\my.txt"
Text1.Text = ReadFile(F) '读出文本文件 或 Utf8 文件
End Sub
Private Function ReadFile(F As String) As String
Dim H As Long, S As Long, B() As Byte, S1 As Long, Su As Long
Dim IsUtf8 As Boolean, Str2000 As String

On Error GoTo Cuo
S = FileLen(F)
On Error GoTo 0

If S < 1 Then GoTo Cuo
H = FreeFile: ReDim B(0 To S - 1)
Open F For Binary As #H
Get #H, , B
Close #H

If S > 2 Then
If B(0) = 239 And B(1) = 187 And B(2) = 191 Then IsUtf8 = True '检查前三个字节,Utf8 格式文件的标记
End If
If Not IsUtf8 Then
ReadFile = StrConv(B, vbUnicode)
Str2000 = LCase(Left(ReadFile, 2000)) '检查是否是 Utf8 的网页文件
Do
S = InStr(S1 + 1, Str2000, "<meta ")
If S = 0 Then Exit Function
S1 = InStr(S + 1, Str2000, ">")
Su = InStr(S + 1, Str2000, "charset=utf-8")
If Su > 0 And Su < S1 Then Exit Do '是 Utf8 格式
Loop
End If
ReadFile = Utf8ToUni(B) '或:ReadFile = Utf8ToUniByAPI(B) '将 Utf8 转换为 Unicode
Exit Function
'----------------------------
Cuo:
Close #H
ReadFile = "文件没有找到:" & vbCrLf & F
End Function
Private Function Utf8ToUniByAPI(ByRef B() As Byte) As String
'使用 API,将 Utf8 转换为 Unicode
Dim ReS As Long, S1 As Long, S2 As Long

On Error Resume Next
S1 = UBound(B) - LBound(B) + 1
If S1 < 1 Then Exit Function
On Error GoTo 0

S2 = S1 * 2
Utf8ToUniByAPI = String$(S2, vbNullChar)
ReS = MultiByteToWideChar(CP_UTF8, 0, VarPtr(B(0)), S1, StrPtr(Utf8ToUniByAPI), S2)
If ReS > 0 Then Utf8ToUniByAPI = Left(Utf8ToUniByAPI, ReS)
dd = Asc(Utf8ToUniByAPI)
End Function
Private Function Utf8ToUni(B() As Byte) As String
'不使用 API,将 Utf8 转换为 Unicode
Dim BU As Long

On Error Resume Next
BU = -1: BU = UBound(B)
If BU = -1 Then Exit Function
On Error GoTo 0

Dim I As Long, K As Long, N As Long
Dim B1 As Byte, cnt As Byte

I = LBound(B)
If BU > I + 1 Then
If B(I) = 239 And B(I + 1) = 187 And B(I + 2) = 191 Then I = I + 3 '去掉前三个字符
End If

Do
If I > BU Then Exit Do
B1 = B(I)

If (B1 And &HFC) = &HFC Then
cnt = 6
ElseIf (B1 And &HF8) = &HF8 Then
cnt = 5
ElseIf (B1 And &HF0) = &HF0 Then
cnt = 4
ElseIf (B1 And &HE0) = &HE0 Then
cnt = 3
ElseIf (B1 And &HC0) = &HC0 Then
cnt = 2
Else
cnt = 1
End If

If I + cnt - 1 > BU Then Utf8ToUni = Utf8ToUni & "?": Exit Do

Select Case cnt
Case 2: N = B1 And &H1F
Case 3: N = B1 And &HF
Case 4: N = B1 And &H7
Case 5: N = B1 And &H3
Case 6: N = B1 And &H1
Case Else: Utf8ToUni = Utf8ToUni & Chr(B1): GoTo Next1
End Select

For K = 1 To cnt - 1
N = N * &H40 + (B(I + K) And &H3F)
Next

Utf8ToUni = Utf8ToUni & ChrW(N)
Next1:
I = I + cnt
Loop
End Function

相关链接

欢迎反馈与建议,请联系电邮
2024 © 视觉网