Skip to content

Commit e1a6b6e

Browse files
committed
Improvements
Added support for UTF-8 encoded files. Fixed infinite loop when not linebreak found.
1 parent 73fbee5 commit e1a6b6e

File tree

1 file changed

+77
-25
lines changed

1 file changed

+77
-25
lines changed

src/ECPTextStream.cls

Lines changed: 77 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@ Attribute VB_PredeclaredId = False
99
Attribute VB_Exposed = False
1010
'#
1111
'////////////////////////////////////////////////////////////////////////////////////////////
12-
' Copyright © 2021 W. García
12+
' Copyright © 2021-2022 W. García
1313
' GPL-3.0 license | https://www.gnu.org/licenses/gpl-3.0.html/
1414
' https://ingwilfredogarcia.wordpress.com
1515
'#
1616
'////////////////////////////////////////////////////////////////////////////////////////////
1717
' GENERAL INFO:
1818
' ECPTextStream is an easy-to-use class module developed to enable I/O operations over "big"
19-
' text files, at high speed, from VBA. The module hasn’t reference to any external API library.
19+
' text files, at high speed, from VBA. The module hasn’t reference to any external API
20+
' library and has the ability to read and write UTF-8 encoded files.
2021
'////////////////////////////////////////////////////////////////////////////////////////////
2122
'#
2223
Option Explicit
@@ -25,7 +26,7 @@ Option Explicit
2526
' CONSTANTS:
2627
Private Const DualLFchar As String = vbLf & vbLf
2728
Private Const InverseCRLF As String = vbLf & vbCr
28-
Private Const SizeFactor As Long = 524288
29+
Private Const sizeFactor As Long = 524288
2930
Private Const WhiteSpace As String = " "
3031
'////////////////////////////////////////////////////////////////////////////////////////////
3132
'#
@@ -38,10 +39,12 @@ Private P_ENDSTREAMONLINEBREAK As Boolean '--------If true, each stream ends on
3839
Private P_ISOPENSTREAM As Boolean '----------------Indicates if the object is linked to a file
3940
Private P_LINEBREAK As String '--------------------Holds the char used to end a Stream.
4041
Private P_LINEBREAKMATCHINGBEHAVIOR As EndLineMatchingBehavior 'How to find the next line break.
41-
Private P_UNIFIEDLFOUTPUT As Boolean '-------------If true, the buffer string will be returned
42-
' with the LF char as Line Break.
42+
Private P_UNIFIEDLFOUTPUT As Boolean '-------------If true, the buffer string will be returned _
43+
with the LF char as Line Break.
4344
Private P_STREAMLENGTH As Long '-------------------File len.
4445
Private P_TEXT As String '-------------------------Holds the current stream's text.
46+
Private P_UTF8ENCODED As Boolean '-----------------Indicates when the file is supposed to be _
47+
UTF8 encoded.
4548
'////////////////////////////////////////////////////////////////////////////////////////////
4649
'#
4750
'////////////////////////////////////////////////////////////////////////////////////////////
@@ -93,15 +96,19 @@ Attribute bufferSize.VB_Description = "Gets or sets the buffer
9396
End Property
9497
Public Property Let bufferSize(value As Single)
9598
P_BUFFERSIZE = value
96-
P_BUFFERLENGTH = CLng(P_BUFFERSIZE * SizeFactor)
99+
P_BUFFERLENGTH = CLng(P_BUFFERSIZE * sizeFactor)
97100
Buffer = SPACE$(P_BUFFERLENGTH)
98101
End Property
99102
Public Property Get bufferString() As String
100103
Attribute bufferString.VB_Description = "Gets the text data stored in the buffer."
101104
If P_UNIFIEDLFOUTPUT Then
102105
NormalizeLineBreaks
103106
End If
104-
bufferString = P_TEXT
107+
If Not P_UTF8ENCODED Then
108+
bufferString = P_TEXT
109+
Else
110+
bufferString = UTF8Decode(P_TEXT)
111+
End If
105112
End Property
106113
Public Property Get endStreamOnLineBreak() As Boolean
107114
Attribute endStreamOnLineBreak.VB_Description = "Allows to end buffer just after the first, from right to left, line break character."
@@ -113,9 +120,9 @@ End Property
113120
Public Property Get isOpenStream() As Boolean
114121
isOpenStream = P_ISOPENSTREAM
115122
End Property
116-
Public Property Get lineBreak() As String
117-
Attribute lineBreak.VB_Description = "Returns the character used to end the last received stream. The value is vbNullString when the last stream is not forced to end on line break."
118-
lineBreak = P_LINEBREAK
123+
Public Property Get LineBreak() As String
124+
Attribute LineBreak.VB_Description = "Returns the character used to end the last received stream. The value is vbNullString when the last stream is not forced to end on line break."
125+
LineBreak = P_LINEBREAK
119126
End Property
120127
Public Property Get linebreakMatchingBehavior() As EndLineMatchingBehavior
121128
linebreakMatchingBehavior = P_LINEBREAKMATCHINGBEHAVIOR
@@ -136,12 +143,19 @@ Attribute streamLength.VB_Description = "Gets the current opened file
136143
streamLength = P_STREAMLENGTH
137144
End Property
138145
Public Property Get unifiedLFOutput() As Boolean
139-
Attribute unifiedLFOutput.VB_Description = "Determines whether the buffer string is returned using only the LF character as a linefeed."
146+
Attribute unifiedLFOutput.VB_Description = "Determines whether the buffer string is returned using only the LF character as a line feed. Similarly, this property instruct to write files without the Unicode Byte Order Mark."
140147
unifiedLFOutput = P_UNIFIEDLFOUTPUT
141148
End Property
142149
Public Property Let unifiedLFOutput(value As Boolean)
143150
P_UNIFIEDLFOUTPUT = value
144151
End Property
152+
Public Property Get utf8EncodedFile() As Boolean
153+
Attribute utf8EncodedFile.VB_Description = "Indicates whether the buffer string is returned as a decoded string, assuming the file is UTF8 encoded."
154+
utf8EncodedFile = P_UTF8ENCODED
155+
End Property
156+
Public Property Let utf8EncodedFile(value As Boolean)
157+
P_UTF8ENCODED = value
158+
End Property
145159
'////////////////////////////////////////////////////////////////////////////////////////////
146160
'#
147161
'////////////////////////////////////////////////////////////////////////////////////////////
@@ -156,7 +170,7 @@ End Sub
156170
Private Sub DoubleBufferSize()
157171
Dim LCSt As Long
158172
P_BUFFERSIZE = 2 * P_BUFFERSIZE
159-
P_BUFFERLENGTH = CLng(P_BUFFERSIZE * SizeFactor)
173+
P_BUFFERLENGTH = CLng(P_BUFFERSIZE * sizeFactor)
160174
LCSt = P_STREAMLENGTH - P_BUFFERLENGTH
161175
Select Case LCSt
162176
Case Is > 0
@@ -318,44 +332,82 @@ Attribute SeekPointer.VB_Description = "Moves the pointer, over the target file,
318332
End Sub
319333
Private Sub StartVariables()
320334
CorrectedPos = 0
321-
P_BUFFERLENGTH = CLng(P_BUFFERSIZE * SizeFactor)
335+
P_BUFFERLENGTH = CLng(P_BUFFERSIZE * sizeFactor)
322336
Buffer = SPACE$(P_BUFFERLENGTH)
323337
LCS = P_STREAMLENGTH
324338
P_ATENDOFSTREAM = False
325339
End Sub
340+
Public Function UTF8Decode(ByVal sStr As String) As String
341+
Dim l As Long, sUTF8 As CSVArrayList, iChar As Long, iChar2 As Long
342+
343+
On Error GoTo UTF8_Decode_error
344+
Set sUTF8 = New CSVArrayList
345+
For l = 1 To LenB(sStr) Step 2
346+
iChar = Asc(MidB$(sStr, l, 2))
347+
If iChar > 127 Then
348+
If Not iChar And 32 Then ' 2 chars
349+
iChar2 = Asc(MidB$(sStr, l + 2, 2))
350+
sUTF8.Add ChrW$(((31 And iChar) * 64 + (63 And iChar2)))
351+
l = l + 2
352+
Else
353+
Dim iChar3 As Integer
354+
iChar2 = Asc(MidB$(sStr, l + 2, 2))
355+
iChar3 = Asc(MidB$(sStr, l + 4, 2))
356+
sUTF8.Add ChrW$(((iChar And 15) * 16 * 256) + ((iChar2 And 63) * 64) + (iChar3 And 63))
357+
l = l + 4
358+
End If
359+
Else
360+
sUTF8.Add ChrW$(iChar)
361+
End If
362+
Next l
363+
UTF8Decode = Join$(sUTF8.items, vbNullString)
364+
Set sUTF8 = Nothing
365+
Exit Function
366+
UTF8_Decode_error:
367+
Set sUTF8 = Nothing
368+
UTF8Decode = vbNullString
369+
End Function
326370
Public Sub WriteBlankLines(Lines As Long, Optional EndLineMark As EndLineChar = 0)
327371
Attribute WriteBlankLines.VB_Description = "Inserts a specified number of blank lines into the current opened text file."
328372
If P_ISOPENSTREAM Then
329-
Dim idx As Long
373+
Dim Idx As Long
330374
Select Case EndLineMark
331375
Case 0
332-
For idx = 1 To Lines
333-
Put #FileHandled, , vbCrLf
334-
Next idx
376+
For Idx = 1 To Lines
377+
WriteText vbCrLf
378+
Next Idx
335379
Case 1
336-
For idx = 1 To Lines
337-
Put #FileHandled, , vbCr
338-
Next idx
380+
For Idx = 1 To Lines
381+
WriteText vbCr
382+
Next Idx
339383
Case 2
340-
For idx = 1 To Lines
341-
Put #FileHandled, , vbLf
342-
Next idx
384+
For Idx = 1 To Lines
385+
WriteText vbLf
386+
Next Idx
343387
End Select
344388
P_STREAMLENGTH = LOF(FileHandled)
345389
End If
346390
End Sub
347391
Public Sub WriteText(ByRef TextData As String)
348392
Attribute WriteText.VB_Description = "Writes the given string to the current opened text file."
349393
If P_ISOPENSTREAM Then
350-
Put #FileHandled, , TextData
394+
If Not P_UTF8ENCODED Then
395+
Put #FileHandled, , TextData
396+
Else
397+
Dim BuffferBytes() As Byte
398+
BuffferBytes = TextData
399+
Put #FileHandled, , BuffferBytes
400+
Erase BuffferBytes
401+
End If
351402
P_STREAMLENGTH = LOF(FileHandled)
352403
End If
353404
End Sub
354405
'////////////////////////////////////////////////////////////////////////////////////////////
355406
Private Sub Class_Initialize()
356407
P_BUFFERSIZE = 0.5
357-
P_BUFFERLENGTH = CLng(P_BUFFERSIZE * SizeFactor)
408+
P_BUFFERLENGTH = CLng(P_BUFFERSIZE * sizeFactor)
358409
P_ENDSTREAMONLINEBREAK = False
410+
P_UTF8ENCODED = False
359411
P_LINEBREAKMATCHINGBEHAVIOR = EndLineMatchingBehavior.Bidirectional
360412
P_UNIFIEDLFOUTPUT = False
361413
Buffer = SPACE$(P_BUFFERLENGTH)

0 commit comments

Comments
 (0)