Skip to content

Commit e649a7c

Browse files
Use UTF8 instance that doesn't emit BOM (#3399)
1 parent e037c8a commit e649a7c

File tree

6 files changed

+230
-10
lines changed

6 files changed

+230
-10
lines changed

src/Microsoft.Data.SqlClient.sln

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "steps", "steps", "{AD738BD4
304304
EndProject
305305
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.Data.SqlClient.UnitTests", "Microsoft.Data.SqlClient\tests\UnitTests\Microsoft.Data.SqlClient.UnitTests.csproj", "{4461063D-2F2B-274C-7E6F-F235119D258E}"
306306
EndProject
307+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Microsoft.Data.SqlClient\tests\Common\Common.csproj", "{67128EC0-30F5-6A98-448B-55F88A1DE707}"
308+
EndProject
307309
Global
308310
GlobalSection(SolutionConfigurationPlatforms) = preSolution
309311
Debug|Any CPU = Debug|Any CPU
@@ -582,6 +584,18 @@ Global
582584
{4461063D-2F2B-274C-7E6F-F235119D258E}.Release|x64.Build.0 = Release|x64
583585
{4461063D-2F2B-274C-7E6F-F235119D258E}.Release|x86.ActiveCfg = Release|x86
584586
{4461063D-2F2B-274C-7E6F-F235119D258E}.Release|x86.Build.0 = Release|x86
587+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
588+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Debug|Any CPU.Build.0 = Debug|Any CPU
589+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Debug|x64.ActiveCfg = Debug|x64
590+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Debug|x64.Build.0 = Debug|x64
591+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Debug|x86.ActiveCfg = Debug|x86
592+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Debug|x86.Build.0 = Debug|x86
593+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Release|Any CPU.ActiveCfg = Release|Any CPU
594+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Release|Any CPU.Build.0 = Release|Any CPU
595+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Release|x64.ActiveCfg = Release|x64
596+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Release|x64.Build.0 = Release|x64
597+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Release|x86.ActiveCfg = Release|x86
598+
{67128EC0-30F5-6A98-448B-55F88A1DE707}.Release|x86.Build.0 = Release|x86
585599
EndGlobalSection
586600
GlobalSection(SolutionProperties) = preSolution
587601
HideSolutionNode = FALSE
@@ -632,6 +646,7 @@ Global
632646
{09352F1D-878F-4F55-8AA2-6E47F1AD37D5} = {4CAE9195-4F1A-4D48-854C-1C9FBC512C66}
633647
{AD738BD4-6A02-4B88-8F93-FBBBA49A74C8} = {4CAE9195-4F1A-4D48-854C-1C9FBC512C66}
634648
{4461063D-2F2B-274C-7E6F-F235119D258E} = {0CC4817A-12F3-4357-912C-09315FAAD008}
649+
{67128EC0-30F5-6A98-448B-55F88A1DE707} = {0CC4817A-12F3-4357-912C-09315FAAD008}
635650
EndGlobalSection
636651
GlobalSection(ExtensibilityGlobals) = postSolution
637652
SolutionGuid = {01D48116-37A2-4D33-B9EC-94793C702431}

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2813,7 +2813,7 @@ private TdsOperationStatus TryProcessEnvChange(int tokenLength, TdsParserStateOb
28132813
// UTF8 collation
28142814
if (env._newCollation.IsUTF8)
28152815
{
2816-
_defaultEncoding = Encoding.UTF8;
2816+
_defaultEncoding = s_utf8EncodingWithoutBom;
28172817
}
28182818
else
28192819
{
@@ -4324,7 +4324,7 @@ internal TdsOperationStatus TryProcessReturnValue(int length,
43244324

43254325
if (rec.collation.IsUTF8)
43264326
{ // UTF8 collation
4327-
rec.encoding = Encoding.UTF8;
4327+
rec.encoding = s_utf8EncodingWithoutBom;
43284328
}
43294329
else
43304330
{
@@ -5181,7 +5181,7 @@ private TdsOperationStatus TryProcessTypeInfo(TdsParserStateObject stateObj, Sql
51815181

51825182
if (col.collation.IsUTF8)
51835183
{ // UTF8 collation
5184-
col.encoding = Encoding.UTF8;
5184+
col.encoding = s_utf8EncodingWithoutBom;
51855185
}
51865186
else
51875187
{
@@ -5986,7 +5986,7 @@ private TdsOperationStatus TryReadSqlStringValue(SqlBuffer value, byte type, int
59865986
break;
59875987

59885988
case TdsEnums.SQLJSON:
5989-
encoding = Encoding.UTF8;
5989+
encoding = s_utf8EncodingWithoutBom;
59905990
string jsonStringValue;
59915991
result = stateObj.TryReadStringWithEncoding(length, encoding, isPlp, out jsonStringValue);
59925992
if (result != TdsOperationStatus.Done)
@@ -11052,7 +11052,7 @@ internal Task WriteBulkCopyValue(object value, SqlMetaDataPriv metadata, TdsPars
1105211052
// Replace encoding if it is UTF8
1105311053
if (metadata.collation.IsUTF8)
1105411054
{
11055-
_defaultEncoding = Encoding.UTF8;
11055+
_defaultEncoding = s_utf8EncodingWithoutBom;
1105611056
}
1105711057

1105811058
_defaultCollation = metadata.collation;

src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2865,7 +2865,7 @@ private TdsOperationStatus TryProcessEnvChange(int tokenLength, TdsParserStateOb
28652865
// UTF8 collation
28662866
if (env._newCollation.IsUTF8)
28672867
{
2868-
_defaultEncoding = Encoding.UTF8;
2868+
_defaultEncoding = s_utf8EncodingWithoutBom;
28692869
}
28702870
else
28712871
{
@@ -4376,7 +4376,7 @@ internal TdsOperationStatus TryProcessReturnValue(int length,
43764376

43774377
if (rec.collation.IsUTF8)
43784378
{ // UTF8 collation
4379-
rec.encoding = Encoding.UTF8;
4379+
rec.encoding = s_utf8EncodingWithoutBom;
43804380
}
43814381
else
43824382
{
@@ -5297,7 +5297,7 @@ private TdsOperationStatus TryProcessTypeInfo(TdsParserStateObject stateObj, Sql
52975297

52985298
if (col.collation.IsUTF8)
52995299
{ // UTF8 collation
5300-
col.encoding = Encoding.UTF8;
5300+
col.encoding = s_utf8EncodingWithoutBom;
53015301
}
53025302
else
53035303
{
@@ -6183,7 +6183,7 @@ private TdsOperationStatus TryReadSqlStringValue(SqlBuffer value, byte type, int
61836183
break;
61846184

61856185
case TdsEnums.SQLJSON:
6186-
encoding = Encoding.UTF8;
6186+
encoding = s_utf8EncodingWithoutBom;
61876187
string jsonStringValue;
61886188
result = stateObj.TryReadStringWithEncoding(length, encoding, isPlp, out jsonStringValue);
61896189
if (result != TdsOperationStatus.Done)
@@ -11240,7 +11240,7 @@ internal Task WriteBulkCopyValue(object value, SqlMetaDataPriv metadata, TdsPars
1124011240
// Replace encoding if it is UTF8
1124111241
if (metadata.collation.IsUTF8)
1124211242
{
11243-
_defaultEncoding = Encoding.UTF8;
11243+
_defaultEncoding = s_utf8EncodingWithoutBom;
1124411244
}
1124511245

1124611246
_defaultCollation = metadata.collation;

src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/TdsParser.cs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,18 @@
11
using System;
22
using System.Buffers;
33
using System.Diagnostics;
4+
using System.Text;
45
using Microsoft.Data.SqlClient.Utilities;
56

67
#nullable enable
78

89
namespace Microsoft.Data.SqlClient
910
{
11+
1012
internal partial class TdsParser
1113
{
14+
private static readonly Encoding s_utf8EncodingWithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
15+
1216
internal void ProcessSSPI(int receivedLength)
1317
{
1418
Debug.Assert(_authenticationProvider is not null);

src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,7 @@
148148
<Compile Include="SQL\SqlBulkCopyTest\MissingTargetColumns.cs" />
149149
<Compile Include="SQL\SqlBulkCopyTest\MissingTargetTable.cs" />
150150
<Compile Include="SQL\SqlBulkCopyTest\SqlBulkCopyTest.cs" />
151+
<Compile Include="SQL\SqlBulkCopyTest\TestBulkCopyWithUTF8.cs" />
151152
<Compile Include="SQL\SqlBulkCopyTest\Transaction.cs" />
152153
<Compile Include="SQL\SqlBulkCopyTest\Transaction1.cs" />
153154
<Compile Include="SQL\SqlBulkCopyTest\Transaction2.cs" />
src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs

Lines changed: 200 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,200 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using System.Data;
7+
using System.Threading.Tasks;
8+
using Xunit;
9+
10+
namespace Microsoft.Data.SqlClient.ManualTesting.Tests
{
    /// <summary>
    /// Validates SqlBulkCopy functionality when working with UTF-8 encoded data.
    /// Ensures that data copied from a UTF-8 source table to a destination table retains its
    /// encoding and content integrity: the bytes stored in the destination must be exactly the
    /// UTF-8 bytes of the test value, with nothing prepended or appended.
    /// </summary>
    /// <remarks>
    /// The constructor creates and populates the tables and <see cref="Dispose"/> drops them,
    /// so every test case starts from an empty destination table.
    /// </remarks>
    public sealed class TestBulkCopyWithUtf8 : IDisposable
    {
        // Table names are generated once per test run and shared by all test cases in this class.
        private static readonly string s_sourceTable = DataTestUtility.GetUniqueName("SourceTableForUTF8Data");
        private static readonly string s_destinationTable = DataTestUtility.GetUniqueName("DestinationTableForUTF8Data");

        // The value inserted into the source table and its expected UTF-8 byte representation.
        private static readonly string s_testValue = "test";
        private static readonly byte[] s_testValueInUtf8Bytes = new byte[] { 0x74, 0x65, 0x73, 0x74 };

        // Built from test-owned constants only; no external input is concatenated into the SQL.
        private static readonly string s_insertQuery = $"INSERT INTO {s_sourceTable} VALUES('{s_testValue}')";

        /// <summary>
        /// Constructor: Initializes and populates source and destination tables required for the tests.
        /// </summary>
        public TestBulkCopyWithUtf8()
        {
            using SqlConnection sourceConnection = new SqlConnection(GetConnectionString(true));
            sourceConnection.Open();
            SetupTables(sourceConnection, s_sourceTable, s_destinationTable, s_insertQuery);
        }

        /// <summary>
        /// Cleanup method to drop tables after test completion.
        /// </summary>
        public void Dispose()
        {
            // The using declaration closes the connection when it goes out of scope,
            // so no explicit Close() call is needed.
            using SqlConnection connection = new SqlConnection(GetConnectionString(true));
            connection.Open();
            DataTestUtility.DropTable(connection, s_sourceTable);
            DataTestUtility.DropTable(connection, s_destinationTable);
        }

        /// <summary>
        /// Builds a connection string with or without Multiple Active Result Sets (MARS) property.
        /// </summary>
        /// <param name="enableMars">Value assigned to <c>MultipleActiveResultSets</c>.</param>
        /// <returns>The TCP test connection string with the MARS setting applied.</returns>
        private static string GetConnectionString(bool enableMars)
        {
            SqlConnectionStringBuilder builder = new SqlConnectionStringBuilder(DataTestUtility.TCPConnectionString)
            {
                MultipleActiveResultSets = enableMars
            };

            return builder.ConnectionString;
        }

        /// <summary>
        /// Creates source and destination tables with a varchar(max) column with a collation setting
        /// that stores the data in UTF8 encoding and inserts the data in the source table.
        /// </summary>
        /// <param name="connection">An open connection used to create and populate the tables.</param>
        /// <param name="sourceTable">Name of the table that receives the test row.</param>
        /// <param name="destinationTable">Name of the table that the bulk copy writes into.</param>
        /// <param name="insertQuery">INSERT statement that populates <paramref name="sourceTable"/>.</param>
        private static void SetupTables(SqlConnection connection, string sourceTable, string destinationTable, string insertQuery)
        {
            string columnDefinition = "(str_col varchar(max) COLLATE Latin1_General_100_CS_AS_KS_WS_SC_UTF8)";
            DataTestUtility.CreateTable(connection, sourceTable, columnDefinition);
            DataTestUtility.CreateTable(connection, destinationTable, columnDefinition);

            using SqlCommand insertCommand = connection.CreateCommand();
            insertCommand.CommandText = insertQuery;
            Helpers.TryExecute(insertCommand, insertQuery);
        }

        /// <summary>
        /// Synchronous test case: Validates that data copied using SqlBulkCopy matches UTF-8 byte sequence for test value.
        /// Tested with MARS enabled and disabled, and with streaming enabled and disabled.
        /// </summary>
        /// <param name="isMarsEnabled">Whether the connections are opened with MARS enabled.</param>
        /// <param name="enableStreaming">Value assigned to <c>SqlBulkCopy.EnableStreaming</c>.</param>
        [ConditionalTheory(typeof(DataTestUtility),
            nameof(DataTestUtility.AreConnStringsSetup),
            nameof(DataTestUtility.IsNotAzureServer),
            nameof(DataTestUtility.IsNotAzureSynapse))]
        [InlineData(true, true)]
        [InlineData(false, true)]
        [InlineData(true, false)]
        [InlineData(false, false)]
        public void BulkCopy_Utf8Data_ShouldMatchSource(bool isMarsEnabled, bool enableStreaming)
        {
            // Setup connections for source and destination tables
            string connectionString = GetConnectionString(isMarsEnabled);
            using SqlConnection sourceConnection = new SqlConnection(connectionString);
            sourceConnection.Open();
            using SqlConnection destinationConnection = new SqlConnection(connectionString);
            destinationConnection.Open();

            // Read data from source table
            using SqlCommand sourceDataCommand = new SqlCommand($"SELECT str_col FROM {s_sourceTable}", sourceConnection);
            using SqlDataReader reader = sourceDataCommand.ExecuteReader(CommandBehavior.SequentialAccess);

            // Verify that the destination table is empty before bulk copy
            using SqlCommand countCommand = new SqlCommand($"SELECT COUNT(*) FROM {s_destinationTable}", destinationConnection);
            Assert.Equal(0, Convert.ToInt32(countCommand.ExecuteScalar()));

            // Initialize bulk copy configuration
            using SqlBulkCopy bulkCopy = new SqlBulkCopy(destinationConnection)
            {
                EnableStreaming = enableStreaming,
                DestinationTableName = s_destinationTable
            };

            // Perform bulk copy from source to destination table. Any exception is deliberately
            // left to propagate so the test failure carries the exception type and stack trace.
            bulkCopy.WriteToServer(reader);

            // Verify that the 1 row from the source table has been copied into our destination table.
            Assert.Equal(1, Convert.ToInt32(countCommand.ExecuteScalar()));

            // Read the data from destination table as varbinary to verify the UTF-8 byte sequence
            using SqlCommand verifyCommand = new SqlCommand($"SELECT cast(str_col as varbinary) FROM {s_destinationTable}", destinationConnection);
            using SqlDataReader verifyReader = verifyCommand.ExecuteReader(CommandBehavior.SequentialAccess);

            // Verify that we have data in the destination table
            Assert.True(verifyReader.Read(), "No data found in destination table after bulk copy.");

            // Read the value of the column as SqlBinary.
            byte[] actualBytes = verifyReader.GetSqlBinary(0).Value;

            // Verify that the byte array matches the expected UTF-8 byte sequence. The length is
            // asserted first so that unexpected extra bytes produce a clear failure message.
            Assert.Equal(s_testValueInUtf8Bytes.Length, actualBytes.Length);
            Assert.Equal(s_testValueInUtf8Bytes, actualBytes);
        }

        /// <summary>
        /// Asynchronous version of the testcase BulkCopy_Utf8Data_ShouldMatchSource
        /// </summary>
        /// <param name="isMarsEnabled">Whether the connections are opened with MARS enabled.</param>
        /// <param name="enableStreaming">Value assigned to <c>SqlBulkCopy.EnableStreaming</c>.</param>
        [ConditionalTheory(typeof(DataTestUtility),
            nameof(DataTestUtility.AreConnStringsSetup),
            nameof(DataTestUtility.IsNotAzureServer),
            nameof(DataTestUtility.IsNotAzureSynapse))]
        [InlineData(true, true)]
        [InlineData(false, true)]
        [InlineData(true, false)]
        [InlineData(false, false)]
        public async Task BulkCopy_Utf8Data_ShouldMatchSource_Async(bool isMarsEnabled, bool enableStreaming)
        {
            // Setup connections for source and destination tables
            string connectionString = GetConnectionString(isMarsEnabled);
            using SqlConnection sourceConnection = new SqlConnection(connectionString);
            await sourceConnection.OpenAsync();
            using SqlConnection destinationConnection = new SqlConnection(connectionString);
            await destinationConnection.OpenAsync();

            // Read data from source table
            using SqlCommand sourceDataCommand = new SqlCommand($"SELECT str_col FROM {s_sourceTable}", sourceConnection);
            using SqlDataReader reader = await sourceDataCommand.ExecuteReaderAsync(CommandBehavior.SequentialAccess);

            // Verify that the destination table is empty before bulk copy
            using SqlCommand countCommand = new SqlCommand($"SELECT COUNT(*) FROM {s_destinationTable}", destinationConnection);
            Assert.Equal(0, Convert.ToInt32(await countCommand.ExecuteScalarAsync()));

            // Initialize bulk copy configuration
            using SqlBulkCopy bulkCopy = new SqlBulkCopy(destinationConnection)
            {
                EnableStreaming = enableStreaming,
                DestinationTableName = s_destinationTable
            };

            // Perform bulk copy from source to destination table. Any exception is deliberately
            // left to propagate so the test failure carries the exception type and stack trace.
            await bulkCopy.WriteToServerAsync(reader);

            // Verify that the 1 row from the source table has been copied into our destination table.
            Assert.Equal(1, Convert.ToInt32(await countCommand.ExecuteScalarAsync()));

            // Read the data from destination table as varbinary to verify the UTF-8 byte sequence
            using SqlCommand verifyCommand = new SqlCommand($"SELECT cast(str_col as varbinary) FROM {s_destinationTable}", destinationConnection);
            using SqlDataReader verifyReader = await verifyCommand.ExecuteReaderAsync(CommandBehavior.SequentialAccess);

            // Verify that we have data in the destination table
            Assert.True(await verifyReader.ReadAsync(), "No data found in destination table after bulk copy.");

            // Read the value of the column as SqlBinary.
            byte[] actualBytes = verifyReader.GetSqlBinary(0).Value;

            // Verify that the byte array matches the expected UTF-8 byte sequence. The length is
            // asserted first so that unexpected extra bytes produce a clear failure message.
            Assert.Equal(s_testValueInUtf8Bytes.Length, actualBytes.Length);
            Assert.Equal(s_testValueInUtf8Bytes, actualBytes);
        }
    }
}

0 commit comments

Comments
 (0)