Skip to content

Commit c808346

Browse files
Use UTF8 instance that doesn't emit BOM
1 parent 0b1f00b commit c808346

File tree

5 files changed

+219
-8
lines changed

5 files changed

+219
-8
lines changed

src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ internal static void Assert(string message)
5252
}
5353
}
5454

55+
// Shared UTF-8 encoding constructed with encoderShouldEmitUTF8Identifier: false, i.e. one that does
// not emit a byte order mark (BOM). It replaces Encoding.UTF8 wherever a UTF8 collation is detected.
private static readonly Encoding s_utf8EncodingWithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
5556
private static int _objectTypeCount; // EventSource counter
5657
private readonly SqlClientLogger _logger = new SqlClientLogger();
5758

@@ -2791,7 +2792,7 @@ private bool TryProcessEnvChange(int tokenLength, TdsParserStateObject stateObj,
27912792
// UTF8 collation
27922793
if (env._newCollation.IsUTF8)
27932794
{
2794-
_defaultEncoding = Encoding.UTF8;
2795+
_defaultEncoding = s_utf8EncodingWithoutBom;
27952796
}
27962797
else
27972798
{
@@ -4199,7 +4200,7 @@ internal bool TryProcessReturnValue(int length, TdsParserStateObject stateObj, o
41994200
// UTF8 collation
42004201
if (rec.collation.IsUTF8)
42014202
{
4202-
rec.encoding = Encoding.UTF8;
4203+
rec.encoding = s_utf8EncodingWithoutBom;
42034204
}
42044205
else
42054206
{
@@ -4986,7 +4987,7 @@ private bool TryProcessTypeInfo(TdsParserStateObject stateObj, SqlMetaDataPriv c
49864987
// UTF8 collation
49874988
if (col.collation.IsUTF8)
49884989
{
4989-
col.encoding = Encoding.UTF8;
4990+
col.encoding = s_utf8EncodingWithoutBom;
49904991
}
49914992
else
49924993
{
@@ -10801,7 +10802,7 @@ internal Task WriteBulkCopyValue(object value, SqlMetaDataPriv metadata, TdsPars
1080110802
// Replace encoding if it is UTF8
1080210803
if (metadata.collation.IsUTF8)
1080310804
{
10804-
_defaultEncoding = Encoding.UTF8;
10805+
_defaultEncoding = s_utf8EncodingWithoutBom;
1080510806
}
1080610807

1080710808
_defaultCollation = metadata.collation;

src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ namespace Microsoft.Data.SqlClient
3333
// and surfacing objects to the user.
3434
sealed internal class TdsParser
3535
{
36+
// Shared UTF-8 encoding constructed with encoderShouldEmitUTF8Identifier: false, i.e. one that does
// not emit a byte order mark (BOM). It replaces Encoding.UTF8 wherever a UTF8 collation is detected.
private static readonly Encoding s_utf8EncodingWithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
3637
private static int _objectTypeCount; // EventSource Counter
3738
private readonly SqlClientLogger _logger = new SqlClientLogger();
3839

@@ -3235,7 +3236,7 @@ private bool TryProcessEnvChange(int tokenLength, TdsParserStateObject stateObj,
32353236
// UTF8 collation
32363237
if (env._newCollation.IsUTF8)
32373238
{
3238-
_defaultEncoding = Encoding.UTF8;
3239+
_defaultEncoding = s_utf8EncodingWithoutBom;
32393240
}
32403241
else
32413242
{
@@ -4757,7 +4758,7 @@ internal bool TryProcessReturnValue(int length,
47574758

47584759
if (rec.collation.IsUTF8)
47594760
{ // UTF8 collation
4760-
rec.encoding = Encoding.UTF8;
4761+
rec.encoding = s_utf8EncodingWithoutBom;
47614762
}
47624763
else
47634764
{
@@ -5657,7 +5658,7 @@ private bool TryProcessTypeInfo(TdsParserStateObject stateObj, SqlMetaDataPriv c
56575658

56585659
if (col.collation.IsUTF8)
56595660
{ // UTF8 collation
5660-
col.encoding = Encoding.UTF8;
5661+
col.encoding = s_utf8EncodingWithoutBom;
56615662
}
56625663
else
56635664
{
@@ -11740,7 +11741,7 @@ internal Task WriteBulkCopyValue(object value, SqlMetaDataPriv metadata, TdsPars
1174011741
// Replace encoding if it is UTF8
1174111742
if (metadata.collation.IsUTF8)
1174211743
{
11743-
_defaultEncoding = Encoding.UTF8;
11744+
_defaultEncoding = s_utf8EncodingWithoutBom;
1174411745
}
1174511746

1174611747
_defaultCollation = metadata.collation;

src/Microsoft.Data.SqlClient/tests/ManualTests/DataCommon/DataTestUtility.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,17 @@ public static string GetUniqueNameForSqlServer(string prefix, bool withBracket =
590590
return name;
591591
}
592592

593+
/// <summary>
/// (Re)creates a table: calls <see cref="DropTable"/> for <paramref name="tableName"/> and then
/// executes a <c>CREATE TABLE</c> statement built from the table name and body.
/// </summary>
/// <param name="sqlConnection">Connection the statements are executed on.</param>
/// <param name="tableName">Name of the table; inserted into the statement verbatim.</param>
/// <param name="createBody">
/// Text appended directly after the table name (no separator is added), e.g. a parenthesized column list.
/// </param>
public static void CreateTable(SqlConnection sqlConnection, string tableName, string createBody)
{
    // Clear out any previous table with the same name before creating it again.
    DropTable(sqlConnection, tableName);

    using SqlCommand createCommand = sqlConnection.CreateCommand();
    createCommand.CommandText = $"CREATE TABLE {tableName}{createBody}";
    createCommand.ExecuteNonQuery();
}
603+
593604
public static void DropTable(SqlConnection sqlConnection, string tableName)
594605
{
595606
ResurrectConnection(sqlConnection);

src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@
285285
<Compile Include="SQL\Common\SystemDataInternals\TdsParserHelper.cs" />
286286
<Compile Include="SQL\Common\SystemDataInternals\TdsParserStateObjectHelper.cs" />
287287
<Compile Include="SQL\ConnectionTestWithSSLCert\CertificateTest.cs" />
288+
<Compile Include="SQL\SqlBulkCopyTest\TestBulkCopyWithUTF8.cs" />
288289
<Compile Include="SQL\SqlCommand\SqlCommandStoredProcTest.cs" />
289290
<Compile Include="TracingTests\TestTdsServer.cs" />
290291
<Compile Include="XUnitAssemblyAttributes.cs" />
src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
using System;
2+
using System.Data;
3+
using System.Threading.Tasks;
4+
using Xunit;
5+
6+
namespace Microsoft.Data.SqlClient.ManualTesting.Tests.SQL.SqlBulkCopyTest
7+
{
8+
/// <summary>
/// Validates SqlBulkCopy functionality when working with UTF-8 encoded data.
/// Ensures that data copied from a UTF-8 source table to a destination table retains its encoding and content integrity.
/// </summary>
/// <remarks>
/// xUnit creates one instance of this class per executed test case, so the constructor and
/// <see cref="Dispose"/> act as per-test setup and teardown of the source and destination tables.
/// </remarks>
public sealed class TestBulkCopyWithUtf8 : IDisposable
{
    // Table names are generated once per test run; every test case recreates the tables under these names.
    private static readonly string s_sourceTable = DataTestUtility.GetUniqueName("SourceTableForUTF8Data");
    private static readonly string s_destinationTable = DataTestUtility.GetUniqueName("DestinationTableForUTF8Data");

    // The value inserted into the source table and its expected UTF-8 byte sequence ('t','e','s','t').
    private static readonly string s_testValue = "test";
    private static readonly byte[] s_testValueInUtf8Bytes = new byte[] { 0x74, 0x65, 0x73, 0x74 };
    private static readonly string s_insertQuery = $"INSERT INTO {s_sourceTable} VALUES('{s_testValue}')";

    /// <summary>
    /// Constructor: Initializes and populates source and destination tables required for the tests.
    /// </summary>
    public TestBulkCopyWithUtf8()
    {
        using SqlConnection sourceConnection = new SqlConnection(GetConnectionString(true));
        sourceConnection.Open();
        SetupTables(sourceConnection, s_sourceTable, s_destinationTable, s_insertQuery);
    }

    /// <summary>
    /// Cleanup method to drop tables after test completion.
    /// </summary>
    public void Dispose()
    {
        // The using declaration closes and disposes the connection; no explicit Close() is needed.
        using SqlConnection connection = new SqlConnection(GetConnectionString(true));
        connection.Open();
        DataTestUtility.DropTable(connection, s_sourceTable);
        DataTestUtility.DropTable(connection, s_destinationTable);
    }

    /// <summary>
    /// Builds a connection string with or without Multiple Active Result Sets (MARS) property.
    /// </summary>
    /// <param name="enableMars">Value assigned to <c>MultipleActiveResultSets</c>.</param>
    /// <returns>The TCP test connection string with the MARS setting applied.</returns>
    private static string GetConnectionString(bool enableMars)
    {
        SqlConnectionStringBuilder builder = new SqlConnectionStringBuilder(DataTestUtility.TCPConnectionString)
        {
            MultipleActiveResultSets = enableMars
        };
        return builder.ConnectionString;
    }

    /// <summary>
    /// Creates source and destination tables with a varchar(max) column with a collation setting
    /// that stores the data in UTF8 encoding and inserts the data in the source table.
    /// </summary>
    /// <param name="connection">Open connection used to create the tables and run the insert.</param>
    /// <param name="sourceTable">Name of the table the bulk copy reads from.</param>
    /// <param name="destinationTable">Name of the table the bulk copy writes to.</param>
    /// <param name="insertQuery">INSERT statement that populates the source table.</param>
    private static void SetupTables(SqlConnection connection, string sourceTable, string destinationTable, string insertQuery)
    {
        string columnDefinition = "(str_col varchar(max) COLLATE Latin1_General_100_CS_AS_KS_WS_SC_UTF8)";
        DataTestUtility.CreateTable(connection, sourceTable, columnDefinition);
        DataTestUtility.CreateTable(connection, destinationTable, columnDefinition);

        using SqlCommand insertCommand = connection.CreateCommand();
        insertCommand.CommandText = insertQuery;
        Helpers.TryExecute(insertCommand, insertQuery);
    }

    /// <summary>
    /// Synchronous test case: Validates that data copied using SqlBulkCopy matches UTF-8 byte sequence for test value.
    /// Tested with MARS enabled and disabled, and with streaming enabled and disabled.
    /// </summary>
    /// <param name="isMarsEnabled">Whether the connections are opened with MARS enabled.</param>
    /// <param name="enableStreaming">Value assigned to <see cref="SqlBulkCopy.EnableStreaming"/>.</param>
    // Gated by the same conditions as the async variant so both tests run (or are skipped) together.
    [ConditionalTheory(typeof(DataTestUtility),
        nameof(DataTestUtility.AreConnStringsSetup),
        nameof(DataTestUtility.IsNotAzureServer),
        nameof(DataTestUtility.IsNotAzureSynapse))]
    [InlineData(true, true)]
    [InlineData(false, true)]
    [InlineData(true, false)]
    [InlineData(false, false)]
    public void BulkCopy_Utf8Data_ShouldMatchSource(bool isMarsEnabled, bool enableStreaming)
    {
        // Setup connections for source and destination tables
        string connectionString = GetConnectionString(isMarsEnabled);
        using SqlConnection sourceConnection = new SqlConnection(connectionString);
        sourceConnection.Open();
        using SqlConnection destinationConnection = new SqlConnection(connectionString);
        destinationConnection.Open();

        // Read data from source table
        using SqlCommand sourceDataCommand = new SqlCommand($"SELECT str_col FROM {s_sourceTable}", sourceConnection);
        using SqlDataReader reader = sourceDataCommand.ExecuteReader(CommandBehavior.SequentialAccess);

        // Verify that the destination table is empty before bulk copy
        using SqlCommand countCommand = new SqlCommand($"SELECT COUNT(*) FROM {s_destinationTable}", destinationConnection);
        Assert.Equal(0, Convert.ToInt16(countCommand.ExecuteScalar()));

        // Initialize bulk copy configuration
        using SqlBulkCopy bulkCopy = new SqlBulkCopy(destinationConnection)
        {
            EnableStreaming = enableStreaming,
            DestinationTableName = s_destinationTable
        };

        // Perform bulk copy from source to destination table.
        // No try/catch: an exception thrown here fails the test and is reported with its type and stack trace.
        bulkCopy.WriteToServer(reader);

        // Verify that the 1 row from the source table has been copied into our destination table.
        Assert.Equal(1, Convert.ToInt16(countCommand.ExecuteScalar()));

        // Read the data from destination table as varbinary to verify the UTF-8 byte sequence.
        // NOTE: CAST to varbinary without a length is limited to 30 bytes in SQL Server, which is
        // enough for the 4-byte test value; use an explicit length if the test value ever grows.
        using SqlCommand verifyCommand = new SqlCommand($"SELECT cast(str_col as varbinary) FROM {s_destinationTable}", destinationConnection);
        using SqlDataReader verifyReader = verifyCommand.ExecuteReader(CommandBehavior.SequentialAccess);

        // Verify that we have data in the destination table
        Assert.True(verifyReader.Read(), "No data found in destination table after bulk copy.");

        // Read the value of the column as SqlBinary.
        byte[] actualBytes = verifyReader.GetSqlBinary(0).Value;

        // Verify that the byte array matches the expected UTF-8 byte sequence
        Assert.Equal(s_testValueInUtf8Bytes.Length, actualBytes.Length);
        Assert.Equal(s_testValueInUtf8Bytes, actualBytes);
    }

    /// <summary>
    /// Asynchronous version of the testcase BulkCopy_Utf8Data_ShouldMatchSource
    /// </summary>
    /// <param name="isMarsEnabled">Whether the connections are opened with MARS enabled.</param>
    /// <param name="enableStreaming">Value assigned to <see cref="SqlBulkCopy.EnableStreaming"/>.</param>
    [ConditionalTheory(typeof(DataTestUtility),
        nameof(DataTestUtility.AreConnStringsSetup),
        nameof(DataTestUtility.IsNotAzureServer),
        nameof(DataTestUtility.IsNotAzureSynapse))]
    [InlineData(true, true)]
    [InlineData(false, true)]
    [InlineData(true, false)]
    [InlineData(false, false)]
    public async Task BulkCopy_Utf8Data_ShouldMatchSource_Async(bool isMarsEnabled, bool enableStreaming)
    {
        // Setup connections for source and destination tables
        string connectionString = GetConnectionString(isMarsEnabled);
        using SqlConnection sourceConnection = new SqlConnection(connectionString);
        await sourceConnection.OpenAsync();
        using SqlConnection destinationConnection = new SqlConnection(connectionString);
        await destinationConnection.OpenAsync();

        // Read data from source table
        using SqlCommand sourceDataCommand = new SqlCommand($"SELECT str_col FROM {s_sourceTable}", sourceConnection);
        using SqlDataReader reader = await sourceDataCommand.ExecuteReaderAsync(CommandBehavior.SequentialAccess);

        // Verify that the destination table is empty before bulk copy
        using SqlCommand countCommand = new SqlCommand($"SELECT COUNT(*) FROM {s_destinationTable}", destinationConnection);
        Assert.Equal(0, Convert.ToInt16(await countCommand.ExecuteScalarAsync()));

        // Initialize bulk copy configuration
        using SqlBulkCopy bulkCopy = new SqlBulkCopy(destinationConnection)
        {
            EnableStreaming = enableStreaming,
            DestinationTableName = s_destinationTable
        };

        // Perform bulk copy from source to destination table.
        // No try/catch: an exception thrown here fails the test and is reported with its type and stack trace.
        await bulkCopy.WriteToServerAsync(reader);

        // Verify that the 1 row from the source table has been copied into our destination table.
        Assert.Equal(1, Convert.ToInt16(await countCommand.ExecuteScalarAsync()));

        // Read the data from destination table as varbinary to verify the UTF-8 byte sequence.
        // NOTE: CAST to varbinary without a length is limited to 30 bytes in SQL Server, which is
        // enough for the 4-byte test value; use an explicit length if the test value ever grows.
        using SqlCommand verifyCommand = new SqlCommand($"SELECT cast(str_col as varbinary) FROM {s_destinationTable}", destinationConnection);
        using SqlDataReader verifyReader = await verifyCommand.ExecuteReaderAsync(CommandBehavior.SequentialAccess);

        // Verify that we have data in the destination table
        Assert.True(await verifyReader.ReadAsync(), "No data found in destination table after bulk copy.");

        // Read the value of the column as SqlBinary.
        byte[] actualBytes = verifyReader.GetSqlBinary(0).Value;

        // Verify that the byte array matches the expected UTF-8 byte sequence
        Assert.Equal(s_testValueInUtf8Bytes.Length, actualBytes.Length);
        Assert.Equal(s_testValueInUtf8Bytes, actualBytes);
    }
}
197+
}

0 commit comments

Comments
 (0)