From 334a63605398b7ea621d0bf7e9ffd498abfb568e Mon Sep 17 00:00:00 2001
From: Apoorv Deshmukh
Date: Wed, 11 Jun 2025 11:15:03 +0530
Subject: [PATCH] Port #3399 to release/5.2

---
Notes (this section sits after the "---" separator and is not part of the commit message):

  What the patch does, as visible in the hunks below:
  * In both the netcore and the netfx TdsParser.cs, the four assignments of Encoding.UTF8
    made when a collation reports IsUTF8 are replaced by a shared static UTF8Encoding
    instance constructed with encoderShouldEmitUTF8Identifier: false.
  * DataTestUtility gains a CreateTable helper (drop, then CREATE TABLE).
  * A new manual test bulk-copies a varchar(max) column that uses a UTF8 collation and
    compares the bytes stored in the destination with the expected byte sequence.

  NOTE(review): this copy of the patch had lost its line breaks, its leading whitespace and
  all text written in angle brackets (author e-mail, XML doc tags, the .csproj hunk body).
  Line breaks are restored here; indentation and blank context lines are re-created by hand
  from the hunk line counts and are NOT guaranteed to match the target files. The .csproj
  hunk still has no body. Regenerate with git format-patch before trying to apply it.

 .../src/Microsoft/Data/SqlClient/TdsParser.cs |   9 +-
 .../src/Microsoft/Data/SqlClient/TdsParser.cs |   9 +-
 .../ManualTests/DataCommon/DataTestUtility.cs |  11 +
 ....Data.SqlClient.ManualTesting.Tests.csproj |   1 +
 .../SqlBulkCopyTest/TestBulkCopyWithUTF8.cs   | 196 ++++++++++++++++++
 5 files changed, 218 insertions(+), 8 deletions(-)
 create mode 100644 src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs

diff --git a/src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs b/src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs
index 3c73cb89a9..8f090d71c7 100644
--- a/src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs
+++ b/src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs
@@ -52,6 +52,7 @@ internal static void Assert(string message)
             }
         }
 
+        private static readonly Encoding s_utf8EncodingWithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); // shared UTF-8 encoding whose encoder does not emit the UTF-8 identifier (BOM)
         private static int _objectTypeCount; // EventSource counter
         private readonly SqlClientLogger _logger = new SqlClientLogger();
 
@@ -2791,7 +2792,7 @@ private bool TryProcessEnvChange(int tokenLength, TdsParserStateObject stateObj,
                             // UTF8 collation
                             if (env._newCollation.IsUTF8)
                             {
-                                _defaultEncoding = Encoding.UTF8;
+                                _defaultEncoding = s_utf8EncodingWithoutBom;
                             }
                             else
                             {
@@ -4199,7 +4200,7 @@ internal bool TryProcessReturnValue(int length, TdsParserStateObject stateObj, o
                     // UTF8 collation
                     if (rec.collation.IsUTF8)
                     {
-                        rec.encoding = Encoding.UTF8;
+                        rec.encoding = s_utf8EncodingWithoutBom;
                     }
                     else
                     {
@@ -4986,7 +4987,7 @@ private bool TryProcessTypeInfo(TdsParserStateObject stateObj, SqlMetaDataPriv c
                     // UTF8 collation
                     if (col.collation.IsUTF8)
                     {
-                        col.encoding = Encoding.UTF8;
+                        col.encoding = s_utf8EncodingWithoutBom;
                     }
                     else
                     {
@@ -10801,7 +10802,7 @@ internal Task WriteBulkCopyValue(object value, SqlMetaDataPriv metadata, TdsPars
                     // Replace encoding if it is UTF8
                     if (metadata.collation.IsUTF8)
                     {
-                        _defaultEncoding = Encoding.UTF8;
+                        _defaultEncoding = s_utf8EncodingWithoutBom;
                     }
 
                     _defaultCollation = metadata.collation;
diff --git a/src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs b/src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs
index 49b6eacb8e..164816e736 100644
--- a/src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs
+++ b/src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs
@@ -33,6 +33,7 @@ namespace Microsoft.Data.SqlClient
     // and surfacing objects to the user.
     sealed internal class TdsParser
     {
+        private static readonly Encoding s_utf8EncodingWithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); // shared UTF-8 encoding whose encoder does not emit the UTF-8 identifier (BOM)
         private static int _objectTypeCount; // EventSource Counter
         private readonly SqlClientLogger _logger = new SqlClientLogger();
 
@@ -3235,7 +3236,7 @@ private bool TryProcessEnvChange(int tokenLength, TdsParserStateObject stateObj,
                             // UTF8 collation
                             if (env._newCollation.IsUTF8)
                             {
-                                _defaultEncoding = Encoding.UTF8;
+                                _defaultEncoding = s_utf8EncodingWithoutBom;
                             }
                             else
                             {
@@ -4757,7 +4758,7 @@ internal bool TryProcessReturnValue(int length,
                     if (rec.collation.IsUTF8)
                     {
                         // UTF8 collation
-                        rec.encoding = Encoding.UTF8;
+                        rec.encoding = s_utf8EncodingWithoutBom;
                     }
                     else
                     {
@@ -5657,7 +5658,7 @@ private bool TryProcessTypeInfo(TdsParserStateObject stateObj, SqlMetaDataPriv c
                     if (col.collation.IsUTF8)
                     {
                         // UTF8 collation
-                        col.encoding = Encoding.UTF8;
+                        col.encoding = s_utf8EncodingWithoutBom;
                     }
                     else
                     {
@@ -11740,7 +11741,7 @@ internal Task WriteBulkCopyValue(object value, SqlMetaDataPriv metadata, TdsPars
                     // Replace encoding if it is UTF8
                     if (metadata.collation.IsUTF8)
                     {
-                        _defaultEncoding = Encoding.UTF8;
+                        _defaultEncoding = s_utf8EncodingWithoutBom;
                     }
 
                     _defaultCollation = metadata.collation;
diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/DataCommon/DataTestUtility.cs b/src/Microsoft.Data.SqlClient/tests/ManualTests/DataCommon/DataTestUtility.cs
index ef1586f51c..599482b9b4 100644
--- a/src/Microsoft.Data.SqlClient/tests/ManualTests/DataCommon/DataTestUtility.cs
+++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/DataCommon/DataTestUtility.cs
@@ -590,6 +590,17 @@ public static string GetUniqueNameForSqlServer(string prefix, bool withBracket =
             return name;
         }
 
+        public static void CreateTable(SqlConnection sqlConnection, string tableName, string createBody) // createBody is concatenated directly after tableName (no separator is added)
+        {
+            DropTable(sqlConnection, tableName); // drop first, then create
+            string tableCreate = "CREATE TABLE " + tableName + createBody;
+            using (SqlCommand command = sqlConnection.CreateCommand())
+            {
+                command.CommandText = tableCreate;
+                command.ExecuteNonQuery();
+            }
+        }
+
         public static void DropTable(SqlConnection sqlConnection, string tableName)
         {
             ResurrectConnection(sqlConnection);
diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj b/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj
index 7eb2b1140c..c67fd525f7 100644
--- a/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj
+++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj
@@ -285,6 +285,7 @@
+
diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs
new file mode 100644
index 0000000000..552e9b9e03
--- /dev/null
+++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs
@@ -0,0 +1,196 @@
+using System;
+using System.Data;
+using System.Threading.Tasks;
+using Xunit;
+
+namespace
Microsoft.Data.SqlClient.ManualTesting.Tests.SQL.SqlBulkCopyTest
+{
+    /// <summary>
+    /// Validates SqlBulkCopy functionality when working with UTF-8 encoded data.
+    /// Ensures that data copied from a UTF-8 source table to a destination table retains its encoding and content integrity.
+    /// </summary>
+    /// <remarks>
+    /// xUnit creates a new instance of this class for every test case, so the constructor and
+    /// <see cref="Dispose"/> re-create and drop both tables around each individual run.
+    /// </remarks>
+    public sealed class TestBulkCopyWithUtf8 : IDisposable
+    {
+        // Static, so every test case in this class works against the same pair of tables and test value.
+        private static readonly string s_sourceTable = DataTestUtility.GetUniqueName("SourceTableForUTF8Data");
+        private static readonly string s_destinationTable = DataTestUtility.GetUniqueName("DestinationTableForUTF8Data");
+        private static readonly string s_testValue = "test";
+        private static readonly byte[] s_testValueInUtf8Bytes = new byte[] { 0x74, 0x65, 0x73, 0x74 };
+        private static readonly string s_insertQuery = $"INSERT INTO {s_sourceTable} VALUES('{s_testValue}')";
+
+        /// <summary>
+        /// Constructor: Initializes and populates source and destination tables required for the tests.
+        /// </summary>
+        public TestBulkCopyWithUtf8()
+        {
+            using SqlConnection sourceConnection = new SqlConnection(GetConnectionString(true));
+            sourceConnection.Open();
+            SetupTables(sourceConnection, s_sourceTable, s_destinationTable, s_insertQuery);
+        }
+
+        /// <summary>
+        /// Cleanup method to drop tables after test completion.
+        /// </summary>
+        public void Dispose()
+        {
+            using SqlConnection connection = new SqlConnection(GetConnectionString(true));
+            connection.Open();
+            DataTestUtility.DropTable(connection, s_sourceTable);
+            DataTestUtility.DropTable(connection, s_destinationTable);
+        }
+
+        /// <summary>
+        /// Builds a connection string with or without Multiple Active Result Sets (MARS) property.
+        /// </summary>
+        /// <param name="enableMars">Value assigned to <c>MultipleActiveResultSets</c>.</param>
+        /// <returns>The TCP test connection string with the MARS setting applied.</returns>
+        private string GetConnectionString(bool enableMars)
+        {
+            return new SqlConnectionStringBuilder(DataTestUtility.TCPConnectionString)
+            {
+                MultipleActiveResultSets = enableMars
+            }.ConnectionString;
+        }
+
+        /// <summary>
+        /// Creates source and destination tables with a varchar(max) column with a collation setting
+        /// that stores the data in UTF8 encoding and inserts the data in the source table.
+        /// </summary>
+        /// <param name="connection">Open connection used to create the tables and run the insert.</param>
+        /// <param name="sourceTable">Name of the source table to create.</param>
+        /// <param name="destinationTable">Name of the destination table to create; it is left empty.</param>
+        /// <param name="insertQuery">INSERT statement run after both tables exist; expected to target the source table.</param>
+        private void SetupTables(SqlConnection connection, string sourceTable, string destinationTable, string insertQuery)
+        {
+            string columnDefinition = "(str_col varchar(max) COLLATE Latin1_General_100_CS_AS_KS_WS_SC_UTF8)";
+            DataTestUtility.CreateTable(connection, sourceTable, columnDefinition);
+            DataTestUtility.CreateTable(connection, destinationTable, columnDefinition);
+            using SqlCommand insertCommand = connection.CreateCommand();
+            insertCommand.CommandText = insertQuery;
+            Helpers.TryExecute(insertCommand, insertQuery);
+        }
+
+        /// <summary>
+        /// Synchronous test case: Validates that data copied using SqlBulkCopy matches UTF-8 byte sequence for test value.
+        /// Tested with MARS enabled and disabled, and with streaming enabled and disabled.
+        /// </summary>
+        /// <param name="isMarsEnabled">Whether both connections are opened with MARS enabled.</param>
+        /// <param name="enableStreaming">Value assigned to <see cref="SqlBulkCopy.EnableStreaming"/>.</param>
+        [ConditionalTheory(typeof(DataTestUtility),
+            nameof(DataTestUtility.AreConnStringsSetup),
+            nameof(DataTestUtility.IsNotAzureServer),
+            nameof(DataTestUtility.IsNotAzureSynapse))]
+        [InlineData(true, true)]
+        [InlineData(false, true)]
+        [InlineData(true, false)]
+        [InlineData(false, false)]
+        public void BulkCopy_Utf8Data_ShouldMatchSource(bool isMarsEnabled, bool enableStreaming)
+        {
+            // Setup connections for source and destination tables
+            string connectionString = GetConnectionString(isMarsEnabled);
+            using SqlConnection sourceConnection = new SqlConnection(connectionString);
+            sourceConnection.Open();
+            using SqlConnection destinationConnection = new SqlConnection(connectionString);
+            destinationConnection.Open();
+
+            // Read data from source table
+            using SqlCommand sourceDataCommand = new SqlCommand($"SELECT str_col FROM {s_sourceTable}", sourceConnection);
+            using SqlDataReader reader = sourceDataCommand.ExecuteReader(CommandBehavior.SequentialAccess);
+
+            // Verify that the destination table is empty before bulk copy
+            using SqlCommand countCommand = new SqlCommand($"SELECT COUNT(*) FROM {s_destinationTable}", destinationConnection);
+            Assert.Equal(0, Convert.ToInt16(countCommand.ExecuteScalar()));
+
+            // Initialize bulk copy configuration
+            using SqlBulkCopy bulkCopy = new SqlBulkCopy(destinationConnection)
+            {
+                EnableStreaming = enableStreaming,
+                DestinationTableName = s_destinationTable
+            };
+
+            // Perform bulk copy from source to destination table. Exceptions are intentionally not caught:
+            // an unhandled exception fails the test and keeps its type and stack trace in the report.
+            bulkCopy.WriteToServer(reader);
+
+            // Verify that the 1 row from the source table has been copied into our destination table.
+            Assert.Equal(1, Convert.ToInt16(countCommand.ExecuteScalar()));
+
+            // Read the data from destination table as varbinary to verify the UTF-8 byte sequence
+            using SqlCommand verifyCommand = new SqlCommand($"SELECT cast(str_col as varbinary) FROM {s_destinationTable}", destinationConnection);
+            using SqlDataReader verifyReader = verifyCommand.ExecuteReader(CommandBehavior.SequentialAccess);
+
+            // Verify that we have data in the destination table
+            Assert.True(verifyReader.Read(), "No data found in destination table after bulk copy.");
+
+            // Read the value of the column as SqlBinary.
+            byte[] actualBytes = verifyReader.GetSqlBinary(0).Value;
+
+            // Verify that the byte array matches the expected UTF-8 byte sequence
+            Assert.Equal(s_testValueInUtf8Bytes.Length, actualBytes.Length);
+            Assert.Equal(s_testValueInUtf8Bytes, actualBytes);
+        }
+
+        /// <summary>
+        /// Asynchronous version of the testcase BulkCopy_Utf8Data_ShouldMatchSource
+        /// </summary>
+        /// <param name="isMarsEnabled">Whether both connections are opened with MARS enabled.</param>
+        /// <param name="enableStreaming">Value assigned to <see cref="SqlBulkCopy.EnableStreaming"/>.</param>
+        [ConditionalTheory(typeof(DataTestUtility),
+            nameof(DataTestUtility.AreConnStringsSetup),
+            nameof(DataTestUtility.IsNotAzureServer),
+            nameof(DataTestUtility.IsNotAzureSynapse))]
+        [InlineData(true, true)]
+        [InlineData(false, true)]
+        [InlineData(true, false)]
+        [InlineData(false, false)]
+        public async Task BulkCopy_Utf8Data_ShouldMatchSource_Async(bool isMarsEnabled, bool enableStreaming)
+        {
+            // Setup connections for source and destination tables
+            string connectionString = GetConnectionString(isMarsEnabled);
+            using SqlConnection sourceConnection = new SqlConnection(connectionString);
+            await sourceConnection.OpenAsync();
+            using SqlConnection destinationConnection = new SqlConnection(connectionString);
+            await destinationConnection.OpenAsync();
+
+            // Read data from source table
+            using SqlCommand sourceDataCommand = new SqlCommand($"SELECT str_col FROM {s_sourceTable}", sourceConnection);
+            using SqlDataReader reader = await sourceDataCommand.ExecuteReaderAsync(CommandBehavior.SequentialAccess);
+
+            // Verify that the destination table is empty before bulk copy
+            using SqlCommand countCommand = new SqlCommand($"SELECT COUNT(*) FROM {s_destinationTable}", destinationConnection);
+            Assert.Equal(0, Convert.ToInt16(await countCommand.ExecuteScalarAsync()));
+
+            // Initialize bulk copy configuration
+            using SqlBulkCopy bulkCopy = new SqlBulkCopy(destinationConnection)
+            {
+                EnableStreaming = enableStreaming,
+                DestinationTableName = s_destinationTable
+            };
+
+            // Perform bulk copy from source to destination table. Exceptions are intentionally not caught:
+            // an unhandled exception fails the test and keeps its type and stack trace in the report.
+            await bulkCopy.WriteToServerAsync(reader);
+
+            // Verify that the 1 row from the source table has been copied into our destination table.
+            Assert.Equal(1, Convert.ToInt16(await countCommand.ExecuteScalarAsync()));
+
+            // Read the data from destination table as varbinary to verify the UTF-8 byte sequence
+            using SqlCommand verifyCommand = new SqlCommand($"SELECT cast(str_col as varbinary) FROM {s_destinationTable}", destinationConnection);
+            using SqlDataReader verifyReader = await verifyCommand.ExecuteReaderAsync(CommandBehavior.SequentialAccess);
+
+            // Verify that we have data in the destination table
+            Assert.True(await verifyReader.ReadAsync(), "No data found in destination table after bulk copy.");
+
+            // Read the value of the column as SqlBinary.
+            byte[] actualBytes = verifyReader.GetSqlBinary(0).Value;
+
+            // Verify that the byte array matches the expected UTF-8 byte sequence
+            Assert.Equal(s_testValueInUtf8Bytes.Length, actualBytes.Length);
+            Assert.Equal(s_testValueInUtf8Bytes, actualBytes);
+        }
+    }
+}