From 1ccd0fabe9dc6e064b15e9fe7ead16125c42da58 Mon Sep 17 00:00:00 2001 From: Apoorv Deshmukh Date: Wed, 11 Jun 2025 11:57:15 +0530 Subject: [PATCH] Port #3399 to release/5.1 --- .../src/Microsoft/Data/SqlClient/TdsParser.cs | 9 +- .../src/Microsoft/Data/SqlClient/TdsParser.cs | 9 +- .../ManualTests/DataCommon/DataTestUtility.cs | 11 + ....Data.SqlClient.ManualTesting.Tests.csproj | 1 + .../SqlBulkCopyTest/TestBulkCopyWithUTF8.cs | 196 ++++++++++++++++++ 5 files changed, 218 insertions(+), 8 deletions(-) create mode 100644 src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs diff --git a/src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs b/src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs index dae197f5f7..01e29ef579 100644 --- a/src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs +++ b/src/Microsoft.Data.SqlClient/netcore/src/Microsoft/Data/SqlClient/TdsParser.cs @@ -39,6 +39,7 @@ internal struct SNIErrorDetails // and surfacing objects to the user. 
internal sealed partial class TdsParser { + private static readonly Encoding s_utf8EncodingWithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); private static int _objectTypeCount; // EventSource counter private readonly SqlClientLogger _logger = new SqlClientLogger(); @@ -2767,7 +2768,7 @@ private bool TryProcessEnvChange(int tokenLength, TdsParserStateObject stateObj, // UTF8 collation if (env._newCollation.IsUTF8) { - _defaultEncoding = Encoding.UTF8; + _defaultEncoding = s_utf8EncodingWithoutBom; } else { @@ -4171,7 +4172,7 @@ internal bool TryProcessReturnValue(int length, TdsParserStateObject stateObj, o // UTF8 collation if (rec.collation.IsUTF8) { - rec.encoding = Encoding.UTF8; + rec.encoding = s_utf8EncodingWithoutBom; } else { @@ -4955,7 +4956,7 @@ private bool TryProcessTypeInfo(TdsParserStateObject stateObj, SqlMetaDataPriv c // UTF8 collation if (col.collation.IsUTF8) { - col.encoding = Encoding.UTF8; + col.encoding = s_utf8EncodingWithoutBom; } else { @@ -10681,7 +10682,7 @@ internal Task WriteBulkCopyValue(object value, SqlMetaDataPriv metadata, TdsPars // Replace encoding if it is UTF8 if (metadata.collation.IsUTF8) { - _defaultEncoding = Encoding.UTF8; + _defaultEncoding = s_utf8EncodingWithoutBom; } _defaultCollation = metadata.collation; diff --git a/src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs b/src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs index c94f714973..264e132009 100644 --- a/src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs +++ b/src/Microsoft.Data.SqlClient/netfx/src/Microsoft/Data/SqlClient/TdsParser.cs @@ -32,6 +32,7 @@ namespace Microsoft.Data.SqlClient // and surfacing objects to the user. 
sealed internal class TdsParser { + private static readonly Encoding s_utf8EncodingWithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); private static int _objectTypeCount; // EventSource Counter private readonly SqlClientLogger _logger = new SqlClientLogger(); @@ -3221,7 +3222,7 @@ private bool TryProcessEnvChange(int tokenLength, TdsParserStateObject stateObj, // UTF8 collation if (env._newCollation.IsUTF8) { - _defaultEncoding = Encoding.UTF8; + _defaultEncoding = s_utf8EncodingWithoutBom; } else { @@ -4739,7 +4740,7 @@ internal bool TryProcessReturnValue(int length, if (rec.collation.IsUTF8) { // UTF8 collation - rec.encoding = Encoding.UTF8; + rec.encoding = s_utf8EncodingWithoutBom; } else { @@ -5636,7 +5637,7 @@ private bool TryProcessTypeInfo(TdsParserStateObject stateObj, SqlMetaDataPriv c if (col.collation.IsUTF8) { // UTF8 collation - col.encoding = Encoding.UTF8; + col.encoding = s_utf8EncodingWithoutBom; } else { @@ -11670,7 +11671,7 @@ internal Task WriteBulkCopyValue(object value, SqlMetaDataPriv metadata, TdsPars // Replace encoding if it is UTF8 if (metadata.collation.IsUTF8) { - _defaultEncoding = Encoding.UTF8; + _defaultEncoding = s_utf8EncodingWithoutBom; } _defaultCollation = metadata.collation; diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/DataCommon/DataTestUtility.cs b/src/Microsoft.Data.SqlClient/tests/ManualTests/DataCommon/DataTestUtility.cs index 1375cbea2f..f0b930facf 100644 --- a/src/Microsoft.Data.SqlClient/tests/ManualTests/DataCommon/DataTestUtility.cs +++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/DataCommon/DataTestUtility.cs @@ -470,6 +470,17 @@ public static bool IsSupportingDistributedTransactions() #endif } + public static void CreateTable(SqlConnection sqlConnection, string tableName, string createBody) + { + DropTable(sqlConnection, tableName); + string tableCreate = "CREATE TABLE " + tableName + createBody; + using (SqlCommand command = sqlConnection.CreateCommand()) + { + 
command.CommandText = tableCreate; + command.ExecuteNonQuery(); + } + } + public static void DropTable(SqlConnection sqlConnection, string tableName) { ResurrectConnection(sqlConnection); diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj b/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj index f570fad10f..011fc0c3cf 100644 --- a/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj +++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTesting.Tests.csproj @@ -279,6 +279,7 @@ + diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs new file mode 100644 index 0000000000..5d2b0b3108 --- /dev/null +++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/SqlBulkCopyTest/TestBulkCopyWithUTF8.cs @@ -0,0 +1,196 @@ +using System; +using System.Data; +using System.Threading.Tasks; +using Xunit; + +namespace Microsoft.Data.SqlClient.ManualTesting.Tests.SQL.SqlBulkCopyTest +{ + /// + /// Validates SqlBulkCopy functionality when working with UTF-8 encoded data. + /// Ensures that data copied from a UTF-8 source table to a destination table retains its encoding and content integrity. 
+ /// + public sealed class TestBulkCopyWithUtf8 : IDisposable + { + private static string s_sourceTable = DataTestUtility.GetUniqueName("SourceTableForUTF8Data"); + private static string s_destinationTable = DataTestUtility.GetUniqueName("DestinationTableForUTF8Data"); + private static string s_testValue = "test"; + private static byte[] s_testValueInUtf8Bytes = new byte[] { 0x74, 0x65, 0x73, 0x74 }; + private static readonly string s_insertQuery = $"INSERT INTO {s_sourceTable} VALUES('{s_testValue}')"; + + /// + /// Constructor: Initializes and populates source and destination tables required for the tests. + /// + public TestBulkCopyWithUtf8() + { + using SqlConnection sourceConnection = new SqlConnection(GetConnectionString(true)); + sourceConnection.Open(); + SetupTables(sourceConnection, s_sourceTable, s_destinationTable, s_insertQuery); + } + + /// + /// Cleanup method to drop tables after test completion. + /// + public void Dispose() + { + using SqlConnection connection = new SqlConnection(GetConnectionString(true)); + connection.Open(); + DataTestUtility.DropTable(connection, s_sourceTable); + DataTestUtility.DropTable(connection, s_destinationTable); + connection.Close(); + } + + /// + /// Builds a connection string with or without Multiple Active Result Sets (MARS) property. + /// + private string GetConnectionString(bool enableMars) + { + return new SqlConnectionStringBuilder(DataTestUtility.TCPConnectionString) + { + MultipleActiveResultSets = enableMars + }.ConnectionString; + } + + /// + /// Creates source and destination tables with a varchar(max) column with a collation setting + /// that stores the data in UTF8 encoding and inserts the data in the source table. 
+ /// + private void SetupTables(SqlConnection connection, string sourceTable, string destinationTable, string insertQuery) + { + string columnDefinition = "(str_col varchar(max) COLLATE Latin1_General_100_CS_AS_KS_WS_SC_UTF8)"; + DataTestUtility.CreateTable(connection, sourceTable, columnDefinition); + DataTestUtility.CreateTable(connection, destinationTable, columnDefinition); + using SqlCommand insertCommand = connection.CreateCommand(); + insertCommand.CommandText = insertQuery; + Helpers.TryExecute(insertCommand, insertQuery); + } + + /// + /// Synchronous test case: Validates that data copied using SqlBulkCopy matches UTF-8 byte sequence for test value. + /// Tested with MARS enabled and disabled, and with streaming enabled and disabled. + /// + [ConditionalTheory(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer), + nameof(DataTestUtility.IsNotAzureSynapse))] + [InlineData(true, true)] + [InlineData(false, true)] + [InlineData(true, false)] + [InlineData(false, false)] + public void BulkCopy_Utf8Data_ShouldMatchSource(bool isMarsEnabled, bool enableStreaming) + { + // Setup connections for source and destination tables + string connectionString = GetConnectionString(isMarsEnabled); + using SqlConnection sourceConnection = new SqlConnection(connectionString); + sourceConnection.Open(); + using SqlConnection destinationConnection = new SqlConnection(connectionString); + destinationConnection.Open(); + + // Read data from source table + using SqlCommand sourceDataCommand = new SqlCommand($"SELECT str_col FROM {s_sourceTable}", sourceConnection); + using SqlDataReader reader = sourceDataCommand.ExecuteReader(CommandBehavior.SequentialAccess); + + // Verify that the destination table is empty before bulk copy + using SqlCommand countCommand = new SqlCommand($"SELECT COUNT(*) FROM {s_destinationTable}", destinationConnection); + Assert.Equal(0, Convert.ToInt16(countCommand.ExecuteScalar())); + + // 
Initialize bulk copy configuration + using SqlBulkCopy bulkCopy = new SqlBulkCopy(destinationConnection) + { + EnableStreaming = enableStreaming, + DestinationTableName = s_destinationTable + }; + + try + { + // Perform bulk copy from source to destination table + bulkCopy.WriteToServer(reader); + } + catch (Exception ex) + { + // If bulk copy fails, fail the test with the exception message + Assert.Fail($"Bulk copy failed: {ex.Message}"); + } + + // Verify that the 1 row from the source table has been copied into our destination table. + Assert.Equal(1, Convert.ToInt16(countCommand.ExecuteScalar())); + + // Read the data from destination table as varbinary to verify the UTF-8 byte sequence + using SqlCommand verifyCommand = new SqlCommand($"SELECT cast(str_col as varbinary) FROM {s_destinationTable}", destinationConnection); + using SqlDataReader verifyReader = verifyCommand.ExecuteReader(CommandBehavior.SequentialAccess); + + // Verify that we have data in the destination table + Assert.True(verifyReader.Read(), "No data found in destination table after bulk copy."); + + // Read the value of the column as SqlBinary. 
+ byte[] actualBytes = verifyReader.GetSqlBinary(0).Value; + + // Verify that the byte array matches the expected UTF-8 byte sequence + Assert.Equal(s_testValueInUtf8Bytes.Length, actualBytes.Length); + Assert.Equal(s_testValueInUtf8Bytes, actualBytes); + } + + /// + /// Asynchronous version of the test case BulkCopy_Utf8Data_ShouldMatchSource + /// + [ConditionalTheory(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer), + nameof(DataTestUtility.IsNotAzureSynapse))] + [InlineData(true, true)] + [InlineData(false, true)] + [InlineData(true, false)] + [InlineData(false, false)] + public async Task BulkCopy_Utf8Data_ShouldMatchSource_Async(bool isMarsEnabled, bool enableStreaming) + { + // Setup connections for source and destination tables + string connectionString = GetConnectionString(isMarsEnabled); + using SqlConnection sourceConnection = new SqlConnection(connectionString); + await sourceConnection.OpenAsync(); + using SqlConnection destinationConnection = new SqlConnection(connectionString); + await destinationConnection.OpenAsync(); + + // Read data from source table + using SqlCommand sourceDataCommand = new SqlCommand($"SELECT str_col FROM {s_sourceTable}", sourceConnection); + using SqlDataReader reader = await sourceDataCommand.ExecuteReaderAsync(CommandBehavior.SequentialAccess); + + // Verify that the destination table is empty before bulk copy + using SqlCommand countCommand = new SqlCommand($"SELECT COUNT(*) FROM {s_destinationTable}", destinationConnection); + Assert.Equal(0, Convert.ToInt16(await countCommand.ExecuteScalarAsync())); + + // Initialize bulk copy configuration + using SqlBulkCopy bulkCopy = new SqlBulkCopy(destinationConnection) + { + EnableStreaming = enableStreaming, + DestinationTableName = s_destinationTable + }; + + try + { + // Perform bulk copy from source to destination table + await bulkCopy.WriteToServerAsync(reader); + } + catch (Exception ex) + { + // If bulk
copy fails, fail the test with the exception message + Assert.Fail($"Bulk copy failed: {ex.Message}"); + } + + // Verify that the 1 row from the source table has been copied into our destination table. + Assert.Equal(1, Convert.ToInt16(await countCommand.ExecuteScalarAsync())); + + // Read the data from destination table as varbinary to verify the UTF-8 byte sequence + using SqlCommand verifyCommand = new SqlCommand($"SELECT cast(str_col as varbinary) FROM {s_destinationTable}", destinationConnection); + using SqlDataReader verifyReader = await verifyCommand.ExecuteReaderAsync(CommandBehavior.SequentialAccess); + + // Verify that we have data in the destination table + Assert.True(await verifyReader.ReadAsync(), "No data found in destination table after bulk copy."); + + // Read the value of the column as SqlBinary. + byte[] actualBytes = verifyReader.GetSqlBinary(0).Value; + + // Verify that the byte array matches the expected UTF-8 byte sequence + Assert.Equal(s_testValueInUtf8Bytes.Length, actualBytes.Length); + Assert.Equal(s_testValueInUtf8Bytes, actualBytes); + } + } +}