diff --git a/backends/vulkan/test/op_tests/choose_qparams_test.cpp b/backends/vulkan/test/op_tests/choose_qparams_test.cpp
index ec839cdf6b..2f75473c23 100644
--- a/backends/vulkan/test/op_tests/choose_qparams_test.cpp
+++ b/backends/vulkan/test/op_tests/choose_qparams_test.cpp
@@ -114,3 +114,277 @@ std::tuple<at::Tensor, at::Tensor> choose_qparams_per_token_asymmetric_aten(
 } // namespace native
 } // namespace executor
 } // namespace torch
+
+//
+// Reference Implementation
+//
+
+/*
+ * Reference implementation of choose_qparams_tensor
+ */
+std::tuple<at::Tensor, at::Tensor> choose_qparams_tensor_reference_impl(
+    const at::Tensor& input,
+    int64_t quant_min,
+    int64_t quant_max) {
+  // Create output tensors
+  at::Tensor scale_out = at::empty({}, at::device(at::kCPU).dtype(at::kDouble));
+  at::Tensor zero_point_out =
+      at::empty({}, at::device(at::kCPU).dtype(at::kLong));
+
+  // Find min and max values in the input tensor
+  float min_val = input.min().item<float>();
+  float max_val = input.max().item<float>();
+
+  // Extend the [min, max] interval to ensure it contains 0
+  min_val = std::min(min_val, 0.f);
+  max_val = std::max(max_val, 0.f);
+
+  // Calculate scale
+  double scale =
+      (static_cast<double>(max_val) - min_val) / (quant_max - quant_min);
+
+  // Handle small scale
+  constexpr float SMALL_SCALE_THRESHOLD = 6.1e-5f;
+  if (float(scale) == 0.0f || std::isinf(1.0f / float(scale))) {
+    scale = 0.1;
+  }
+
+  if (scale < SMALL_SCALE_THRESHOLD) {
+    float org_scale = scale;
+    scale = SMALL_SCALE_THRESHOLD;
+    // Adjust min and max based on new scale
+    if (min_val == 0.0f) {
+      max_val = SMALL_SCALE_THRESHOLD * (quant_max - quant_min);
+    } else if (max_val == 0.0f) {
+      min_val = -SMALL_SCALE_THRESHOLD * (quant_max - quant_min);
+    } else {
+      float amplifier = SMALL_SCALE_THRESHOLD / org_scale;
+      min_val *= amplifier;
+      max_val *= amplifier;
+    }
+  }
+
+  // Calculate zero point
+  double zero_point_from_min = quant_min - min_val / static_cast<double>(scale);
+  double zero_point_from_max = quant_max - max_val / static_cast<double>(scale);
+  double zero_point_from_min_error =
+      std::abs(quant_min) - std::abs(min_val / static_cast<double>(scale));
+  double zero_point_from_max_error =
+      std::abs(quant_max) - std::abs(max_val / static_cast<double>(scale));
+  double initial_zero_point =
+      zero_point_from_min_error < zero_point_from_max_error
+      ? zero_point_from_min
+      : zero_point_from_max;
+
+  // Nudge zero point to be an integer
+  int64_t nudged_zero_point = 0;
+  if (initial_zero_point < quant_min) {
+    nudged_zero_point = quant_min;
+  } else if (initial_zero_point > quant_max) {
+    nudged_zero_point = quant_max;
+  } else {
+    nudged_zero_point = std::nearbyint(static_cast<float>(initial_zero_point));
+  }
+
+  // Set output values - fill_() works for scalar (zero-dim) tensors
+  scale_out.fill_(scale);
+  zero_point_out.fill_(nudged_zero_point);
+
+  return std::make_tuple(scale_out, zero_point_out);
+}
+
+// Forward declaration of implementation functions
+void test_vulkan_choose_qparams_tensor_impl(
+    const std::vector<int>& input_sizes,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype,
+    const vkcompute::utils::StorageType in_storage,
+    const vkcompute::utils::StorageType out_storage);
+
+// Wrapper function to test both buffer and texture storage types
+void test_vulkan_choose_qparams_tensor(
+    const std::vector<int>& input_sizes,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype) {
+  // Test with buffer storage
+  test_vulkan_choose_qparams_tensor_impl(
+      input_sizes,
+      quant_min,
+      quant_max,
+      dtype,
+      vkcompute::utils::kBuffer,
+      vkcompute::utils::kBuffer);
+
+  // Test with texture storage
+  test_vulkan_choose_qparams_tensor_impl(
+      input_sizes,
+      quant_min,
+      quant_max,
+      dtype,
+      vkcompute::utils::kTexture3D,
+      vkcompute::utils::kTexture3D);
+}
+
+void test_reference_choose_qparams_tensor(
+    const std::vector<int>& input_sizes,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype) {
+  std::vector<int64_t> input_sizes_int64(
+      input_sizes.begin(), input_sizes.end());
+  at::Tensor input =
+      at::rand(input_sizes_int64, at::device(at::kCPU).dtype(at::kFloat));
+
+  // Get reference output
+  auto [reference_scale, reference_zero_point] =
+      choose_qparams_tensor_reference_impl(input, quant_min, quant_max);
+
+  // Get implementation output
+  auto [impl_scale, impl_zero_point] =
+      torch::executor::native::choose_qparams_tensor_aten(
+          input, quant_min, quant_max, dtype);
+
+  // Compare outputs
+  const bool scale_correct = at::allclose(reference_scale, impl_scale);
+  const bool zero_point_correct =
+      at::equal(reference_zero_point, impl_zero_point);
+
+  if (!scale_correct || !zero_point_correct) {
+    std::cout << "\n"
+              << "Failed with parameters: " << std::endl;
+    std::cout << "  quant_min: " << quant_min << std::endl;
+    std::cout << "  quant_max: " << quant_max << std::endl;
+
+    std::cout << "input:" << std::endl;
+    std::cout << input << std::endl;
+    std::cout << "reference scale:" << std::endl;
+    std::cout << reference_scale << std::endl;
+    std::cout << "implementation scale:" << std::endl;
+    std::cout << impl_scale << std::endl;
+    std::cout << "reference zero_point:" << std::endl;
+    std::cout << reference_zero_point << std::endl;
+    std::cout << "implementation zero_point:" << std::endl;
+    std::cout << impl_zero_point << std::endl;
+  }
+
+  ASSERT_TRUE(scale_correct && zero_point_correct);
+}
+
+void test_vulkan_choose_qparams_tensor_impl(
+    const std::vector<int>& input_sizes,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype,
+    const vkcompute::utils::StorageType in_storage,
+    const vkcompute::utils::StorageType out_storage) {
+  std::vector<int64_t> input_sizes_int64(
+      input_sizes.begin(), input_sizes.end());
+  at::Tensor input =
+      at::rand(input_sizes_int64, at::device(at::kCPU).dtype(at::kFloat));
+
+  // Get reference output
+  auto [reference_scale, reference_zero_point] =
+      torch::executor::native::choose_qparams_tensor_aten(
+          input, quant_min, quant_max, dtype);
+
+  // Build Vulkan choose_qparams_tensor graph
+  using namespace vkcompute;
+
+  GraphConfig config;
+  config.set_storage_type_override(in_storage);
+  ComputeGraph graph(config);
+
+  IOValueRef r_input = graph.add_input_tensor(
+      input.sizes().vec(), from_at_scalartype(input.scalar_type()), in_storage);
+
+  const ValueRef r_quant_min = graph.add_scalar<int64_t>(quant_min);
+  const ValueRef r_quant_max = graph.add_scalar<int64_t>(quant_max);
+
+  // Output tensors
+  const ValueRef r_scale = graph.add_tensor({}, vkapi::kFloat, out_storage);
+  const ValueRef r_zero_point = graph.add_tensor({}, vkapi::kInt, out_storage);
+
+  VK_GET_OP_FN("choose_qparams.tensor")
+  (graph,
+   {
+       r_input.value,
+       r_quant_min,
+       r_quant_max,
+       r_scale,
+       r_zero_point,
+   });
+
+  ValueRef staging_scale = graph.set_output_tensor(r_scale);
+  ValueRef staging_zero_point = graph.set_output_tensor(r_zero_point);
+
+  graph.prepare();
+  graph.encode_prepack();
+  graph.prepack();
+  graph.encode_execute();
+
+  // Run Vulkan choose_qparams_tensor
+  graph.copy_into_staging(
+      r_input.staging, input.const_data_ptr(), input.numel());
+
+  graph.execute();
+
+  // Create output tensors to hold the results - use types that match GPU output
+  at::Tensor vk_scale =
+      at::empty({}, at::device(at::kCPU).dtype(at::kFloat)).contiguous();
+  at::Tensor vk_zero_point =
+      at::empty({}, at::device(at::kCPU).dtype(at::kInt)).contiguous();
+
+  // Copy results from GPU to CPU
+  graph.copy_from_staging(
+      staging_scale, vk_scale.mutable_data_ptr(), vk_scale.numel());
+  graph.copy_from_staging(
+      staging_zero_point,
+      vk_zero_point.mutable_data_ptr(),
+      vk_zero_point.numel());
+
+  // Convert reference values to match Vulkan output types for comparison
+  at::Tensor reference_scale_float = reference_scale.to(at::kFloat);
+  at::Tensor reference_zero_point_int = reference_zero_point.to(at::kInt);
+
+  // Compare outputs
+  const bool scale_correct = at::allclose(reference_scale_float, vk_scale);
+  const bool zero_point_correct =
+      at::equal(reference_zero_point_int, vk_zero_point);
+
+  if (!scale_correct || !zero_point_correct) {
+    std::cout << "\n"
+              << "Failed with parameters: " << std::endl;
+    std::cout << "  quant_min: " << quant_min << std::endl;
+    std::cout << "  quant_max: " << quant_max << std::endl;
+    std::cout << "  storage type: "
+              << (in_storage == vkcompute::utils::kBuffer ? "buffer"
+                                                          : "texture")
+              << std::endl;
+
+    // Only print the tensors when the input is small enough to be readable
+    if (input.numel() < 100) {
+      std::cout << "input:" << std::endl;
+      std::cout << input << "\n" << std::endl;
+      std::cout << "reference scale:" << std::endl;
+      std::cout << reference_scale << std::endl;
+      std::cout << "vulkan scale:" << std::endl;
+      std::cout << vk_scale << "\n" << std::endl;
+      std::cout << "reference zero_point:" << std::endl;
+      std::cout << reference_zero_point << std::endl;
+      std::cout << "vulkan zero_point:" << std::endl;
+      std::cout << vk_zero_point << std::endl;
+    }
+  }
+
+  ASSERT_TRUE(scale_correct && zero_point_correct);
+}
+
+TEST(VulkanChooseQparamsTest, test_reference_choose_qparams_tensor_int8) {
+  test_reference_choose_qparams_tensor(
+      {2, 3, 4}, // input sizes
+      -128, // quant_min
+      127, // quant_max
+      at::kChar);
+}
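
For readers following the reference implementation in the patch, the sketch below (not part of the patch) walks the same scale/zero-point arithmetic on a hard-coded example range. The input range [-0.8, 0.4] and the standalone `main` are purely illustrative assumptions; the small-scale handling from the patched code is omitted for brevity.

```cpp
// Minimal sketch of asymmetric qparams selection, assuming an example
// float range of [-0.8, 0.4] mapped onto the int8 range [-128, 127].
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int main() {
  // Extend the range so it contains 0, as the reference implementation does
  float min_val = std::min(-0.8f, 0.f);
  float max_val = std::max(0.4f, 0.f);
  const int64_t quant_min = -128; // matches the int8 test case above
  const int64_t quant_max = 127;

  // scale maps the float range onto the integer range
  double scale =
      (static_cast<double>(max_val) - min_val) / (quant_max - quant_min);

  // Derive a zero point from each end of the range, keep the one with the
  // smaller error, then clamp to [quant_min, quant_max] and round
  double zp_from_min = quant_min - min_val / scale;
  double zp_from_max = quant_max - max_val / scale;
  double zp_from_min_error = std::abs(quant_min) - std::abs(min_val / scale);
  double zp_from_max_error = std::abs(quant_max) - std::abs(max_val / scale);
  double initial_zp =
      zp_from_min_error < zp_from_max_error ? zp_from_min : zp_from_max;
  double clamped_zp = std::min(
      static_cast<double>(quant_max),
      std::max(static_cast<double>(quant_min), initial_zp));
  int64_t zero_point = static_cast<int64_t>(std::nearbyint(clamped_zp));

  // For [-0.8, 0.4] this prints scale ≈ 0.0047059 and zero_point = 42,
  // i.e. the float 0.0 quantizes exactly to the integer 42.
  std::cout << "scale = " << scale << ", zero_point = " << zero_point << "\n";
}
```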