Skip to content

Commit 812a54e

Browse files
summerdengfb
authored and facebook-github-bot committed
NVFP4 quantization emulation kernels as reference (#4324)
Summary: Pull Request resolved: #4324 X-link: facebookresearch/FBGEMM#1397 Add the NVFP4 quantization emulation kernels as reference. These kernels were provided by Nvidia and used for the FP4 QAT study on LLaMa3 8B. We can use these kernels for numerical studies on H100. Reviewed By: jiawenliu64 Differential Revision: D76363519 fbshipit-source-id: e3341d1ef38591eae061d1a457a1c1e82b336ebb
1 parent 998e7a7 commit 812a54e

File tree

4 files changed

+485
-1
lines changed

4 files changed

+485
-1
lines changed

fbgemm_gpu/experimental/gen_ai/src/quantize/quantize.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,12 @@ void scaled_fp4_quant(
262262
at::Tensor const& output_sf,
263263
at::Tensor const& input_sf);
264264

265+
std::vector<at::Tensor> fake_quantize_nvfp4_per_tensor(
266+
at::Tensor input,
267+
std::optional<at::Tensor> static_scales,
268+
std::optional<at::Tensor> bs, // batch size
269+
std::optional<at::Tensor> scale_ub); // scale upperbound
270+
265271
TORCH_LIBRARY_IMPL(fbgemm, CUDA, m) {
266272
m.impl("f8f8bf16_blockwise", f8f8bf16_blockwise);
267273
m.impl("f8f8bf16_tensorwise", f8f8bf16_tensorwise);
@@ -308,6 +314,7 @@ TORCH_LIBRARY_IMPL(fbgemm, CUDA, m) {
308314
m.impl("bf16i4bf16_rowwise", bf16i4bf16_rowwise);
309315
m.impl("scaled_fp4_quant", scaled_fp4_quant);
310316
m.impl("i8i8bf16_dynamic", i8i8bf16_dynamic);
317+
m.impl("fake_quantize_nvfp4_per_tensor", fake_quantize_nvfp4_per_tensor);
311318
#endif
312319

313320
#ifdef USE_ROCM
@@ -357,6 +364,7 @@ TORCH_LIBRARY_IMPL(fbgemm, CPU, m) {
357364
m.impl("bf16i4bf16_rowwise_batched", bf16i4bf16_rowwise_batched);
358365
m.impl("bf16i4bf16_rowwise", bf16i4bf16_rowwise);
359366
m.impl("scaled_fp4_quant", scaled_fp4_quant);
367+
m.impl("fake_quantize_nvfp4_per_tensor", fake_quantize_nvfp4_per_tensor);
360368
#endif
361369
}
362370

0 commit comments

Comments (0)