File tree Expand file tree Collapse file tree 2 files changed +4
-3
lines changed
vllm/model_executor/layers/fused_moe Expand file tree Collapse file tree 2 files changed +4
-3
lines changed Original file line number Diff line number Diff line change @@ -12,7 +12,7 @@ ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
12
12
ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
13
13
ARG FA_BRANCH="1a7f4dfa"
14
14
ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
15
- ARG AITER_BRANCH="c1debd8 "
15
+ ARG AITER_BRANCH="6487649 "
16
16
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
17
17
18
18
FROM ${BASE_IMAGE} AS base
Original file line number Diff line number Diff line change @@ -22,8 +22,9 @@ class QuantMethod(IntEnum):
22
22
NO = 0 # a16w16
23
23
PER_TENSOR = 1 # w8a8 (per_Tensor)
24
24
PER_TOKEN = 2 # w8a8/w8a4 (per_Token)
25
- BLOCK_1X128 = 3 # block quantized w8a8 (per_1x128)
26
- BLOCK_128x128 = 4 # block quantized w8a8 (per_128x128)
25
+ BLOCK_1X32 = 3 # fp4x2
26
+ BLOCK_1X128 = 4 # block quantized w8a8 (per_1x128)
27
+ BLOCK_128x128 = 5 # block quantized w8a8 (per_128x128)
27
28
28
29
29
30
class ActivationMethod (IntEnum ):
You can’t perform that action at this time.
0 commit comments