Skip to content

Commit 26d5d73

Browse files
authored
【Feature】support some qwen2 functions (#2740)
* add RL qwen model support * fix * fix
1 parent fefbd65 commit 26d5d73

File tree

13 files changed

+442
-178
lines changed

13 files changed

+442
-178
lines changed

fastdeploy/distributed/communication_op.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@
1717
import paddle
1818
import paddle.distributed as dist
1919

20-
21-
@paddle.jit.marker.unified
22-
def tensor_model_parallel_all_reduce(input_: paddle.Tensor) -> paddle.Tensor:
23-
"""All-reduce the input tensor across model parallel group."""
24-
if paddle.in_dynamic_mode():
25-
hcg = dist.fleet.get_hybrid_communicate_group()
26-
mp_group = hcg.get_model_parallel_group()
27-
dist.all_reduce(input_, group=mp_group)
28-
else:
29-
dist.all_reduce(input_)
20+
try:
21+
@paddle.jit.marker.unified
22+
def tensor_model_parallel_all_reduce(input_: paddle.Tensor) -> paddle.Tensor:
23+
"""All-reduce the input tensor across model parallel group."""
24+
if paddle.in_dynamic_mode():
25+
hcg = dist.fleet.get_hybrid_communicate_group()
26+
mp_group = hcg.get_model_parallel_group()
27+
dist.all_reduce(input_, group=mp_group)
28+
else:
29+
dist.all_reduce(input_)
30+
except:
31+
tensor_model_parallel_all_reduce=None

fastdeploy/model_executor/layers/attention/flash_attn_backend.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,11 @@
2121
from typing import List, Optional
2222

2323
import paddle
24-
from paddle.nn.functional.flash_attention import flash_attention_v3_varlen
24+
25+
try:
26+
from paddle.nn.functional.flash_attention import flash_attention_v3_varlen
27+
except:
28+
flash_attention_v3_varlen = None
2529

2630
from fastdeploy.config import FDConfig
2731
from fastdeploy.model_executor.layers.attention.attention import Attention

fastdeploy/model_executor/layers/linear.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def init_weight(self):
294294
)
295295
if self.nranks > 0:
296296
# col parallel
297-
_set_var_distributed(self.linear_weight, split_axis=-1)
297+
_set_var_distributed(self.linear_weight, split_axis=1)
298298

299299
self.linear_bias = None
300300
if self.with_bias:
@@ -305,7 +305,7 @@ def init_weight(self):
305305
)
306306
if self.nranks > 0:
307307
# col parallel
308-
_set_var_distributed(self.linear_bias, split_axis=-1)
308+
_set_var_distributed(self.linear_bias, split_axis=1)
309309

310310
# smooth quant
311311
self.linear_shift = None

fastdeploy/model_executor/layers/moe/moe.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def __init__(
8989
self.routed_scaling_factor = routed_scaling_factor
9090

9191
moe_quant_config = fd_config.quant_config
92+
self.moe_quant_type = None
9293
if moe_quant_config:
9394
self.quant_method = moe_quant_config.get_quant_method(self)
9495
self.moe_quant_type = moe_quant_config.name()
@@ -142,7 +143,7 @@ def init_moe_weights(self):
142143
if self.moe_quant_type == "fp8":
143144
#(TODO:gaoziyuan)
144145
pass
145-
else:
146+
elif self.moe_quant_type == "wint8":
146147
self.weight_dtype = "int8"
147148
self.init_weight_only_scale()
148149

fastdeploy/model_executor/model_loader.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,11 @@ def clean_memory_fragments(self, state_dict: dict) -> None:
9191
def load_model(self, fd_config: FDConfig) -> nn.Layer:
9292
context = paddle.LazyGuard()
9393
architectures = fd_config.model_config.architectures[0]
94-
# TODO(gongshaotian): Now, only support safetensor
95-
model_class = MODEL_CLASSES[architectures]
94+
95+
if fd_config.load_config.dynamic_load_weight:
96+
# register rl model
97+
import fastdeploy.rl
98+
architectures = architectures + "RL"
9699

97100
with context:
98101
model_cls = ModelRegistry.get_class(architectures)
@@ -104,6 +107,8 @@ def load_model(self, fd_config: FDConfig) -> nn.Layer:
104107
if fd_config.load_config.dynamic_load_weight:
105108
return model
106109

110+
# TODO(gongshaotian): Now, only support safetensor
111+
model_class = MODEL_CLASSES[architectures]
107112
state_dict = load_composite_checkpoint(
108113
fd_config.parallel_config.model_name_or_path,
109114
model_class,

fastdeploy/model_executor/models/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ def _find_py_files(root_dir):
3636

3737

3838
def auto_models_registry(dir_path,
39-
register_path="fastdeploy.model_executor.models",
40-
suffix=""):
39+
register_path="fastdeploy.model_executor.models"):
4140
"""
4241
auto registry all models in this folder
4342
"""
@@ -49,7 +48,7 @@ def auto_models_registry(dir_path,
4948
if inspect.isclass(attr) and issubclass(
5049
attr,
5150
ModelForCasualLM) and attr is not ModelForCasualLM:
52-
ModelRegistry.register(attr, suffix=suffix)
51+
ModelRegistry.register(attr)
5352
except ImportError:
5453
raise ImportError(f"{module_file=} import error")
5554

fastdeploy/model_executor/models/model_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ class ModelRegistry:
2828
_registry = {}
2929

3030
@classmethod
31-
def register(cls, model_class, suffix=""):
31+
def register(cls, model_class):
3232
"""register model class"""
3333
if issubclass(
3434
model_class,
3535
ModelForCasualLM) and model_class is not ModelForCasualLM:
36-
cls._registry[f"{model_class.name()}{suffix}"] = model_class
36+
cls._registry[model_class.name()] = model_class
3737
return model_class
3838

3939
@classmethod

fastdeploy/model_executor/models/qwen2.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ def __init__(self, fd_config: FDConfig):
302302
"""
303303
super(Qwen2ForCausalLM, self).__init__(fd_config)
304304

305+
self.fd_config =fd_config
305306
self.model = Qwen2Model(fd_config=fd_config)
306307

307308
self.ori_vocab_size = fd_config.model_config.ori_vocab_size

fastdeploy/model_executor/ops/gpu/__init__.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,19 @@
1313
# limitations under the License.
1414
"""fastdeploy gpu ops"""
1515

16-
import os
16+
import sys
17+
1718
from fastdeploy.import_ops import import_custom_ops
1819

1920
PACKAGE = "fastdeploy.model_executor.ops.gpu"
2021

2122
import_custom_ops(PACKAGE, "..base.fastdeploy_base_ops", globals())
2223
import_custom_ops(PACKAGE, ".fastdeploy_ops", globals())
24+
25+
26+
def tolerant_import_error():
27+
class NoneModule:
28+
def __getattr__(self, name):
29+
return None
30+
31+
sys.modules[__name__] = NoneModule()

fastdeploy/rl/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@
1717

1818
from fastdeploy.model_executor.models import auto_models_registry
1919

20-
auto_models_registry(os.path.dirname(__file__), "fastdeploy.rl", suffix="RL")
20+
auto_models_registry(os.path.dirname(__file__), "fastdeploy.rl")

0 commit comments

Comments
 (0)