@@ -261,6 +261,7 @@ class ReplicatedLinear(LinearBase):
261
261
quant_config: Quantization configuration.
262
262
prefix: The name of the layer in the state dict, including all parents
263
263
(e.g. model.layers.0.qkv_proj)
264
+ return_bias: If true, return bias together with outputs in forward pass.
264
265
"""
265
266
266
267
def __init__ (
@@ -523,6 +524,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
523
524
quant_config: Quantization configuration.
524
525
prefix: The name of the layer in the state dict, including all parents
525
526
(e.g. model.layers.0.qkv_proj)
527
+ return_bias: If true, return bias together with outputs in forward pass.
526
528
"""
527
529
528
530
def __init__ (
@@ -805,6 +807,7 @@ class QKVParallelLinear(ColumnParallelLinear):
805
807
quant_config: Quantization configuration.
806
808
prefix: The name of the layer in the state dict, including all parents
807
809
(e.g. model.layers.0.qkv_proj)
810
+ return_bias: If true, return bias together with outputs in forward pass.
808
811
"""
809
812
810
813
def __init__ (
@@ -1155,7 +1158,13 @@ class RowParallelLinear(LinearBase):
1155
1158
bias can be fused with other element-wise operations.
1156
1159
We skip adding bias but instead return it.
1157
1160
params_dtype: Data type for the parameters.
1161
+ reduce_results: If true, call all-reduce on output and make Y available
1162
+ to all GPUs; otherwise, every GPU will have its own output
1163
+ which is Y = X_iA_i
1158
1164
quant_config: Quantization configuration.
1165
+ prefix: The name of the layer in the state dict, including all parents
1166
+ (e.g. model.layers.0.down_proj)
1167
+ return_bias: If true, return bias together with outputs in forward pass.
1159
1168
"""
1160
1169
1161
1170
def __init__ (
0 commit comments