Skip to content

Commit bf01538

Browse files
Merge pull request #1128 from akx/type-fixes
Minor type/doc fixes
2 parents b03ce0e + 0c6dda0 commit bf01538

File tree

12 files changed, with 82 additions (+82) and 82 deletions (-82).

12 files changed

+82
-82
lines changed

bitsandbytes/functional.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1618,18 +1618,18 @@ def optimizer_update_8bit(
16181618
g: Tensor,
16191619
p: Tensor,
16201620
state1: Tensor,
1621-
state2: Tensor,
1621+
state2: Optional[torch.Tensor],
16221622
beta1: float,
16231623
beta2: float,
16241624
eps: float,
16251625
step: int,
16261626
lr: float,
16271627
qmap1: Tensor,
1628-
qmap2: Tensor,
1628+
qmap2: Optional[torch.Tensor],
16291629
max1: Tensor,
1630-
max2: Tensor,
1630+
max2: Optional[torch.Tensor],
16311631
new_max1: Tensor,
1632-
new_max2: Tensor,
1632+
new_max2: Optional[torch.Tensor],
16331633
weight_decay: float = 0.0,
16341634
gnorm_scale: float = 1.0,
16351635
unorm_vec: Optional[torch.Tensor] = None,
@@ -1751,16 +1751,16 @@ def optimizer_update_8bit_blockwise(
17511751
g: Tensor,
17521752
p: Tensor,
17531753
state1: Tensor,
1754-
state2: Tensor,
1754+
state2: Optional[torch.Tensor],
17551755
beta1: float,
17561756
beta2: float,
17571757
eps: float,
17581758
step: int,
17591759
lr: float,
17601760
qmap1: Tensor,
1761-
qmap2: Tensor,
1761+
qmap2: Optional[torch.Tensor],
17621762
absmax1: Tensor,
1763-
absmax2: Tensor,
1763+
absmax2: Optional[torch.Tensor],
17641764
weight_decay: float = 0.0,
17651765
gnorm_scale: float = 1.0,
17661766
skip_zeros=False,

bitsandbytes/nn/modules.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -658,8 +658,8 @@ class Linear8bitLt(nn.Linear):
658658

659659
def __init__(
660660
self,
661-
input_features,
662-
output_features,
661+
input_features: int,
662+
output_features: int,
663663
bias=True,
664664
has_fp16_weights=True,
665665
memory_efficient_backward=False,
@@ -671,9 +671,9 @@ def __init__(
671671
Initialize Linear8bitLt class.
672672
673673
Args:
674-
input_features (`str`):
674+
input_features (`int`):
675675
Number of input features of the linear layer.
676-
output_features (`str`):
676+
output_features (`int`):
677677
Number of output features of the linear layer.
678678
bias (`bool`, defaults to `True`):
679679
Whether the linear class uses the bias term as well.

bitsandbytes/optim/adagrad.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ def __init__(
3838
The epsilon value prevents division by zero in the optimizer.
3939
optim_bits (`int`, defaults to 32):
4040
The number of bits of the optimizer state.
41-
args (`dict`, defaults to `None`):
42-
A dictionary with additional arguments.
41+
args (`object`, defaults to `None`):
42+
An object with additional arguments.
4343
min_8bit_size (`int`, defaults to 4096):
4444
The minimum number of elements of the parameter tensors for 8-bit optimization.
4545
percentile_clipping (`int`, defaults to 100):
@@ -105,8 +105,8 @@ def __init__(
105105
The epsilon value prevents division by zero in the optimizer.
106106
optim_bits (`int`, defaults to 8):
107107
The number of bits of the optimizer state.
108-
args (`dict`, defaults to `None`):
109-
A dictionary with additional arguments.
108+
args (`object`, defaults to `None`):
109+
An object with additional arguments.
110110
min_8bit_size (`int`, defaults to 4096):
111111
The minimum number of elements of the parameter tensors for 8-bit optimization.
112112
percentile_clipping (`int`, defaults to 100):
@@ -173,8 +173,8 @@ def __init__(
173173
The epsilon value prevents division by zero in the optimizer.
174174
optim_bits (`int`, defaults to 32):
175175
The number of bits of the optimizer state.
176-
args (`dict`, defaults to `None`):
177-
A dictionary with additional arguments.
176+
args (`object`, defaults to `None`):
177+
An object with additional arguments.
178178
min_8bit_size (`int`, defaults to 4096):
179179
The minimum number of elements of the parameter tensors for 8-bit optimization.
180180
percentile_clipping (`int`, defaults to 100):

bitsandbytes/optim/adam.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ def __init__(
4747
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
4848
optim_bits (`int`, defaults to 32):
4949
The number of bits of the optimizer state.
50-
args (`dict`, defaults to `None`):
51-
A dictionary with additional arguments.
50+
args (`object`, defaults to `None`):
51+
An object with additional arguments.
5252
min_8bit_size (`int`, defaults to 4096):
5353
The minimum number of elements of the parameter tensors for 8-bit optimization.
5454
percentile_clipping (`int`, defaults to 100):
@@ -108,8 +108,8 @@ def __init__(
108108
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
109109
optim_bits (`int`, defaults to 32):
110110
The number of bits of the optimizer state.
111-
args (`dict`, defaults to `None`):
112-
A dictionary with additional arguments.
111+
args (`object`, defaults to `None`):
112+
An object with additional arguments.
113113
min_8bit_size (`int`, defaults to 4096):
114114
The minimum number of elements of the parameter tensors for 8-bit optimization.
115115
percentile_clipping (`int`, defaults to 100):
@@ -169,8 +169,8 @@ def __init__(
169169
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
170170
optim_bits (`int`, defaults to 32):
171171
The number of bits of the optimizer state.
172-
args (`dict`, defaults to `None`):
173-
A dictionary with additional arguments.
172+
args (`object`, defaults to `None`):
173+
An object with additional arguments.
174174
min_8bit_size (`int`, defaults to 4096):
175175
The minimum number of elements of the parameter tensors for 8-bit optimization.
176176
percentile_clipping (`int`, defaults to 100):
@@ -230,8 +230,8 @@ def __init__(
230230
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
231231
optim_bits (`int`, defaults to 32):
232232
The number of bits of the optimizer state.
233-
args (`dict`, defaults to `None`):
234-
A dictionary with additional arguments.
233+
args (`object`, defaults to `None`):
234+
An object with additional arguments.
235235
min_8bit_size (`int`, defaults to 4096):
236236
The minimum number of elements of the parameter tensors for 8-bit optimization.
237237
percentile_clipping (`int`, defaults to 100):
@@ -291,8 +291,8 @@ def __init__(
291291
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
292292
optim_bits (`int`, defaults to 32):
293293
The number of bits of the optimizer state.
294-
args (`dict`, defaults to `None`):
295-
A dictionary with additional arguments.
294+
args (`object`, defaults to `None`):
295+
An object with additional arguments.
296296
min_8bit_size (`int`, defaults to 4096):
297297
The minimum number of elements of the parameter tensors for 8-bit optimization.
298298
percentile_clipping (`int`, defaults to 100):
@@ -352,8 +352,8 @@ def __init__(
352352
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
353353
optim_bits (`int`, defaults to 32):
354354
The number of bits of the optimizer state.
355-
args (`dict`, defaults to `None`):
356-
A dictionary with additional arguments.
355+
args (`object`, defaults to `None`):
356+
An object with additional arguments.
357357
min_8bit_size (`int`, defaults to 4096):
358358
The minimum number of elements of the parameter tensors for 8-bit optimization.
359359
percentile_clipping (`int`, defaults to 100):

bitsandbytes/optim/adamw.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ def __init__(
3939
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
4040
optim_bits (`int`, defaults to 32):
4141
The number of bits of the optimizer state.
42-
args (`dict`, defaults to `None`):
43-
A dictionary with additional arguments.
42+
args (`object`, defaults to `None`):
43+
An object with additional arguments.
4444
min_8bit_size (`int`, defaults to 4096):
4545
The minimum number of elements of the parameter tensors for 8-bit optimization.
4646
percentile_clipping (`int`, defaults to 100):
@@ -100,8 +100,8 @@ def __init__(
100100
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
101101
optim_bits (`int`, defaults to 32):
102102
The number of bits of the optimizer state.
103-
args (`dict`, defaults to `None`):
104-
A dictionary with additional arguments.
103+
args (`object`, defaults to `None`):
104+
An object with additional arguments.
105105
min_8bit_size (`int`, defaults to 4096):
106106
The minimum number of elements of the parameter tensors for 8-bit optimization.
107107
percentile_clipping (`int`, defaults to 100):
@@ -161,8 +161,8 @@ def __init__(
161161
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
162162
optim_bits (`int`, defaults to 32):
163163
The number of bits of the optimizer state.
164-
args (`dict`, defaults to `None`):
165-
A dictionary with additional arguments.
164+
args (`object`, defaults to `None`):
165+
An object with additional arguments.
166166
min_8bit_size (`int`, defaults to 4096):
167167
The minimum number of elements of the parameter tensors for 8-bit optimization.
168168
percentile_clipping (`int`, defaults to 100):
@@ -221,8 +221,8 @@ def __init__(
221221
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
222222
optim_bits (`int`, defaults to 32):
223223
The number of bits of the optimizer state.
224-
args (`dict`, defaults to `None`):
225-
A dictionary with additional arguments.
224+
args (`object`, defaults to `None`):
225+
An object with additional arguments.
226226
min_8bit_size (`int`, defaults to 4096):
227227
The minimum number of elements of the parameter tensors for 8-bit optimization.
228228
percentile_clipping (`int`, defaults to 100):
@@ -281,8 +281,8 @@ def __init__(
281281
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
282282
optim_bits (`int`, defaults to 32):
283283
The number of bits of the optimizer state.
284-
args (`dict`, defaults to `None`):
285-
A dictionary with additional arguments.
284+
args (`object`, defaults to `None`):
285+
An object with additional arguments.
286286
min_8bit_size (`int`, defaults to 4096):
287287
The minimum number of elements of the parameter tensors for 8-bit optimization.
288288
percentile_clipping (`int`, defaults to 100):
@@ -341,8 +341,8 @@ def __init__(
341341
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
342342
optim_bits (`int`, defaults to 32):
343343
The number of bits of the optimizer state.
344-
args (`dict`, defaults to `None`):
345-
A dictionary with additional arguments.
344+
args (`object`, defaults to `None`):
345+
An object with additional arguments.
346346
min_8bit_size (`int`, defaults to 4096):
347347
The minimum number of elements of the parameter tensors for 8-bit optimization.
348348
percentile_clipping (`int`, defaults to 100):

bitsandbytes/optim/lamb.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ def __init__(
4545
Whether to use the AdamW variant.
4646
optim_bits (`int`, defaults to 32):
4747
The number of bits of the optimizer state.
48-
args (`dict`, defaults to `None`):
49-
A dictionary with additional arguments.
48+
args (`object`, defaults to `None`):
49+
An object with additional arguments.
5050
min_8bit_size (`int`, defaults to 4096):
5151
The minimum number of elements of the parameter tensors for 8-bit optimization.
5252
percentile_clipping (`int`, defaults to 100):
@@ -109,8 +109,8 @@ def __init__(
109109
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
110110
adam_w_mode (`bool`, defaults to `True`):
111111
Whether to use the AdamW variant.
112-
args (`dict`, defaults to `None`):
113-
A dictionary with additional arguments.
112+
args (`object`, defaults to `None`):
113+
An object with additional arguments.
114114
min_8bit_size (`int`, defaults to 4096):
115115
The minimum number of elements of the parameter tensors for 8-bit optimization.
116116
percentile_clipping (`int`, defaults to 100):
@@ -173,8 +173,8 @@ def __init__(
173173
Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
174174
adam_w_mode (`bool`, defaults to `True`):
175175
Whether to use the AdamW variant.
176-
args (`dict`, defaults to `None`):
177-
A dictionary with additional arguments.
176+
args (`object`, defaults to `None`):
177+
An object with additional arguments.
178178
min_8bit_size (`int`, defaults to 4096):
179179
The minimum number of elements of the parameter tensors for 8-bit optimization.
180180
percentile_clipping (`int`, defaults to 100):

bitsandbytes/optim/lars.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ def __init__(
4141
Whether to use Nesterov momentum.
4242
optim_bits (`int`, defaults to 32):
4343
The number of bits of the optimizer state.
44-
args (`dict`, defaults to `None`):
45-
A dictionary with additional arguments.
44+
args (`object`, defaults to `None`):
45+
An object with additional arguments.
4646
min_8bit_size (`int`, defaults to 4096):
4747
The minimum number of elements of the parameter tensors for 8-bit optimization.
4848
percentile_clipping (`int`, defaults to 100):
@@ -98,8 +98,8 @@ def __init__(
9898
The weight decay value for the optimizer.
9999
nesterov (`bool`, defaults to `False`):
100100
Whether to use Nesterov momentum.
101-
args (`dict`, defaults to `None`):
102-
A dictionary with additional arguments.
101+
args (`object`, defaults to `None`):
102+
An object with additional arguments.
103103
min_8bit_size (`int`, defaults to 4096):
104104
The minimum number of elements of the parameter tensors for 8-bit optimization.
105105
percentile_clipping (`int`, defaults to 100):
@@ -155,8 +155,8 @@ def __init__(
155155
The weight decay value for the optimizer.
156156
nesterov (`bool`, defaults to `False`):
157157
Whether to use Nesterov momentum.
158-
args (`dict`, defaults to `None`):
159-
A dictionary with additional arguments.
158+
args (`object`, defaults to `None`):
159+
An object with additional arguments.
160160
min_8bit_size (`int`, defaults to 4096):
161161
The minimum number of elements of the parameter tensors for 8-bit optimization.
162162
percentile_clipping (`int`, defaults to 100):

bitsandbytes/optim/lion.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ def __init__(
3333
The weight decay value for the optimizer.
3434
optim_bits (`int`, defaults to 32):
3535
The number of bits of the optimizer state.
36-
args (`dict`, defaults to `None`):
37-
A dictionary with additional arguments.
36+
args (`object`, defaults to `None`):
37+
An object with additional arguments.
3838
min_8bit_size (`int`, defaults to 4096):
3939
The minimum number of elements of the parameter tensors for 8-bit optimization.
4040
percentile_clipping (`int`, defaults to 100):
@@ -85,8 +85,8 @@ def __init__(
8585
The beta values are the decay rates of the first and second-order moment of the optimizer.
8686
weight_decay (`float`, defaults to 0):
8787
The weight decay value for the optimizer.
88-
args (`dict`, defaults to `None`):
89-
A dictionary with additional arguments.
88+
args (`object`, defaults to `None`):
89+
An object with additional arguments.
9090
min_8bit_size (`int`, defaults to 4096):
9191
The minimum number of elements of the parameter tensors for 8-bit optimization.
9292
percentile_clipping (`int`, defaults to 100):
@@ -137,8 +137,8 @@ def __init__(
137137
The beta values are the decay rates of the first and second-order moment of the optimizer.
138138
weight_decay (`float`, defaults to 0):
139139
The weight decay value for the optimizer.
140-
args (`dict`, defaults to `None`):
141-
A dictionary with additional arguments.
140+
args (`object`, defaults to `None`):
141+
An object with additional arguments.
142142
min_8bit_size (`int`, defaults to 4096):
143143
The minimum number of elements of the parameter tensors for 8-bit optimization.
144144
percentile_clipping (`int`, defaults to 100):
@@ -191,8 +191,8 @@ def __init__(
191191
The weight decay value for the optimizer.
192192
optim_bits (`int`, defaults to 32):
193193
The number of bits of the optimizer state.
194-
args (`dict`, defaults to `None`):
195-
A dictionary with additional arguments.
194+
args (`object`, defaults to `None`):
195+
An object with additional arguments.
196196
min_8bit_size (`int`, defaults to 4096):
197197
The minimum number of elements of the parameter tensors for 8-bit optimization.
198198
percentile_clipping (`int`, defaults to 100):
@@ -242,8 +242,8 @@ def __init__(
242242
The weight decay value for the optimizer.
243243
optim_bits (`int`, defaults to 32):
244244
The number of bits of the optimizer state.
245-
args (`dict`, defaults to `None`):
246-
A dictionary with additional arguments.
245+
args (`object`, defaults to `None`):
246+
An object with additional arguments.
247247
min_8bit_size (`int`, defaults to 4096):
248248
The minimum number of elements of the parameter tensors for 8-bit optimization.
249249
percentile_clipping (`int`, defaults to 100):
@@ -293,8 +293,8 @@ def __init__(
293293
The weight decay value for the optimizer.
294294
optim_bits (`int`, defaults to 32):
295295
The number of bits of the optimizer state.
296-
args (`dict`, defaults to `None`):
297-
A dictionary with additional arguments.
296+
args (`object`, defaults to `None`):
297+
An object with additional arguments.
298298
min_8bit_size (`int`, defaults to 4096):
299299
The minimum number of elements of the parameter tensors for 8-bit optimization.
300300
percentile_clipping (`int`, defaults to 100):

0 commit comments

Comments (0)