
Commit a128138

value_and_grad support attach grads, Parameter support accumulate a… (#1833)
1 parent ac841c9 commit a128138
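
The notebooks changed below all migrate to the same pattern: import value_and_grad from mindnlp.core (or mindnlp.core.autograd), let it attach gradients to the trainable Parameters, and drive the optimizer without passing gradients explicitly. A minimal sketch of that pattern, assuming model, optimizer, and train_dataset are already set up as in the notebooks:

from mindnlp.core import value_and_grad

def forward_fn(**batch):
    # forward pass returning a scalar loss
    outputs = model(**batch)
    return outputs.loss

# With attach_grads=True the returned grad_fn yields only the loss;
# gradients are attached to the Parameters that were passed in.
grad_fn = value_and_grad(forward_fn, model.trainable_params(), attach_grads=True)

for batch in train_dataset.create_dict_iterator():
    optimizer.zero_grad()    # clear gradients held on the Parameters
    loss = grad_fn(**batch)  # forward + backward; grads stay on the Parameters
    optimizer.step()         # update from the attached gradients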


24 files changed: +375, -288 lines


applications/LayoutLMv2/Fine_tuning_LayoutLMv2ForTokenClassification_on_CORD.ipynb

Lines changed: 8 additions & 6 deletions
@@ -2499,11 +2499,11 @@
 },
 {
 "cell_type": "code",
-"execution_count": 18,
+"execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
-"from mindspore import value_and_grad\n",
+"from mindnlp.core.autograd import value_and_grad\n",
 "\n",
 "def forward_fn(batch):\n",
 " # get the inputs;\n",
@@ -2524,12 +2524,12 @@
 " \n",
 " return loss\n",
 "\n",
-"grad_fn = value_and_grad(forward_fn, None, model.trainable_params())"
+"grad_fn = value_and_grad(forward_fn, model.trainable_params(), attach_grads=True)"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 26,
+"execution_count": null,
 "metadata": {
 "tags": []
 },
@@ -2824,9 +2824,11 @@
 "for epoch in range(num_train_epochs): \n",
 " print(\"Epoch:\", epoch)\n",
 " for batch in tqdm(train_dataloader.create_dict_iterator()):\n",
+" optimizer.zero_grad()\n",
 " # forward, backward + optimize\n",
-" loss, grads = grad_fn(batch)\n",
-" optimizer.step(grads)\n",
+" loss = grad_fn(batch)\n",
+" optimizer.step()\n",
+"\n",
 "\n",
 " # print loss every 100 steps\n",
 " if global_step % 100 == 0:\n",

examples/classification/bert_emotect_finetune.ipynb

Lines changed: 165 additions & 83 deletions
Large diffs are not rendered by default.

llm/peft/adalora/train_adalora_seq2seq/peft_adalora_seq2seq.ipynb

Lines changed: 7 additions & 9 deletions
@@ -245,7 +245,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
+"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
 "from mindnlp.core import optim\n",
 "# Setting up optimizer and learning rate scheduler\n",
 "optimizer = optim.AdamW(model.trainable_params(), lr=1e-3)\n",
@@ -263,7 +263,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 12,
+"execution_count": null,
 "metadata": {},
 "outputs": [
 {
@@ -423,18 +423,15 @@
 }
 ],
 "source": [
+"from mindnlp.core import value_and_grad\n",
 "# Forward function to compute the loss\n",
 "def forward_fn(**batch):\n",
 " outputs = model(**batch)\n",
 " loss = outputs.loss\n",
 " return loss\n",
 "\n",
 "# Gradient function to compute gradients for optimization\n",
-"grad_fn = mindspore.value_and_grad(forward_fn, None, model.trainable_params())\n",
-"# Define the training step function\n",
-"def train_step(**batch):\n",
-" loss,grads = grad_fn(**batch)\n",
-" return loss,grads\n",
+"grad_fn = value_and_grad(forward_fn, model.trainable_params(), attach_grads=True)\n",
 "\n",
 "from mindspore import ops\n",
 "global_step = 0\n",
@@ -444,8 +441,9 @@
 " train_total_size = train_dataset.get_dataset_size()\n",
 " # Iterate over each entry in the training dataset\n",
 " for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)): \n",
-" loss,grads = train_step(**batch)\n",
-" optimizer.step(grads)\n",
+" optimizer.zero_grad()\n",
+" loss = grad_fn(**batch)\n",
+" optimizer.step()\n",
 " total_loss += loss.float() # Accumulate loss for monitoring\n",
 " lr_scheduler.step() # Update learning rate based on scheduler\n",
 " # model.base_model.update_and_allocate(global_step,grads)\n",

llm/peft/dora/dora_finetuning_mindnlp_mt0.ipynb

Lines changed: 7 additions & 9 deletions
@@ -28,7 +28,7 @@
 "from mindnlp.core import ops\n",
 "\n",
 "from mindnlp.transformers import AutoTokenizer\n",
-"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
+"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
 "from tqdm import tqdm\n",
 "\n",
 "model_name_or_path = \"bigscience/mt0-small\"\n",
@@ -246,7 +246,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 50,
+"execution_count": null,
 "id": "6b3a4090",
 "metadata": {},
 "outputs": [
@@ -304,25 +304,23 @@
 }
 ],
 "source": [
+"from mindnlp.core import value_and_grad\n",
 "# training and evaluation\n",
 "def forward_fn(**batch):\n",
 " outputs = model(**batch)\n",
 " loss = outputs.loss\n",
 " return loss\n",
 "\n",
-"grad_fn = mindspore.value_and_grad(forward_fn, None, model.trainable_params())\n",
-"\n",
-"def train_step(**batch):\n",
-" loss, grads = grad_fn(**batch)\n",
-" optimizer.step(grads)\n",
-" return loss\n",
+"grad_fn = value_and_grad(forward_fn, model.trainable_params(), attach_grads=True)\n",
 "\n",
 "for epoch in range(num_epochs):\n",
 " model.set_train()\n",
 " total_loss = 0\n",
 " train_total_size = train_dataset.get_dataset_size()\n",
 " for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
-" loss = train_step(**batch)\n",
+" optimizer.zero_grad()\n",
+" loss = grad_fn(**batch)\n",
+" optimizer.step()\n",
 " total_loss += loss.float()\n",
 " lr_scheduler.step()\n",
 "\n",

llm/peft/ia3/seq_2_seq/peft_ia3_mindnlp.ipynb

Lines changed: 7 additions & 9 deletions
@@ -28,7 +28,7 @@
 "from mindnlp.core import ops\n",
 "\n",
 "from mindnlp.transformers import AutoTokenizer\n",
-"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
+"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
 "from tqdm import tqdm\n",
 "\n",
 "model_name_or_path = \"bigscience/mt0-small\"\n",
@@ -229,7 +229,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 11,
+"execution_count": null,
 "id": "6b3a4090",
 "metadata": {},
 "outputs": [
@@ -287,25 +287,23 @@
 }
 ],
 "source": [
+"from mindnlp.core import value_and_grad\n",
 "# training and evaluation\n",
 "def forward_fn(**batch):\n",
 " outputs = model(**batch)\n",
 " loss = outputs.loss\n",
 " return loss\n",
 "\n",
-"grad_fn = mindspore.value_and_grad(forward_fn, None, model.trainable_params())\n",
-"\n",
-"def train_step(**batch):\n",
-" loss, grads = grad_fn(**batch)\n",
-" optimizer.step(grads)\n",
-" return loss\n",
+"grad_fn = value_and_grad(forward_fn, model.trainable_params())\n",
 "\n",
 "for epoch in range(num_epochs):\n",
 " model.set_train()\n",
 " total_loss = 0\n",
 " train_total_size = train_dataset.get_dataset_size()\n",
 " for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
-" loss = train_step(**batch)\n",
+" optimizer.zero_grad()\n",
+" loss = grad_fn(**batch)\n",
+" optimizer.step()\n",
 " total_loss += loss.float()\n",
 " lr_scheduler.step()\n",
 "\n",

llm/peft/ia3/sequence_classification.ipynb

Lines changed: 8 additions & 10 deletions
@@ -47,7 +47,7 @@
 "\n",
 "import mindnlp.evaluate as evaluate\n",
 "from mindnlp.dataset import load_dataset\n",
-"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
+"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
 "from mindnlp.transformers import AutoModelForSequenceClassification, AutoTokenizer\n"
 ]
 },
@@ -527,7 +527,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 12,
+"execution_count": null,
 "id": "fa0e73be",
 "metadata": {
 "ExecuteTime": {
@@ -746,24 +746,22 @@
 }
 ],
 "source": [
+"from mindnlp.core import value_and_grad\n",
+"\n",
 "def forward_fn(**batch):\n",
 " outputs = model(**batch)\n",
 " loss = outputs.loss\n",
 " return loss\n",
 "\n",
-"grad_fn = mindspore.value_and_grad(forward_fn, None, tuple(model.parameters()))\n",
-"\n",
-"def train_step(**batch):\n",
-" loss, grads = grad_fn(**batch)\n",
-" optimizer.step(grads)\n",
-" return loss\n",
-"\n",
+"grad_fn = value_and_grad(forward_fn, tuple(model.parameters()))\n",
 "\n",
 "for epoch in range(num_epochs):\n",
 " model.set_train()\n",
 " train_total_size = train_dataset.get_dataset_size()\n",
 " for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
-" loss = train_step(**batch)\n",
+" optimizer.zero_grad()\n",
+" loss = grad_fn(**batch)\n",
+" optimizer.step()\n",
 " lr_scheduler.step()\n",
 "\n",
 " model.set_train(False)\n",

llm/peft/lora/lora_seq2seq.ipynb

Lines changed: 7 additions & 9 deletions
@@ -40,7 +40,7 @@
 "from mindnlp.core import ops\n",
 "\n",
 "from mindnlp.transformers import AutoTokenizer\n",
-"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
+"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
 "from tqdm import tqdm\n",
 "\n",
 "model_name_or_path = \"bigscience/mt0-large\"\n",
@@ -318,7 +318,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 12,
+"execution_count": null,
 "id": "6b3a4090",
 "metadata": {},
 "outputs": [
@@ -390,25 +390,23 @@
 }
 ],
 "source": [
+"from mindnlp.core import value_and_grad\n",
 "# training and evaluation\n",
 "def forward_fn(**batch):\n",
 " outputs = model(**batch)\n",
 " loss = outputs.loss\n",
 " return loss\n",
 "\n",
-"grad_fn = mindspore.value_and_grad(forward_fn, None, model.trainable_params())\n",
-"\n",
-"def train_step(**batch):\n",
-" loss, grads = grad_fn(**batch)\n",
-" optimizer.step(grads)\n",
-" return loss\n",
+"grad_fn = value_and_grad(forward_fn, model.trainable_params())\n",
 "\n",
 "for epoch in range(num_epochs):\n",
 " model.set_train()\n",
 " total_loss = 0\n",
 " train_total_size = train_dataset.get_dataset_size()\n",
 " for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
-" loss = train_step(**batch)\n",
+" optimizer.zero_grad()\n",
+" loss = grad_fn(**batch)\n",
+" optimizer.step()\n",
 " total_loss += loss.float()\n",
 " lr_scheduler.step()\n",
 "\n",

llm/peft/lora/multilayer_perceptron/multilayer_perceptron_lora.ipynb

Lines changed: 8 additions & 8 deletions
@@ -228,7 +228,7 @@
 ],
 "source": [
 "from mindnlp.core import optim\n",
-"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
+"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
 "\n",
 "module = MLP()\n",
 "for name, param in module.named_parameters():\n",
@@ -319,7 +319,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 15,
+"execution_count": null,
 "id": "1cfbbe7e",
 "metadata": {},
 "outputs": [
@@ -783,25 +783,25 @@
 ],
 "source": [
 "from tqdm import tqdm\n",
+"from mindnlp.core import value_and_grad\n",
 "\n",
 "def forward_fn(**batch):\n",
 " outputs = model(batch[\"input_ids\"])\n",
 " loss = criterion(outputs, batch[\"labels\"])\n",
 " return loss\n",
 "\n",
-"grad_fn = ms.value_and_grad(forward_fn, None, model.trainable_params())\n",
+"grad_fn = value_and_grad(forward_fn, model.trainable_params())\n",
 "\n",
-"def train_step(**batch):\n",
-" loss, grads = grad_fn(**batch)\n",
-" optimizer.step(grads)\n",
-" return loss\n",
 "\n",
 "for epoch in range(num_epochs):\n",
 " model.set_train(True)\n",
 " train_loss = 0\n",
 " train_total_size = train_dataset.get_dataset_size()\n",
 " for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
-" loss = train_step(**batch)\n",
+" optimizer.zero_grad()\n",
+" loss = grad_fn(**batch)\n",
+" optimizer.step()\n",
+"\n",
 " train_loss += loss.float()\n",
 " lr_scheduler.step()\n",
 "\n",

llm/peft/lora/roberta_sequence_classification.ipynb

Lines changed: 6 additions & 8 deletions
@@ -62,7 +62,7 @@
 "from mindnlp.dataset import load_dataset\n",
 "from mindnlp.engine import set_seed\n",
 "from mindnlp.transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
-"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
+"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
 "from mindnlp.peft import (\n",
 " get_peft_config,\n",
 " get_peft_model,\n",
@@ -462,23 +462,21 @@
 }
 ],
 "source": [
+"from mindnlp.core import value_and_grad\n",
 "def forward_fn(**batch):\n",
 " outputs = model(**batch)\n",
 " loss = outputs.loss\n",
 " return loss\n",
 "\n",
-"grad_fn = mindspore.value_and_grad(forward_fn, None, tuple(param for param in model.parameters() if param.requires_grad))\n",
-"\n",
-"def train_step(**batch):\n",
-" loss, grads = grad_fn(**batch)\n",
-" optimizer.step(grads)\n",
-" return loss\n",
+"grad_fn = value_and_grad(forward_fn, tuple(param for param in model.parameters() if param.requires_grad))\n",
 "\n",
 "for epoch in range(num_epochs):\n",
 " model.set_train()\n",
 " train_total_size = train_dataset.get_dataset_size()\n",
 " for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
-" loss = train_step(**batch)\n",
+" optimizer.zero_grad()\n",
+" loss = grad_fn(**batch)\n",
+" optimizer.step()\n",
 " lr_scheduler.step()\n",
 "\n",
 " model.set_train(False)\n",
