We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 5933a34, commit d328f42 (Copy full SHA for d328f42)
verl/workers/fsdp_workers.py
@@ -479,8 +479,12 @@ def _process_multi_modal_inputs(self, data: DataProto):
479
else:
480
multi_modal_inputs = {}
481
482
- # Some image processor return with batch dim (such as glm4.1), we need to squeeze the pix_value.
483
- multi_modal_inputs['pixel_values'] = multi_modal_inputs['pixel_values'].squeeze(0)
+ if multi_modal_inputs['pixel_values'].ndim() == 3:
+ # Some image processors (such as glm4.1) return pixel_values with a leading batch dim, so we need to squeeze it out.
484
+ # i.e. (1,patch,pix_per_patch) -> (patch,pix_per_patch)
485
+ multi_modal_inputs['pixel_values'] = multi_modal_inputs['pixel_values'].squeeze(0)
486
+
487
488
489
multi_modal_inputs_cache[index] = multi_modal_inputs
490
0 commit comments