diff --git a/open_r1/readme.md b/examples/open_r1/readme.md
similarity index 100%
rename from open_r1/readme.md
rename to examples/open_r1/readme.md
diff --git a/open_r1/sh/grpo.sh b/examples/open_r1/sh/grpo.sh
similarity index 100%
rename from open_r1/sh/grpo.sh
rename to examples/open_r1/sh/grpo.sh
diff --git a/open_r1/sh/sft.sh b/examples/open_r1/sh/sft.sh
similarity index 100%
rename from open_r1/sh/sft.sh
rename to examples/open_r1/sh/sft.sh
diff --git a/open_r1/src/mind_openr1/__init__.py b/examples/open_r1/src/mind_openr1/__init__.py
similarity index 100%
rename from open_r1/src/mind_openr1/__init__.py
rename to examples/open_r1/src/mind_openr1/__init__.py
diff --git a/open_r1/src/mind_openr1/configs.py b/examples/open_r1/src/mind_openr1/configs.py
similarity index 100%
rename from open_r1/src/mind_openr1/configs.py
rename to examples/open_r1/src/mind_openr1/configs.py
diff --git a/open_r1/src/mind_openr1/grpo.py b/examples/open_r1/src/mind_openr1/grpo.py
similarity index 100%
rename from open_r1/src/mind_openr1/grpo.py
rename to examples/open_r1/src/mind_openr1/grpo.py
diff --git a/open_r1/src/mind_openr1/rewards.py b/examples/open_r1/src/mind_openr1/rewards.py
similarity index 100%
rename from open_r1/src/mind_openr1/rewards.py
rename to examples/open_r1/src/mind_openr1/rewards.py
diff --git a/open_r1/src/mind_openr1/sft.py b/examples/open_r1/src/mind_openr1/sft.py
similarity index 100%
rename from open_r1/src/mind_openr1/sft.py
rename to examples/open_r1/src/mind_openr1/sft.py
diff --git a/open_r1/src/mind_openr1/utils/__init__.py b/examples/open_r1/src/mind_openr1/utils/__init__.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/__init__.py
rename to examples/open_r1/src/mind_openr1/utils/__init__.py
diff --git a/open_r1/src/mind_openr1/utils/callbacks.py b/examples/open_r1/src/mind_openr1/utils/callbacks.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/callbacks.py
rename to examples/open_r1/src/mind_openr1/utils/callbacks.py
diff --git a/open_r1/src/mind_openr1/utils/code_providers.py b/examples/open_r1/src/mind_openr1/utils/code_providers.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/code_providers.py
rename to examples/open_r1/src/mind_openr1/utils/code_providers.py
diff --git a/open_r1/src/mind_openr1/utils/competitive_programming/__init__.py b/examples/open_r1/src/mind_openr1/utils/competitive_programming/__init__.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/competitive_programming/__init__.py
rename to examples/open_r1/src/mind_openr1/utils/competitive_programming/__init__.py
diff --git a/open_r1/src/mind_openr1/utils/competitive_programming/cf_scoring.py b/examples/open_r1/src/mind_openr1/utils/competitive_programming/cf_scoring.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/competitive_programming/cf_scoring.py
rename to examples/open_r1/src/mind_openr1/utils/competitive_programming/cf_scoring.py
diff --git a/open_r1/src/mind_openr1/utils/competitive_programming/code_patcher.py b/examples/open_r1/src/mind_openr1/utils/competitive_programming/code_patcher.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/competitive_programming/code_patcher.py
rename to examples/open_r1/src/mind_openr1/utils/competitive_programming/code_patcher.py
diff --git a/open_r1/src/mind_openr1/utils/competitive_programming/ioi_scoring.py b/examples/open_r1/src/mind_openr1/utils/competitive_programming/ioi_scoring.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/competitive_programming/ioi_scoring.py
rename to examples/open_r1/src/mind_openr1/utils/competitive_programming/ioi_scoring.py
diff --git a/open_r1/src/mind_openr1/utils/competitive_programming/ioi_utils.py b/examples/open_r1/src/mind_openr1/utils/competitive_programming/ioi_utils.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/competitive_programming/ioi_utils.py
rename to examples/open_r1/src/mind_openr1/utils/competitive_programming/ioi_utils.py
diff --git a/open_r1/src/mind_openr1/utils/competitive_programming/morph_client.py b/examples/open_r1/src/mind_openr1/utils/competitive_programming/morph_client.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/competitive_programming/morph_client.py
rename to examples/open_r1/src/mind_openr1/utils/competitive_programming/morph_client.py
diff --git a/open_r1/src/mind_openr1/utils/competitive_programming/piston_client.py b/examples/open_r1/src/mind_openr1/utils/competitive_programming/piston_client.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/competitive_programming/piston_client.py
rename to examples/open_r1/src/mind_openr1/utils/competitive_programming/piston_client.py
diff --git a/open_r1/src/mind_openr1/utils/competitive_programming/utils.py b/examples/open_r1/src/mind_openr1/utils/competitive_programming/utils.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/competitive_programming/utils.py
rename to examples/open_r1/src/mind_openr1/utils/competitive_programming/utils.py
diff --git a/open_r1/src/mind_openr1/utils/data.py b/examples/open_r1/src/mind_openr1/utils/data.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/data.py
rename to examples/open_r1/src/mind_openr1/utils/data.py
diff --git a/open_r1/src/mind_openr1/utils/evaluation.py b/examples/open_r1/src/mind_openr1/utils/evaluation.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/evaluation.py
rename to examples/open_r1/src/mind_openr1/utils/evaluation.py
diff --git a/open_r1/src/mind_openr1/utils/hub.py b/examples/open_r1/src/mind_openr1/utils/hub.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/hub.py
rename to examples/open_r1/src/mind_openr1/utils/hub.py
diff --git a/open_r1/src/mind_openr1/utils/import_utils.py b/examples/open_r1/src/mind_openr1/utils/import_utils.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/import_utils.py
rename to examples/open_r1/src/mind_openr1/utils/import_utils.py
diff --git a/open_r1/src/mind_openr1/utils/model_utils.py b/examples/open_r1/src/mind_openr1/utils/model_utils.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/model_utils.py
rename to examples/open_r1/src/mind_openr1/utils/model_utils.py
diff --git a/open_r1/src/mind_openr1/utils/routed_morph.py b/examples/open_r1/src/mind_openr1/utils/routed_morph.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/routed_morph.py
rename to examples/open_r1/src/mind_openr1/utils/routed_morph.py
diff --git a/open_r1/src/mind_openr1/utils/routed_sandbox.py b/examples/open_r1/src/mind_openr1/utils/routed_sandbox.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/routed_sandbox.py
rename to examples/open_r1/src/mind_openr1/utils/routed_sandbox.py
diff --git a/open_r1/src/mind_openr1/utils/wandb_logging.py b/examples/open_r1/src/mind_openr1/utils/wandb_logging.py
similarity index 100%
rename from open_r1/src/mind_openr1/utils/wandb_logging.py
rename to examples/open_r1/src/mind_openr1/utils/wandb_logging.py
diff --git a/mindnlp/utils/safetensors_patch.py b/mindnlp/utils/safetensors_patch.py
index 367b37da3..938099473 100644
--- a/mindnlp/utils/safetensors_patch.py
+++ b/mindnlp/utils/safetensors_patch.py
@@ -215,6 +215,16 @@ def get_tensor(self, name):
     def get_slice(self, name):
         return self.tensors[name]
 
+    def offset_keys(self):
+        """
+        Returns the names of the tensors in the file, ordered by offset.
+
+        Returns:
+            (`List[str]`):
+                The name of the tensors contained in that file
+        """
+        return self.keys()
+
 
 def safe_save_file(tensor_dict, filename, metadata=None):
     """
diff --git a/mindtorch/configs.py b/mindtorch/configs.py
index 7a97394f3..6c1113e1f 100644
--- a/mindtorch/configs.py
+++ b/mindtorch/configs.py
@@ -11,6 +11,7 @@
 ON_ORANGE_PI = '310b' in SOC
 DEFAULT_DTYPE = mindspore.float32
 MS27 = '.'.join(mindspore.__version__.split('.')[:2]) >= '2.7'
+FLASH_ATTN_MASK_VALID = int(os.environ.get('FLASH_ATTN_MASK_VALID', 1))
 
 # OP backend select
 USE_PYBOOST = True
diff --git a/mindtorch/nn/functional.py b/mindtorch/nn/functional.py
index 01d0aa12d..aaeb3c751 100644
--- a/mindtorch/nn/functional.py
+++ b/mindtorch/nn/functional.py
@@ -9,7 +9,7 @@
 from mindtorch._C import default_generator
 from mindtorch.nn.modules.utils import _pair
 
-from ..configs import ON_A2, ON_A1
+from ..configs import ON_A2, ON_A1, FLASH_ATTN_MASK_VALID
 
 generator_step_ = 12
 
@@ -1162,9 +1162,12 @@ def scaled_dot_product_attention(query, key, value, attn_mask=None, dropout_p=0.
 
     scale_factor = 1 / math.sqrt(query.size(-1)) if scale is None else scale
 
-    if query.device.type == 'npu' and ON_A2:
+    if query.dtype != mindtorch.float32 and query.device.type == 'npu' and ON_A2:
         if attn_mask is not None and not is_causal:
-            attn_mask = ~attn_mask
+            if FLASH_ATTN_MASK_VALID == 1:
+                attn_mask = ~attn_mask
+            else:
+                attn_mask = attn_mask.bool()
             sparse_mode = 0