diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py
index b022832effc..0e97e93a3e0 100644
--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -5,6 +5,7 @@
 from typing import (Callable, Dict, Iterable, List, Literal, Mapping, Optional,
                     Protocol, Set, Tuple, Union, overload)
 
+import habana_frameworks.torch.core as htcore
 import torch
 import torch.nn as nn
 from torch.func import functional_call
@@ -391,6 +392,7 @@ def _merge_multimodal_embeddings(
     """
     # skip check for HPU, the number of tokens is a cpu fallback during HPU lazy
     if current_platform.is_hpu():
+        htcore.mark_step()
         flattened = _flatten_embeddings(multimodal_embeddings)
         inputs_embeds[is_multimodal] = flattened
         return inputs_embeds
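
For context, a minimal sketch of the lazy-mode pattern this hunk relies on (not part of the patch; the device, shapes, and tensor names are illustrative). Under HPU lazy execution, ops on device tensors are queued rather than run immediately, and `htcore.mark_step()` flushes the queued graph to the device. Issuing it right before the boolean-mask assignment means the data-dependent masking step, which the in-diff comment notes falls back to the CPU under lazy mode, only has to synchronize its own work rather than pull the entire accumulated graph through the fallback path.

```python
# Minimal sketch, not part of the patch: shows where mark_step() sits
# relative to a boolean-mask assignment under HPU lazy mode. Requires a
# Gaudi device with habana_frameworks installed; shapes are made up.
import habana_frameworks.torch.core as htcore
import torch

inputs_embeds = torch.zeros(8, 16, device="hpu")   # queued, not yet executed
is_multimodal = torch.tensor([True, False] * 4, device="hpu")
flattened = torch.ones(4, 16, device="hpu")        # one row per True in mask

# Flush everything queued so far into one device graph. Without this, the
# CPU fallback triggered by the data-dependent mask below would also have
# to materialize all of the pending work above.
htcore.mark_step()

inputs_embeds[is_multimodal] = flattened           # data-dependent scatter
```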