From 428912873515e4380750670bdd4a0630a16fd1a7 Mon Sep 17 00:00:00 2001
From: kumsonali
Date: Thu, 12 Jun 2025 12:13:32 +0530
Subject: [PATCH] Modify TransformerEncoder masking documentation

---
 keras_hub/src/layers/modeling/transformer_encoder.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/keras_hub/src/layers/modeling/transformer_encoder.py b/keras_hub/src/layers/modeling/transformer_encoder.py
index 28a61a97ed..ad20cb6422 100644
--- a/keras_hub/src/layers/modeling/transformer_encoder.py
+++ b/keras_hub/src/layers/modeling/transformer_encoder.py
@@ -16,9 +16,12 @@ class TransformerEncoder(keras.layers.Layer):
     paper [Attention is All You Need](https://arxiv.org/abs/1706.03762).
     Users can instantiate multiple instances of this class to stack up an
     encoder.
 
-    This layer will correctly compute an attention mask from an implicit
-    Keras padding mask (for example, by passing `mask_zero=True` to a
-    `keras.layers.Embedding` layer). See the Masking and Padding
+    This layer will compute an attention mask, prioritizing explicitly
+    provided masks (a `padding_mask` or a custom `attention_mask`) over an
+    implicit Keras padding mask (for example, by passing `mask_zero=True`
+    to a `keras.layers.Embedding` layer). If both a `padding_mask` and an
+    `attention_mask` are provided, they will be combined to determine the
+    final mask. See the Masking and Padding
     [guide](https://keras.io/guides/understanding_masking_and_padding/)
     for more details.
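
A minimal usage sketch of the documented masking behavior (appended after the patch, not part of it): it assumes `keras` and `keras_hub` are installed, the layer sizes and token ids are arbitrary illustrative values, and the boolean masks follow the Keras convention that `True` marks positions to attend to.

    import numpy as np
    from keras import layers
    from keras_hub.layers import TransformerEncoder

    # Token ids, with 0 reserved for padding.
    token_ids = np.array([[5, 12, 7, 0, 0]])

    # Implicit mask: `mask_zero=True` attaches a Keras padding mask to the
    # embedding output, which the encoder picks up when no explicit mask is
    # passed.
    embedding = layers.Embedding(input_dim=100, output_dim=32, mask_zero=True)
    x = embedding(token_ids)
    encoder = TransformerEncoder(intermediate_dim=64, num_heads=2)
    implicit_out = encoder(x)

    # Explicit masks take priority over the implicit Keras mask. A boolean
    # `padding_mask` of shape (batch, sequence) marks valid positions; a
    # custom `attention_mask` of shape (batch, sequence, sequence) can also
    # be passed, and the two are combined when both are provided.
    padding_mask = token_ids != 0
    attention_mask = np.ones((1, 5, 5), dtype="bool")
    explicit_out = encoder(
        x, padding_mask=padding_mask, attention_mask=attention_mask
    )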