Steinbeck-Lab
diff --git a/‎DECIMER/Predictor_usingCheckpoints.py‎
Lines changed: 83 additions & 10 deletions b/‎DECIMER/Predictor_usingCheckpoints.py‎
Lines changed: 83 additions & 10 deletions
diff --git a/‎DECIMER/Repack_model.py‎
Lines changed: 46 additions & 9 deletions b/‎DECIMER/Repack_model.py‎
Lines changed: 46 additions & 9 deletions
diff --git a/‎DECIMER/Transformer_decoder.py‎
Lines changed: 3 additions & 1 deletion b/‎DECIMER/Transformer_decoder.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎DECIMER/__init__.py‎
Lines changed: 3 additions & 3 deletions b/‎DECIMER/__init__.py‎
Lines changed: 3 additions & 3 deletions
@@ -1,10 +1,9 @@
 import os
 import sys
 import tensorflow as tf
-
+from typing import List, Tuple
 import pickle
 import pystow
-from selfies import decoder
 import Transformer_decoder
 
 if int(tf.__version__.split(".")[1]) <= 10:
@@ -116,6 +115,9 @@ def main():
     else:
         SMILES = predict_SMILES(sys.argv[1])
         print(SMILES)
+        SMILES_with_confidence = predict_SMILES_with_confidence(sys.argv[1])
+        for tup in SMILES_with_confidence:
+            print(tup)
 
 
 class DECIMER_Predictor(tf.Module):
@@ -126,6 +128,16 @@ def __init__(self, encoder, tokenizer, transformer, max_length):
         self.max_length = max_length
 
     def __call__(self, Decoded_image):
+        """
+        Run the DECIMER predictor model when called.
+        Usage of predict_SMILES or predict_SMILES_with_confidence is recommended instead
+
+        Args:
+            Decoded_image (_type_): output of config.decode_image
+
+        Returns:
+            Tuple[tf.Tensor, tf.Tensor]: predicted tokens, confidence values
+        """
         assert isinstance(Decoded_image, tf.Tensor)
 
         _image_batch = tf.expand_dims(Decoded_image, 0)
@@ -140,6 +152,7 @@ def __call__(self, Decoded_image):
 
         output_array = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)
         output_array = output_array.write(0, start_token)
+        confidence_array = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
 
         for t in tf.range(max_length):
             output = tf.transpose(output_array.stack())
@@ -154,31 +167,92 @@ def __call__(self, Decoded_image):
             predictions = prediction_batch[:, -1:, :]  # (batch_size, 1, vocab_size)
 
             predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
-
+            confidence = predictions[-1][-1][int(predicted_id)]
             output_array = output_array.write(t + 1, predicted_id[0])
-
+            confidence_array = confidence_array.write(t + 1, confidence)
             if predicted_id == end_token:
                 break
         output = tf.transpose(output_array.stack())
-        return output
 
+        return output, confidence_array.stack()
+
+
+def detokenize_output(predicted_array: tf.Tensor) -> str:
+    """
+    This function takes the predicted array of tokens and returns the predicted SMILES
+    string.
+
+    Args:
+        predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
 
-def detokenize_output(predicted_array):
+    Returns:
+        str: SMILES string
+    """
     outputs = [tokenizer.index_word[i] for i in predicted_array[0].numpy()]
     prediction = (
         "".join([str(elem) for elem in outputs])
         .replace("<start>", "")
         .replace("<end>", "")
     )
-
     return prediction
 
 
+def detokenize_output_add_confidence(
+    predicted_array: tf.Tensor,
+    confidence_array: tf.Tensor,
+) -> List[Tuple[str, float]]:
+    """
+    This function takes the predicted array of tokens as well as the confidence values
+    returned by the Transformer Decoder and returns a list of tuples
+    that contain each token of the predicted SMILES string and the confidence
+    value.
+
+    Args:
+        predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
+
+    Returns:
+        str: SMILES string
+    """
+    prediction_with_confidence = [
+        (
+            tokenizer.index_word[predicted_array[0].numpy()[i]],
+            confidence_array[i].numpy(),
+        )
+        for i in range(len(confidence_array))
+    ]
+    decoded_prediction_with_confidence = list(
+        [(utils.decoder(tok), conf) for tok, conf in prediction_with_confidence[1:-1]]
+    )
+    decoded_prediction_with_confidence.append(prediction_with_confidence[-1])
+    return decoded_prediction_with_confidence
+
+
 # Initiate the DECIMER class
 DECIMER = DECIMER_Predictor(encoder, tokenizer, transformer, MAX_LEN)
 
 
-def predict_SMILES(image_path: str):
+def predict_SMILES_with_confidence(image_path: str) -> List[Tuple[str, float]]:
+    """
+    This function takes an image path (str) and returns a list of tuples
+    that contain each token of the predicted SMILES string and the confidence
+    level from the last layer of the Transformer decoder.
+
+    Args:
+        image_path (str): Path of chemical structure depiction image
+
+    Returns:
+        (List[Tuple[str, float]]): Tuples that contain the tokens and the confidence
+            values of the predicted SMILES
+    """
+    decodedImage = config.decode_image(image_path)
+    predicted_tokens, confidence_values = DECIMER(tf.constant(decodedImage))
+    predicted_SMILES_with_confidence = detokenize_output_add_confidence(
+        predicted_tokens, confidence_values
+    )
+    return predicted_SMILES_with_confidence
+
+
+def predict_SMILES(image_path: str) -> str:
     """
     This function takes an image path (str) and returns the SMILES
     representation of the depicted molecule (str).
@@ -190,9 +264,8 @@ def predict_SMILES(image_path: str):
         (str): SMILES representation of the molecule in the input image
     """
     decodedImage = config.decode_image(image_path)
-    predicted_tokens = DECIMER(tf.constant(decodedImage))
+    predicted_tokens, _ = DECIMER(tf.constant(decodedImage))
     predicted_SMILES = utils.decoder(detokenize_output(predicted_tokens))
-
     return predicted_SMILES
 
 
 
@@ -1,11 +1,10 @@
 import os
 import tensorflow as tf
-
+from typing import List, Tuple
 import pickle
 import DECIMER.Efficient_Net_encoder as Efficient_Net_encoder
 import DECIMER.Transformer_decoder as Transformer_decoder
 import DECIMER.config as config
-import DECIMER.utils as utils
 
 print(tf.__version__)
 
@@ -84,17 +83,56 @@
     start_epoch = int(ckpt_manager.latest_checkpoint.split("-")[-1])
 
 
-def detokenize_output(predicted_array):
+def detokenize_output(predicted_array: tf.Tensor) -> str:
+    """
+    This function takes the predicted array of tokens and returns the predicted SMILES
+    string.
+
+    Args:
+        predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
+
+    Returns:
+        str: SMILES string
+    """
     outputs = [tokenizer.index_word[i] for i in predicted_array[0].numpy()]
     prediction = (
         "".join([str(elem) for elem in outputs])
         .replace("<start>", "")
         .replace("<end>", "")
     )
-
     return prediction
 
 
+def detokenize_output_add_confidence(
+    predicted_array: tf.Tensor,
+    confidence_array: tf.Tensor,
+) -> List[Tuple[str, float]]:
+    """
+    This function takes the predicted array of tokens as well as the confidence values
+    returned by the Transformer Decoder and returns a list of tuples
+    that contain each token of the predicted SMILES string and the confidence
+    value.
+
+    Args:
+        predicted_array (tf.Tensor): Transformer Decoder output array (predicted tokens)
+
+    Returns:
+        str: SMILES string
+    """
+    prediction_with_confidence = [
+        (
+            tokenizer.index_word[predicted_array[0].numpy()[i]],
+            confidence_array[i].numpy(),
+        )
+        for i in range(len(confidence_array))
+    ]
+    decoded_prediction_with_confidence = list(
+        [(utils.decoder(tok), conf) for tok, conf in prediction_with_confidence[1:-1]]
+    )
+    decoded_prediction_with_confidence.append(prediction_with_confidence[-1])
+    return decoded_prediction_with_confidence
+
+
 class DECIMER_Predictor(tf.Module):
     """This is a class which takes care of inference. It loads the saved checkpoint and the necessary
     tokenizers. The inference begins with the start token (<start>) and ends when the end token(<end>)
@@ -128,8 +166,6 @@ def __call__(self, Decoded_image):
             output (tf.Tensor[tf.int64]): predicted output as an array.
         """
         assert isinstance(Decoded_image, tf.Tensor)
-        if len(Decoded_image.shape) == 0:
-            sentence = Decoded_image[tf.newaxis]
 
         _image_batch = tf.expand_dims(Decoded_image, 0)
         _image_embedding = encoder(_image_batch, training=False)
@@ -143,6 +179,7 @@ def __call__(self, Decoded_image):
 
         output_array = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)
         output_array = output_array.write(0, start_token)
+        confidence_array = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
 
         for t in tf.range(max_length):
             output = tf.transpose(output_array.stack())
@@ -157,14 +194,14 @@ def __call__(self, Decoded_image):
             predictions = prediction_batch[:, -1:, :]  # (batch_size, 1, vocab_size)
 
             predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
-
+            confidence = predictions[0, 0, int(predicted_id[0, 0])]
             output_array = output_array.write(t + 1, predicted_id[0])
-
+            confidence_array = confidence_array.write(t + 1, confidence)
             if predicted_id == end_token:
                 break
         output = tf.transpose(output_array.stack())
 
-        return output
+        return output, confidence_array.stack()
 
 
 DECIMER = DECIMER_Predictor(encoder, tokenizer, transformer, MAX_LEN)
 
@@ -185,7 +185,9 @@ def __init__(
             for _ in range(num_layers)
         ]
         self.dropout = tf.keras.layers.Dropout(rate)
-        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
+        self.final_layer = tf.keras.layers.Dense(
+            target_vocab_size, activation="softmax"
+        )
 
     def call(self, x, enc_output, training, look_ahead_mask=None, padding_mask=None):
         seq_len = tf.shape(x)[1]
 
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 """
-DECIMER V2.3.1 Python Package.
+DECIMER V2.4.0 Python Package.
 ============================
 
 This repository contains DECIMER-V2,Deep lEarning for Chemical ImagE Recognition) project
@@ -23,11 +23,11 @@
 
 """
 
-__version__ = "2.3.1"
+__version__ = "2.4.0"
 
 __all__ = [
     "DECIMER",
 ]
 
 
-from .decimer import predict_SMILES
+from .decimer import predict_SMILES, predict_SMILES_with_confidence