import numpy as np

- # Define the Transformer components
+ def softmax(x, axis=-1):
+     # Apply softmax to the input array along the specified axis;
+     # subtracting the max first keeps np.exp from overflowing on large inputs
+     e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
+     return e_x / np.sum(e_x, axis=axis, keepdims=True)


class PositionalEncoding:
    def __init__(self, d_model, max_seq_len):
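
A quick sanity check of the relocated softmax (a minimal sketch; the function body is unchanged from the hunk above, and the printed values come from running it):

    import numpy as np

    def softmax(x, axis=-1):
        e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
        return e_x / np.sum(e_x, axis=axis, keepdims=True)

    scores = np.array([[2.0, 1.0, 0.1]])
    probs = softmax(scores)
    print(np.round(probs, 3))   # [[0.659 0.242 0.099]]
    print(probs.sum(axis=-1))   # [1.] -- rows always sum to one
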
@@ -69,13 +72,6 @@ def forward(self, X):
        return output

- # Create a simple Transformer model
-
- def softmax(x, axis=-1):
-     # Apply softmax operation to the input array along the specified axis
-     e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
-     return e_x / np.sum(e_x, axis=axis, keepdims=True)
-

class Transformer:
    def __init__(self, d_model, num_heads, d_ff, num_layers):
        self.d_model = d_model
@@ -90,15 +86,14 @@ def __init__(self, d_model, num_heads, d_ff, num_layers):
        )

    def forward(self, X):
-       for _ in range(self.num_layers):
-           attention_output = self.layers[_][0].forward(X)
+       for layer in self.layers:
+           attention_output = layer[0].forward(X)
            X = X + attention_output  # Residual connection
-           X = X + self.layers[_][1].forward(X)  # Residual connection
+           X = X + layer[1].forward(X)  # Residual connection

        return X


# Example usage
-
max_seq_len = 10
d_model = 64
num_heads = 4
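
The rewritten loop iterates the layer list directly instead of indexing with range(self.num_layers); tuple unpacking would take the refactor one step further than layer[0] / layer[1]. A minimal runnable sketch of the same pattern (Identity is a placeholder stub; the real attention and feed-forward classes live outside this hunk):

    import numpy as np

    class Identity:
        # Stand-in for the attention / feed-forward sub-blocks
        def forward(self, X):
            return np.zeros_like(X)

    layers = [(Identity(), Identity()) for _ in range(2)]
    X = np.ones((10, 64))
    for attention, ffn in layers:        # each layer is an (attention, ffn) pair
        X = X + attention.forward(X)     # residual connection around attention
        X = X + ffn.forward(X)           # residual connection around feed-forward
    print(X.shape)  # (10, 64)
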
@@ -115,48 +110,4 @@ def forward(self, X):
transformer = Transformer(d_model, num_heads, d_ff, num_layers)
output = transformer.forward(X_with_pos_enc)

- import numpy as np
-
- # Define the necessary classes and functions (same as the code provided)
-
- # Example usage
- max_seq_len = 10
- d_model = 64
- num_heads = 4
- d_ff = 128
- num_layers = 2
-
- # Tokenize the sentence
- sentence = "You are an alien."
- tokens = sentence.split()  # Split by whitespace
- num_tokens = len(tokens)
-
- # Encode the tokens
- token_to_id = {"You": 1, "are": 2, "an": 3, "alien": 4}  # Remove the period from 'alien'
- encoded_tokens = [token_to_id[token] for token in tokens]
-
- # Pad or truncate the sequence
- if num_tokens < max_seq_len:
-     padded_tokens = encoded_tokens + [0] * (max_seq_len - num_tokens)
- else:
-     padded_tokens = encoded_tokens[:max_seq_len]
-
- # Convert the numerical sequence into a NumPy array
- X = np.array(padded_tokens)
-
- # Apply positional encoding
- pos_enc = PositionalEncoding(d_model, max_seq_len)
- positions = np.arange(max_seq_len)
- pos_encoding = pos_enc.get_positional_encoding(positions)
-
- # Add positional encodings to the input
- X_with_pos_enc = X + pos_encoding
-
- # Create a Transformer model
- transformer = Transformer(d_model, num_heads, d_ff, num_layers)
-
- # Process the input through the Transformer model
- output = transformer.forward(X_with_pos_enc)
-
- # Print the output
print(output)
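
For context, get_positional_encoding is called above but its body falls outside this diff; here is a minimal sketch of the standard sinusoidal encoding it presumably computes (an assumption based on the class name, not on code shown in these hunks):

    import numpy as np

    def get_positional_encoding(positions, d_model):
        # Even dimensions get sin, odd dimensions get cos, with wavelengths
        # growing geometrically across the embedding dimensions
        angles = positions[:, None] / np.power(10000, (2 * (np.arange(d_model) // 2)) / d_model)
        enc = np.zeros((len(positions), d_model))
        enc[:, 0::2] = np.sin(angles[:, 0::2])
        enc[:, 1::2] = np.cos(angles[:, 1::2])
        return enc

    print(get_positional_encoding(np.arange(10), 64).shape)  # (10, 64)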