This repository was archived by the owner on Feb 8, 2024. It is now read-only.

Commit 378ae54

CipherCipher authored and committed
Fixed Transformer that's generated by GPT 3.5
1 parent 0785e88 commit 378ae54

File tree: 1 file changed (+7, -56 lines)


Experimental/Transformer.py

Lines changed: 7 additions & 56 deletions
@@ -1,6 +1,9 @@
 import numpy as np
 
-# Define the Transformer components
+def softmax(x, axis=-1):
+    # Apply softmax operation to the input array along the specified axis
+    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
+    return e_x / np.sum(e_x, axis=axis, keepdims=True)
 
 class PositionalEncoding:
     def __init__(self, d_model, max_seq_len):
@@ -69,13 +72,6 @@ def forward(self, X):
 
         return output
 
-# Create a simple Transformer model
-
-def softmax(x, axis=-1):
-    # Apply softmax operation to the input array along the specified axis
-    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
-    return e_x / np.sum(e_x, axis=axis, keepdims=True)
-
 class Transformer:
     def __init__(self, d_model, num_heads, d_ff, num_layers):
         self.d_model = d_model
@@ -90,15 +86,14 @@ def __init__(self, d_model, num_heads, d_ff, num_layers):
         )
 
     def forward(self, X):
-        for _ in range(self.num_layers):
-            attention_output = self.layers[_][0].forward(X)
+        for layer in self.layers:
+            attention_output = layer[0].forward(X)
             X = X + attention_output  # Residual connection
-            X = X + self.layers[_][1].forward(X)  # Residual connection
+            X = X + layer[1].forward(X)  # Residual connection
 
         return X
 
 # Example usage
-
 max_seq_len = 10
 d_model = 64
 num_heads = 4
@@ -115,48 +110,4 @@ def forward(self, X):
 transformer = Transformer(d_model, num_heads, d_ff, num_layers)
 output = transformer.forward(X_with_pos_enc)
 
-import numpy as np
-
-# Define the necessary classes and functions (same as the code provided)
-
-# Example usage
-max_seq_len = 10
-d_model = 64
-num_heads = 4
-d_ff = 128
-num_layers = 2
-
-# Tokenize the sentence
-sentence = "You are an alien."
-tokens = sentence.split()  # Split by whitespace
-num_tokens = len(tokens)
-
-# Encode the tokens
-token_to_id = {"You": 1, "are": 2, "an": 3, "alien": 4}  # Remove the period from 'alien'
-encoded_tokens = [token_to_id[token] for token in tokens]
-
-# Pad or truncate the sequence
-if num_tokens < max_seq_len:
-    padded_tokens = encoded_tokens + [0] * (max_seq_len - num_tokens)
-else:
-    padded_tokens = encoded_tokens[:max_seq_len]
-
-# Convert the numerical sequence into a NumPy array
-X = np.array(padded_tokens)
-
-# Apply positional encoding
-pos_enc = PositionalEncoding(d_model, max_seq_len)
-positions = np.arange(max_seq_len)
-pos_encoding = pos_enc.get_positional_encoding(positions)
-
-# Add positional encodings to the input
-X_with_pos_enc = X + pos_encoding
-
-# Create a Transformer model
-transformer = Transformer(d_model, num_heads, d_ff, num_layers)
-
-# Process the input through the Transformer model
-output = transformer.forward(X_with_pos_enc)
-
-# Print the output
 print(output)
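
For readers skimming the diff, the two pieces this commit keeps are the module-level numerically stable softmax and the forward pass that iterates self.layers directly. The snippet below is a minimal standalone sketch of that pattern, not the repository's actual classes: the hypothetical _StubLayer stands in for the attention and feed-forward blocks in Transformer.py, which are assumed here to expose a forward(X) method returning an array of the same shape.

import numpy as np

def softmax(x, axis=-1):
    # Subtract the per-axis max before exponentiating for numerical stability
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / np.sum(e_x, axis=axis, keepdims=True)

class _StubLayer:
    # Hypothetical stand-in for the attention / feed-forward blocks in Transformer.py
    def forward(self, X):
        return softmax(X, axis=-1)

class MiniTransformer:
    def __init__(self, num_layers):
        # Pairs of (attention, feed_forward) blocks, mirroring the structure the diff iterates over
        self.layers = [(_StubLayer(), _StubLayer()) for _ in range(num_layers)]

    def forward(self, X):
        # Post-commit pattern: walk the layer pairs directly instead of indexing with a loop counter
        for layer in self.layers:
            attention_output = layer[0].forward(X)
            X = X + attention_output     # residual connection
            X = X + layer[1].forward(X)  # residual connection
        return X

X = np.random.rand(10, 64)  # (max_seq_len, d_model) dummy input
print(MiniTransformer(num_layers=2).forward(X).shape)  # -> (10, 64)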
