This repository was archived by the owner on Feb 8, 2024. It is now read-only.

Commit 0785e88

CipherCipher authored and committed
Added Transformer that's generated by GPT 3.5
1 parent f25b74b commit 0785e88

File tree

3 files changed: +181 -0 lines changed

Experimental/Transformer.py

Lines changed: 162 additions & 0 deletions
@@ -0,0 +1,162 @@
import numpy as np


def softmax(x, axis=-1):
    # Numerically stable softmax along the specified axis
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / np.sum(e_x, axis=axis, keepdims=True)


# Define the Transformer components

class PositionalEncoding:
    def __init__(self, d_model, max_seq_len):
        self.d_model = d_model
        self.max_seq_len = max_seq_len

    def get_positional_encoding(self, positions):
        # Simplified sinusoidal encoding: even columns get sin, odd columns get cos
        angles = np.arange(self.d_model) / self.d_model
        angles = angles[np.newaxis, :]  # Shape: (1, d_model)

        positions = positions[:, np.newaxis]  # Shape: (max_seq_len, 1)
        angles = angles * (1 / np.power(10000, 2 * positions / self.d_model))
        angles[:, 0::2] = np.sin(angles[:, 0::2])
        angles[:, 1::2] = np.cos(angles[:, 1::2])

        return angles  # Shape: (max_seq_len, d_model)


class MultiHeadAttention:
    def __init__(self, d_model, num_heads):
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_head = d_model // num_heads

        self.W_q = np.random.randn(d_model, d_model)
        self.W_k = np.random.randn(d_model, d_model)
        self.W_v = np.random.randn(d_model, d_model)
        self.W_o = np.random.randn(d_model, d_model)

    def attention(self, Q, K, V):
        # Scaled dot-product attention for a single head
        scores = np.matmul(Q, K.T) / np.sqrt(self.d_head)  # Shape: (seq_len, seq_len)
        attention_weights = softmax(scores, axis=-1)  # Softmax along the last axis

        attended_values = np.matmul(attention_weights, V)  # Shape: (seq_len, d_head)
        return attended_values

    def forward(self, X):
        Q = np.matmul(X, self.W_q)
        K = np.matmul(X, self.W_k)
        V = np.matmul(X, self.W_v)

        # Split the projections into num_heads chunks of size d_head
        Q_split = np.split(Q, self.num_heads, axis=-1)
        K_split = np.split(K, self.num_heads, axis=-1)
        V_split = np.split(V, self.num_heads, axis=-1)

        attended_values = []
        for i in range(self.num_heads):
            attended_values.append(self.attention(Q_split[i], K_split[i], V_split[i]))

        concatenated = np.concatenate(attended_values, axis=-1)  # Shape: (seq_len, d_model)
        output = np.matmul(concatenated, self.W_o)

        return output


class FeedForwardNetwork:
    def __init__(self, d_model, d_ff):
        self.d_model = d_model
        self.d_ff = d_ff

        self.W_1 = np.random.randn(d_model, d_ff)
        self.W_2 = np.random.randn(d_ff, d_model)

    def forward(self, X):
        hidden = np.matmul(X, self.W_1)
        hidden = np.maximum(hidden, 0)  # ReLU activation
        output = np.matmul(hidden, self.W_2)

        return output


# Create a simple Transformer model

class Transformer:
    def __init__(self, d_model, num_heads, d_ff, num_layers):
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_ff = d_ff
        self.num_layers = num_layers

        self.layers = []
        for _ in range(num_layers):
            self.layers.append(
                (MultiHeadAttention(d_model, num_heads), FeedForwardNetwork(d_model, d_ff))
            )

    def forward(self, X):
        for attention_layer, ffn_layer in self.layers:
            attention_output = attention_layer.forward(X)
            X = X + attention_output  # Residual connection around attention
            X = X + ffn_layer.forward(X)  # Residual connection around the feed-forward block

        return X


# Example usage

max_seq_len = 10
d_model = 64
num_heads = 4
d_ff = 128
num_layers = 2

pos_enc = PositionalEncoding(d_model, max_seq_len)
X = np.random.randn(max_seq_len, d_model)  # Input sequence
positions = np.arange(max_seq_len)
pos_encoding = pos_enc.get_positional_encoding(positions)

X_with_pos_enc = X + pos_encoding

transformer = Transformer(d_model, num_heads, d_ff, num_layers)
output = transformer.forward(X_with_pos_enc)


# Second example: run the same classes on a toy tokenized sentence

max_seq_len = 10
d_model = 64
num_heads = 4
d_ff = 128
num_layers = 2

# Tokenize the sentence
sentence = "You are an alien."
tokens = sentence.split()  # Split by whitespace
num_tokens = len(tokens)

# Encode the tokens, stripping the trailing period so "alien." maps to "alien"
token_to_id = {"You": 1, "are": 2, "an": 3, "alien": 4}
encoded_tokens = [token_to_id[token.strip(".")] for token in tokens]

# Pad or truncate the sequence to max_seq_len (0 is the padding id)
if num_tokens < max_seq_len:
    padded_tokens = encoded_tokens + [0] * (max_seq_len - num_tokens)
else:
    padded_tokens = encoded_tokens[:max_seq_len]

# Look up each token id in a random embedding table so the input has shape
# (max_seq_len, d_model) and can be added to the positional encodings
vocab_size = max(token_to_id.values()) + 1
embedding = np.random.randn(vocab_size, d_model)
X = embedding[np.array(padded_tokens)]

# Apply positional encoding
pos_enc = PositionalEncoding(d_model, max_seq_len)
positions = np.arange(max_seq_len)
pos_encoding = pos_enc.get_positional_encoding(positions)

# Add positional encodings to the input
X_with_pos_enc = X + pos_encoding

# Create a Transformer model
transformer = Transformer(d_model, num_heads, d_ff, num_layers)

# Process the input through the Transformer model
output = transformer.forward(X_with_pos_enc)

# Print the output
print(output)
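
As a quick side check (not part of the committed file), the snippet below exercises the pieces above. It assumes the classes and softmax from Experimental/Transformer.py are in scope, for example pasted into the same script; the expected shapes follow from d_model = 64 split across 4 heads.

import numpy as np

# Sanity check, assuming MultiHeadAttention, softmax and Transformer from the
# file above are defined in the same script.
np.random.seed(0)

seq_len, d_model, num_heads = 10, 64, 4
mha = MultiHeadAttention(d_model, num_heads)
X = np.random.randn(seq_len, d_model)

print(mha.forward(X).shape)  # expected: (10, 64)

# Each row of the attention weights is a softmax, so it should sum to 1
weights = softmax(np.random.randn(seq_len, seq_len), axis=-1)
print(np.allclose(weights.sum(axis=-1), 1.0))  # expected: True

# A full forward pass through the stacked model preserves the input shape
model = Transformer(d_model, num_heads, d_ff=128, num_layers=2)
print(model.forward(X).shape)  # expected: (10, 64)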

Janex.py

Lines changed: 19 additions & 0 deletions
@@ -9,11 +9,22 @@ def Tokenize(input_string):
     input_string = input_string.translate(str.maketrans("", "", string.punctuation))
     words = input_string.split()
 
+    words = stem_list(words)
+
     return words
 
+def Tokenize_List(input_list):
+    tokenwords = []
+    for word in input_list:
+        token = Tokenize(word)
+        tokenwords.append(token)
+
+    return tokenwords
+
 def train(intents_file_path):
     with open(intents_file_path, 'r') as json_data:
         intents = json.load(json_data)
+    return intents
 
 def patterncompare(input_string, intents_file_path):
     input_string = input_string.lower()
@@ -148,3 +159,11 @@ def stem_sentence(input_string):
         stemmedwords.append(word)
 
     return stemmedwords
+
+def stem_list(input_list):
+    stemmedwords = []
+    for word in input_list:
+        stemmedword = stem(word)
+        stemmedwords.append(stemmedword)
+
+    return stemmedwords
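
For orientation, here is a minimal, self-contained sketch of how the helpers added in this diff compose. The stem() below is a hypothetical suffix-stripper used only for illustration, since Janex.py's real stemmer sits outside the hunks shown above, and the Tokenize shown reproduces only the lines visible in this hunk.

import string

# Hypothetical stand-in for Janex.py's stem(); the real stemmer is not shown
# in this diff and may behave differently.
def stem(word):
    for suffix in ("ing", "ly", "ed", "es", "s"):
        if word.endswith(suffix):
            return word[: -len(suffix)]
    return word

def stem_list(input_list):
    stemmedwords = []
    for word in input_list:
        stemmedwords.append(stem(word))
    return stemmedwords

def Tokenize(input_string):
    input_string = input_string.translate(str.maketrans("", "", string.punctuation))
    words = input_string.split()
    words = stem_list(words)
    return words

# Tokenize now stems each word before returning it
print(Tokenize("Greetings, travellers!"))  # e.g. ['Greeting', 'traveller'] with this toy stemmer

Tokenize_List then maps Tokenize over a list of strings, producing one token list per input string.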

__pycache__/Janex.cpython-311.pyc

259 Bytes
Binary file not shown.

0 commit comments
