This repository was archived by the owner on Feb 8, 2024. It is now read-only.

Commit cd2d793

CipherCipher authored and committed
Added text-input transformer
1 parent 0285e82 commit cd2d793

File tree

1 file changed (+129, -0 lines)


Experimental/TextTransformer.py

Lines changed: 129 additions & 0 deletions
@@ -0,0 +1,129 @@
import numpy as np


def softmax(x, axis=-1):
    # Apply softmax operation to the input array along the specified axis
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / np.sum(e_x, axis=axis, keepdims=True)

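# Quick illustrative sanity check (a sketch, not part of the model itself):
# softmax rows should sum to 1 and be unchanged by adding a constant to the
# inputs, thanks to the max subtraction above.
_probs = softmax(np.array([[1.0, 2.0, 3.0]]))
assert np.allclose(_probs.sum(axis=-1), 1.0)
assert np.allclose(_probs, softmax(np.array([[101.0, 102.0, 103.0]])))
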
class PositionalEncoding:
    def __init__(self, d_model, max_seq_len):
        self.d_model = d_model
        self.max_seq_len = max_seq_len

    def get_positional_encoding(self, positions):
        # Sinusoidal encoding: angle(pos, i) = pos / 10000^(2 * (i // 2) / d_model)
        dims = np.arange(self.d_model)[np.newaxis, :]  # Shape: (1, d_model)
        positions = positions[:, np.newaxis]  # Shape: (max_seq_len, 1)

        angles = positions / np.power(10000, 2 * (dims // 2) / self.d_model)
        angles[:, 0::2] = np.sin(angles[:, 0::2])  # Sine on even dimensions
        angles[:, 1::2] = np.cos(angles[:, 1::2])  # Cosine on odd dimensions

        return angles  # Shape: (max_seq_len, d_model)

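# Illustrative check (toy sizes chosen here for brevity): a short encoding
# should have shape (seq_len, d_model) and stay within [-1, 1], since every
# entry is a sine or cosine value.
_toy_pe = PositionalEncoding(d_model=8, max_seq_len=4).get_positional_encoding(np.arange(4))
assert _toy_pe.shape == (4, 8)
assert np.all(np.abs(_toy_pe) <= 1.0)
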
class MultiHeadAttention:
    def __init__(self, d_model, num_heads):
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_head = d_model // num_heads

        self.W_q = np.random.randn(d_model, d_model)
        self.W_k = np.random.randn(d_model, d_model)
        self.W_v = np.random.randn(d_model, d_model)
        self.W_o = np.random.randn(d_model, d_model)

    def attention(self, Q, K, V):
        # Scaled dot-product attention for a single head
        scores = np.matmul(Q, K.T) / np.sqrt(self.d_head)  # Shape: (seq_len, seq_len)
        attention_weights = softmax(scores, axis=-1)  # Apply softmax along the last axis

        attended_values = np.matmul(attention_weights, V)  # Shape: (seq_len, d_head)
        return attended_values

    def forward(self, X):
        # Project the input into query, key, and value spaces
        Q = np.matmul(X, self.W_q)
        K = np.matmul(X, self.W_k)
        V = np.matmul(X, self.W_v)

        # Split the projections into per-head chunks along the feature axis
        Q_split = np.split(Q, self.num_heads, axis=-1)
        K_split = np.split(K, self.num_heads, axis=-1)
        V_split = np.split(V, self.num_heads, axis=-1)

        attended_values = []
        for i in range(self.num_heads):
            attended_values.append(self.attention(Q_split[i], K_split[i], V_split[i]))

        concatenated = np.concatenate(attended_values, axis=-1)  # Shape: (seq_len, d_model)
        output = np.matmul(concatenated, self.W_o)

        return output

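# Illustrative check: with the randomly initialised weights above, multi-head
# attention should map a (seq_len, d_model) input to an output of the same shape.
_toy_mha = MultiHeadAttention(d_model=8, num_heads=2)
assert _toy_mha.forward(np.random.randn(5, 8)).shape == (5, 8)
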
class FeedForwardNetwork:
    def __init__(self, d_model, d_ff):
        self.d_model = d_model
        self.d_ff = d_ff

        self.W_1 = np.random.randn(d_model, d_ff)
        self.W_2 = np.random.randn(d_ff, d_model)

    def forward(self, X):
        hidden = np.matmul(X, self.W_1)
        hidden = np.maximum(hidden, 0)  # Apply ReLU activation
        output = np.matmul(hidden, self.W_2)

        return output

class Transformer:
    def __init__(self, d_model, num_heads, d_ff, num_layers):
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_ff = d_ff
        self.num_layers = num_layers

        self.layers = []
        for _ in range(num_layers):
            self.layers.append(
                (MultiHeadAttention(d_model, num_heads), FeedForwardNetwork(d_model, d_ff))
            )

    def forward(self, X):
        for layer in self.layers:
            attention_output = layer[0].forward(X)
            X = X + attention_output  # Residual connection around attention
            X = X + layer[1].forward(X)  # Residual connection around feed-forward

        return X

# Example usage
max_seq_len = 10  # Desired maximum sequence length
d_model = 64
num_heads = 4
d_ff = 128
num_layers = 2

pos_enc = PositionalEncoding(d_model, max_seq_len)

# Text preprocessing
text = "This is a sample text input."
tokens = text.split()  # Split the text into tokens

print(tokens)

# Convert tokens to a numerical representation (e.g., using word embeddings)
word_embeddings = {}  # Replace with your word embeddings dictionary
X = np.array([word_embeddings.get(token, np.zeros(d_model)) for token in tokens])

# Pad or truncate X to match the desired sequence length
X = X[:max_seq_len]
padding = np.zeros((max_seq_len - X.shape[0], d_model))
X = np.vstack((X, padding))

positions = np.arange(max_seq_len)
pos_encoding = pos_enc.get_positional_encoding(positions)

X_with_pos_enc = X + pos_encoding

transformer = Transformer(d_model, num_heads, d_ff, num_layers)
output = transformer.forward(X_with_pos_enc)

print(output)
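
# Illustrative check (assumes the toy settings above): the encoder stack is
# shape-preserving, so the output should have shape (max_seq_len, d_model) = (10, 64).
print(output.shape)
assert output.shape == (max_seq_len, d_model)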

0 commit comments
