|
45 | 45 | },
|
46 | 46 | {
|
47 | 47 | "cell_type": "code",
|
48 |
| - "execution_count": 292, |
| 48 | + "execution_count": 11, |
49 | 49 | "metadata": {},
|
50 | 50 | "outputs": [],
|
51 | 51 | "source": [
|
|
80 | 80 | },
|
81 | 81 | {
|
82 | 82 | "cell_type": "code",
|
83 |
| - "execution_count": 293, |
| 83 | + "execution_count": 12, |
84 | 84 | "metadata": {},
|
85 | 85 | "outputs": [
|
86 | 86 | {
|
|
103 | 103 | },
|
104 | 104 | {
|
105 | 105 | "cell_type": "code",
|
106 |
| - "execution_count": 294, |
| 106 | + "execution_count": 13, |
107 | 107 | "metadata": {},
|
108 | 108 | "outputs": [
|
109 | 109 | {
|
110 | 110 | "data": {
|
111 | 111 | "text/html": [
|
112 |
| - "<div id=\"circuits-vis-7e4c8a75-1335\" style=\"margin: 15px 0;\"/>\n", |
| 112 | + "<div id=\"circuits-vis-1f2a8687-9cd7\" style=\"margin: 15px 0;\"/>\n", |
113 | 113 | " <script crossorigin type=\"module\">\n",
|
114 |
| - " import { render, Hello } from \"https://unpkg.com/circuitsvis@1.43.0/dist/cdn/esm.js\";\n", |
| 114 | + " import { render, Hello } from \"https://unpkg.com/circuitsvis@1.43.2/dist/cdn/esm.js\";\n", |
115 | 115 | " render(\n",
|
116 |
| - " \"circuits-vis-7e4c8a75-1335\",\n", |
| 116 | + " \"circuits-vis-1f2a8687-9cd7\",\n", |
117 | 117 | " Hello,\n",
|
118 | 118 | " {\"name\": \"Neel\"}\n",
|
119 | 119 | " )\n",
|
120 | 120 | " </script>"
|
121 | 121 | ],
|
122 | 122 | "text/plain": [
|
123 |
| - "<circuitsvis.utils.render.RenderedHTML at 0xffff10cc9f10>" |
| 123 | + "<circuitsvis.utils.render.RenderedHTML at 0x7f21437f1c30>" |
124 | 124 | ]
|
125 | 125 | },
|
126 |
| - "execution_count": 294, |
127 |
| - "metadata": { |
128 |
| - "text/html": { |
129 |
| - "Content-Type": "text/html" |
130 |
| - } |
131 |
| - }, |
| 126 | + "execution_count": 13, |
| 127 | + "metadata": {}, |
132 | 128 | "output_type": "execute_result"
|
133 | 129 | }
|
134 | 130 | ],
|
|
140 | 136 | },
|
141 | 137 | {
|
142 | 138 | "cell_type": "code",
|
143 |
| - "execution_count": 295, |
| 139 | + "execution_count": 14, |
144 | 140 | "metadata": {},
|
145 | 141 | "outputs": [],
|
146 | 142 | "source": [
|
|
158 | 154 | },
|
159 | 155 | {
|
160 | 156 | "cell_type": "code",
|
161 |
| - "execution_count": 296, |
| 157 | + "execution_count": 15, |
162 | 158 | "metadata": {},
|
163 | 159 | "outputs": [],
|
164 | 160 | "source": [
|
|
179 | 175 | },
|
180 | 176 | {
|
181 | 177 | "cell_type": "code",
|
182 |
| - "execution_count": 297, |
| 178 | + "execution_count": 16, |
183 | 179 | "metadata": {},
|
184 | 180 | "outputs": [
|
185 | 181 | {
|
186 | 182 | "data": {
|
187 | 183 | "text/plain": [
|
188 |
| - "<torch.autograd.grad_mode.set_grad_enabled at 0xffff425948e0>" |
| 184 | + "<torch.autograd.grad_mode.set_grad_enabled at 0x7f213de735e0>" |
189 | 185 | ]
|
190 | 186 | },
|
191 |
| - "execution_count": 297, |
| 187 | + "execution_count": 16, |
192 | 188 | "metadata": {},
|
193 | 189 | "output_type": "execute_result"
|
194 | 190 | }
|
|
254 | 250 | },
|
255 | 251 | {
|
256 | 252 | "cell_type": "code",
|
257 |
| - "execution_count": 299, |
| 253 | + "execution_count": 17, |
258 | 254 | "metadata": {},
|
259 | 255 | "outputs": [],
|
260 | 256 | "source": [
|
|
263 | 259 | },
|
264 | 260 | {
|
265 | 261 | "cell_type": "code",
|
266 |
| - "execution_count": 300, |
| 262 | + "execution_count": 18, |
267 | 263 | "metadata": {},
|
268 | 264 | "outputs": [
|
269 | 265 | {
|
|
1210 | 1206 | },
|
1211 | 1207 | {
|
1212 | 1208 | "cell_type": "code",
|
1213 |
| - "execution_count": 314, |
| 1209 | + "execution_count": 19, |
1214 | 1210 | "metadata": {},
|
1215 | 1211 | "outputs": [
|
1216 | 1212 | {
|
1217 | 1213 | "name": "stdout",
|
1218 | 1214 | "output_type": "stream",
|
1219 | 1215 | "text": [
|
1220 | 1216 | "blocks.0.attn.W_Q torch.Size([12, 768, 64])\n",
|
1221 |
| - "blocks.0.attn.W_K torch.Size([12, 768, 64])\n", |
1222 |
| - "blocks.0.attn.W_V torch.Size([12, 768, 64])\n", |
1223 | 1217 | "blocks.0.attn.W_O torch.Size([12, 64, 768])\n",
|
1224 | 1218 | "blocks.0.attn.b_Q torch.Size([12, 64])\n",
|
| 1219 | + "blocks.0.attn.b_O torch.Size([768])\n", |
| 1220 | + "blocks.0.attn.W_K torch.Size([12, 768, 64])\n", |
| 1221 | + "blocks.0.attn.W_V torch.Size([12, 768, 64])\n", |
1225 | 1222 | "blocks.0.attn.b_K torch.Size([12, 64])\n",
|
1226 | 1223 | "blocks.0.attn.b_V torch.Size([12, 64])\n",
|
1227 |
| - "blocks.0.attn.b_O torch.Size([768])\n", |
1228 | 1224 | "blocks.0.mlp.W_in torch.Size([768, 3072])\n",
|
1229 | 1225 | "blocks.0.mlp.b_in torch.Size([3072])\n",
|
1230 | 1226 | "blocks.0.mlp.W_out torch.Size([3072, 768])\n",
|
|
1247 | 1243 | },
|
1248 | 1244 | {
|
1249 | 1245 | "cell_type": "code",
|
1250 |
| - "execution_count": 315, |
| 1246 | + "execution_count": 20, |
1251 | 1247 | "metadata": {},
|
1252 | 1248 | "outputs": [
|
1253 | 1249 | {
|
|
0 commit comments