"description": "This paper introduces hyper-connections, which is a novel alternative to residual connections. Basically, they introduce learnable depth and width connections.",
+ "link": "https://arxiv.org/pdf/2409.19606"
+ },
  {
  "title": "Remix-DiT: Mixing Diffusion Transformers for Multi-Expert Denoising",
  "author": "Gongfan Fang et al",
@@ -1050,7 +1059,7 @@
  "topic": "q-learning, reinforcement learning",
  "venue": "Arxiv",
  "description": "The authors present the first deep learning model that can learn complex control policies, and they teach it to play Atari 2600 games using Q-learning. Their goal was to create one net that can play as many games as possible.",
- "link": "TODO"
+ "link": "https://arxiv.org/pdf/1312.5602"
  },
  {
  "title": "Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding",
@@ -1059,7 +1068,7 @@
  "topic": "quantization, encoding, pruning",
  "venue": "ICML",
  "description": "A three-pronged approach to compressing nets. They prune networks, then quantize and share weights, and then apply Huffman encoding.",
- "link": "TODO"
+ "link": "https://arxiv.org/pdf/1510.00149"
  },
  {
  "title": "Binarized Neural Networks: Training Neural Networks with Weights and Activations Constrained to +1 or -1",
@@ -1068,7 +1077,7 @@
  "topic": "quantization, efficiency, binary",
  "venue": "Arxiv",
  "description": "Introduction of training Binary Neural Networks, or nets with binary weights and activations. They also present experiments on deterministic vs stochastic binarization. They use the deterministic one for the most part, except for activations.",
- "link": "TODO"
+ "link": "https://arxiv.org/pdf/1602.02830"
  },
  {
  "title": "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks",
@@ -1077,16 +1086,7 @@
  "topic": "efficiency, scaling",
  "venue": "ICML",
  "description": "A study of model scaling is presented. They propose a compound coefficient to uniformly scale all dimensions of depth/width/resolution; for instance, if you want to use 2^{N} more compute resources, you scale each dimension by their coefficients raised to N. They also quantify the relationship between width, depth, and resolution.",
- "link": "TODO"
- },
- {
- "title": "2-in-1 Accelerator: Enabling Random Precision Switch for Winning Both Adversarial Robustness and Efficiency",
- "author": "Yonggan Fu et al",
- "year": "2021",
- "topic": "precision, adversarial, efficiency",
- "venue": "ACM",
- "description": "Introduction of a Random Precision Switch algorithm that has potential for defending against adversarial attacks while promoting efficiency.",
- "link": "TODO"
+ "link": "https://arxiv.org/pdf/1905.11946"
  },
  {
  "title": "The wake-sleep algorithm for unsupervised neural networks",
papers_read.html (14 additions, 14 deletions)
@@ -46,6 +46,16 @@ <h1>Here's where I keep a list of papers I have read.</h1>
  </thead>
  <tbody>

+ <tr>
+ <td>Hyper-Connections</td>
+ <td>Defa Zhu et al</td>
+ <td>2024</td>
+ <td>residual connections, hyper-connections</td>
+ <td>Arxiv</td>
+ <td>This paper introduces hyper-connections, a novel alternative to residual connections. Basically, they introduce learnable depth and width connections.</td>
  <td>Remix-DiT: Mixing Diffusion Transformers for Multi-Expert Denoising</td>
  <td>Gongfan Fang et al</td>
@@ -1213,7 +1223,7 @@ <h1>Here's where I keep a list of papers I have read.</h1>
  <td>q-learning, reinforcement learning</td>
  <td>Arxiv</td>
  <td>The authors present the first deep learning model that can learn complex control policies, and they teach it to play Atari 2600 games using Q-learning. Their goal was to create one net that can play as many games as possible.</td>
@@ -1233,7 +1243,7 @@ <h1>Here's where I keep a list of papers I have read.</h1>
  <td>quantization, efficiency, binary</td>
  <td>Arxiv</td>
  <td>Introduction of training Binary Neural Networks, or nets with binary weights and activations. They also present experiments on deterministic vs stochastic binarization. They use the deterministic one for the most part, except for activations.</td>
@@ -1243,17 +1253,7 @@ <h1>Here's where I keep a list of papers I have read.</h1>
  <td>efficiency, scaling</td>
  <td>ICML</td>
  <td>A study of model scaling is presented. They propose a compound coefficient to uniformly scale all dimensions of depth/width/resolution; for instance, if you want to use 2^{N} more compute resources, you scale each dimension by their coefficients raised to N. They also quantify the relationship between width, depth, and resolution.</td>
- <td><a href="TODO" target="_blank">Link</a></td>
- </tr>
-
- <tr>
- <td>2-in-1 Accelerator: Enabling Random Precision Switch for Winning Both Adversarial Robustness and Efficiency</td>
- <td>Yonggan Fu et al</td>
- <td>2021</td>
- <td>precision, adversarial, efficiency</td>
- <td>ACM</td>
- <td>Introduction of a Random Precision Switch algorithm that has potential for defending against adversarial attacks while promoting efficiency.</td>