
Commit fdd5e75: Updated on 2024-12-26

1 parent f64fa93

2 files changed (+21, -2 lines)

papers/list.json

Lines changed: 9 additions & 0 deletions
@@ -1,4 +1,13 @@
 [
+    {
+        "title": "Remix-DiT: Mixing Diffusion Transformers for Multi-Expert Denoising",
+        "author": "Gongfan Fang et al",
+        "year": "2024",
+        "topic": "dit, diffusion, moe",
+        "venue": "NeurIPS",
+        "description": "This paper introduces a method of mixing diffusion transformers for multi-expert denoising: each linear layer is widened by a factor of K and the forward pass is modified to match, giving K experts that are all initialized from the original pretrained weights.",
+        "link": "https://arxiv.org/pdf/2412.05628"
+    },
     {
         "title": "Hymba: A Hybrid-head Architecture for Small Language Models",
         "author": "Xin Dong et al",

papers_read.html

Lines changed: 12 additions & 2 deletions
@@ -16,10 +16,10 @@ <h1>Here's where I keep a list of papers I have read.</h1>
     I typically use this to organize papers I found interesting. Please feel free to do whatever you want with it. Note that this is not every single paper I have ever read, just a collection of ones that I remember to put down.
     </p>
     <p id="paperCount">
-    So far, we have read 194 papers. Let's keep it up!
+    So far, we have read 195 papers. Let's keep it up!
     </p>
     <small id="searchCount">
-    Your search returned 194 papers. Nice!
+    Your search returned 195 papers. Nice!
     </small>

     <div class="search-inputs">
@@ -46,6 +46,16 @@ <h1>Here's where I keep a list of papers I have read.</h1>
     </thead>
     <tbody>

+    <tr>
+        <td>Remix-DiT: Mixing Diffusion Transformers for Multi-Expert Denoising</td>
+        <td>Gongfan Fang et al</td>
+        <td>2024</td>
+        <td>dit, diffusion, moe</td>
+        <td>NeurIPS</td>
+        <td>This paper introduces a method of mixing diffusion transformers for multi-expert denoising: each linear layer is widened by a factor of K and the forward pass is modified to match, giving K experts that are all initialized from the original pretrained weights.</td>
+        <td><a href="https://arxiv.org/pdf/2412.05628" target="_blank">Link</a></td>
+    </tr>
+
     <tr>
         <td>Hymba: A Hybrid-head Architecture for Small Language Models</td>
         <td>Xin Dong et al</td>
