|
[
+ {
+ "title": "SMASH: One-Shot Model Architecture Search through HyperNetworks",
+ "author": "Andrew Brock et al",
+ "year": "2017",
+ "topic": "hypernetworks, neural architecture search",
+ "venue": "ICLR Workshop",
+ "description": "The authors accelerate architecture search by training a single hypernet that generates the weights of a main model conditioned on an encoding of that model's architecture. After one training run, candidate architectures are ranked by their validation performance with hypernet-generated weights, and the best-ranked architecture is then trained from scratch with normally learned weights.",
+ "link": "https://arxiv.org/pdf/1708.05344"
+ },
+ {
+ "title": "Example-based Hypernetworks for Multi-source Adaptation to Unseen Domains",
+ "author": "Tomer Volk et al",
+ "year": "2023",
+ "topic": "hypernetworks, multi-source adaptation, unseen domains, NLP",
+ "venue": "Findings of EMNLP",
+ "description": "The authors apply hypernets to unsupervised domain adaptation in NLP using example-based adaptation. The main idea is to use an encoder-decoder to create a unique signature for each input example and embed it in the semantic space spanned by the source domains; a hypernet then uses this signature to generate the task classifier's weights. The paper focuses on improving generalization to unseen domains by explicitly modeling the shared and domain-specific characteristics of the input, and proposes hypernet-based modeling because it allows soft weight sharing across domains.",
+ "link": "https://aclanthology.org/2023.findings-emnlp.610.pdf"
+ },
+ {
+ "title": "Meta-Learning via Hypernetworks",
+ "author": "Dominic Zhao et al",
+ "year": "2020",
+ "topic": "hypernetworks, meta-learning",
+ "venue": "NeurIPS Workshop",
+ "description": "The authors propose a soft weight-sharing hypernet architecture that performs well on meta-learning tasks. A good reference for hypernet-based meta-learning and for comparisons against SOTA methods such as Model-Agnostic Meta-Learning (MAML).",
+ "link": "https://neurips.cc/virtual/2020/20189"
+ },
+ {
+ "title": "HyperDynamics: Meta-Learning Object and Agent Dynamics with Hypernetworks",
+ "author": "Zhou Xian et al",
+ "year": "2021",
+ "topic": "hypernetworks, meta-learning, dynamics",
+ "venue": "ICLR",
+ "description": "The authors present a dynamics meta-learning framework that conditions on an agent's interactions with its environment and (optionally) its visual observations, and from these generates the parameters of a neural dynamics model. The framework has three modules: 1) an encoding module that encodes a few agent-environment interactions and the agent's visual observations into a latent feature code, 2) a hypernet that conditions on this latent code to generate the parameters of a dynamics model dedicated to the observed system, and 3) a target dynamics model, built from the generated parameters, that takes a low-dimensional system state and agent action as input and predicts the next system state.",
+ "link": "https://arxiv.org/pdf/2103.09439"
+ },
+ {
+ "title": "Principled Weight Initialization for Hypernetworks",
+ "author": "Oscar Chang et al",
+ "year": "2020",
+ "topic": "hypernetworks, weight initialization",
+ "venue": "ICLR",
+ "description": "Classical weight initialization techniques do not carry over to hypernets, because applying them to the hypernet's own weights fails to produce mainnet weights at the correct scale. The authors derive hyperfan-in and hyperfan-out initialization formulas and show that they yield more stable training of the mainnet.",
+ "link": "https://arxiv.org/pdf/2312.08399"
+ },
+ {
+ "title": "Continual Learning with Hypernetworks",
+ "author": "Johannes von Oswald et al",
+ "year": "2020",
+ "topic": "hypernetworks, continual learning, meta learning",
+ "venue": "ICLR",
+ "description": "The authors present a method for preventing catastrophic forgetting using task-conditioned hypernets (i.e., hypernets that generate the target model's weights from a learned task embedding). Rather than memorizing the characteristics of all previously seen data, the problem reduces to retaining a single point in weight space per task, indexed by its task embedding.",
+ "link": "https://arxiv.org/pdf/1906.00695"
+ },
+ {
+ "title": "Stochastic Hyperparameter Optimization through Hypernetworks",
+ "author": "Jonathan Lorraine et al",
+ "year": "2018",
+ "topic": "hypernetworks, hyperparameters",
+ "venue": "ICLR",
+ "description": "The authors use hypernetworks to optimize hyperparameters: a differentiable hypernetwork is trained to map hyperparameters to approximately optimal model weights, replacing the inner training loop and allowing hyperparameters to be tuned by gradient descent.",
+ "link": "https://arxiv.org/pdf/1802.09419"
+ },
{
"title": "Playing Atari with Deep Reinforcement Learning",
"author": "Volodymyr Mnih et al",