Merge pull request #35 from bhavnicksm/main

bhavnicksm · web-flow · commit cda9685ad95d · 2023-04-30T18:21:19.000+05:30
Add AdaBelief; Update README
diff --git a/README.md b/README.md
@@ -55,20 +55,22 @@ optimizer.step()
 
 # Supported Optimisers
 
-| Optimiser 	| Paper 	                                            |
-|:---------:	|:-----:	                                            |
-|  **SGD**  	| https://paperswithcode.com/method/sgd                 |
-|  **Momentum** | https://paperswithcode.com/method/sgd-with-momentum   |
-|  **NAG**      | https://jlmelville.github.io/mize/nesterov.html       |
+| Optimiser 	| Paper 	                                                 |
+|:---------:	|:-----:	                                                 |
+|  **SGD**  	| https://paperswithcode.com/method/sgd                      |
+|  **Momentum** | https://paperswithcode.com/method/sgd-with-momentum        |
+|  **NAG**      | https://jlmelville.github.io/mize/nesterov.html            |
 |  **Adagrad** 	| https://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf |
-|  **RMSProp** 	| https://paperswithcode.com/method/rmsprop             |
-|  **Adam**     | https://arxiv.org/abs/1412.6980v9                     |
-|  **Adamax**   | https://arxiv.org/abs/1412.6980v9                     |
-|  **AdamW**    | https://arxiv.org/abs/1711.05101v3                    |
-|  **Adadelta** | https://arxiv.org/abs/1212.5701v1                     |
-|  **AMSGrad**    | https://arxiv.org/abs/1904.09237v1                  |
-|  **RAdam**    | https://arxiv.org/abs/1908.03265v4                    |
-|  **Lion**     | https://arxiv.org/abs/2302.06675                      |
+|  **RMSProp** 	| https://paperswithcode.com/method/rmsprop                  |
+|  **Adam**     | https://arxiv.org/abs/1412.6980v9                          |
+|  **Adamax**   | https://arxiv.org/abs/1412.6980v9                          |
+|  **AdamW**    | https://arxiv.org/abs/1711.05101v3                         |
+|  **Adadelta** | https://arxiv.org/abs/1212.5701v1                          |
+|  **AMSGrad**  | https://arxiv.org/abs/1904.09237v1                         |
+|  **RAdam**    | https://arxiv.org/abs/1908.03265v4                         |
+|  **Lion**     | https://arxiv.org/abs/2302.06675                           |
+|  **AdaBelief**| https://arxiv.org/pdf/2010.07468v5.pdf                     |
+|  **NAdam**    | http://cs229.stanford.edu/proj2015/054_report.pdf          |
 
 # Acknowledgements
 
diff --git a/src/nadir/adabelief.py b/src/nadir/adabelief.py
@@ -0,0 +1,52 @@
+### Copyright 2023 [Dawn Of Eve]
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict, Any, Optional
+from dataclasses import dataclass
+
+import torch
+
+from .adam import Adam, AdamConfig
+
+__all__ = ['Adabelief', 'AdabeliefConfig']
+
+@dataclass
+class AdabeliefConfig(AdamConfig):
+  lr : float = 3E-4  # default learning rate carried by this config
+  nesterov : bool = True  # NOTE(review): the code that reads this flag is not visible here; confirm this default is intended for AdaBelief
+
+class Adabelief(Adam):
+  """Adam subclass whose `adaptivity` state is an EMA of (grad - momentum) ** 2."""
+
+  def __init__ (self, params, config : Optional[AdabeliefConfig] = None):
+    # Build the default per instance: a default evaluated in the signature would be one shared, mutable object.
+    config = AdabeliefConfig() if config is None else config
+    super().__init__(params, config)
+    self.config = config
+
+  @Adam.amsgrad
+  def adaptivity(self, state, grad):
+    """Update state['adaptivity'] in place; return sqrt(v_hat + eps).
+
+    v_hat is v / (1 - beta_2 ** (step + 1)) when config.bias_correction is
+    set, otherwise the accumulator v itself.
+    """
+    step, beta_2 = state['step'], self.config.beta_2
+    v = state['adaptivity']
+    # Deviation of the gradient from state['momentum'], computed once and reused.
+    residual = grad - state['momentum']
+    v.mul_(beta_2).addcmul_(residual, residual, value = (1 - beta_2))
+    state['adaptivity'] = v
+    # div() is out-of-place, so the stored accumulator is not bias-corrected.
+    v_hat = v.div(1 - beta_2**(step + 1)) if self.config.bias_correction else v
+    return torch.sqrt(v_hat + self.config.eps)