Merge pull request #32 from akhilsinghal1234/ogle

akhilsinghal1234 · web-flow · commit f6559ca9d6e6 · 2019-05-14T12:52:26.000+05:30
Added OGLE variable star classification
diff --git a/astropy_helpers b/astropy_helpers
@@ -1 +1 @@
-Subproject commit 587610315f783f2164a5b478d57831380ff01c0b
+Subproject commit 9f82aac6c2141b425e2d639560f7260189d90b54
diff --git a/mirapy/classifiers/models.py b/mirapy/classifiers/models.py
@@ -1,6 +1,7 @@
 import os
 from keras.optimizers import *
 from keras.models import load_model, Sequential
+# from keras.layers import Input, Dense, LSTM, Dropout
 from keras.layers import *
 import matplotlib.pyplot as plt
 
@@ -10,6 +11,7 @@ def __init__(self):
         self.model = None
         self.optimizer = None
         self.activation = None
+        self.history = None
 
     def compile(self, optimizer, loss='mean_squared_error'):
         pass
@@ -20,8 +22,8 @@ def save_model(self, model_name, path):
     def load_model(self, model_name, path):
         pass
 
-    def train(self, x_train, y_train, epochs=100, batch_size=32,
-              validation_split=0.1):
+    def train(self, x_train, y_train, epochs, batch_size, reset_weights,
+              class_weight, validation_data, verbose):
         pass
 
     def predict(self, x):
@@ -45,10 +47,8 @@ class XRayBinaryClassifier(Classifier):
     """
     build pre-worked model
     """
-    def __init__(self, activation='relu',
-                 optimizer=Adam(lr=0.0001, decay=1e-6)):
+    def __init__(self, activation='relu'):
         self.activation = activation
-        self.optimizer = optimizer
 
         model = Sequential()
         model.add(Dense(32, input_shape=(3,), activation=self.activation))
@@ -57,13 +57,40 @@ def __init__(self, activation='relu',
         model.add(Dense(3, activation='softmax'))
         self.model = model
 
-    def compile(self, loss='mean_squared_error'):
-        """
-        build the model
-        """
+    def compile(self, optimizer=None, loss='mean_squared_error'):
+        """
+        Compile the model with an accuracy metric.
+
+        :param optimizer: Keras optimizer. ``None`` creates a fresh
+            ``Adam(lr=0.0001, decay=1e-6)`` on every call, so no optimizer
+            object is shared between classifiers.
+        :param loss: Loss passed to ``model.compile``.
+        """
+        if optimizer is None:
+            optimizer = Adam(lr=0.0001, decay=1e-6)
+        self.optimizer = optimizer
         self.model.compile(self.optimizer,
                            loss=loss, metrics=['accuracy'])
 
+    def train(self, x_train, y_train, epochs=50, batch_size=100,
+              reset_weights=True, class_weight=None, validation_data=None,
+              verbose=1):
+        """
+        Fit the model and keep the result of ``fit`` in ``self.history``.
+
+        When ``reset_weights`` is true ``self.reset()`` is called first.
+        The other arguments are forwarded to ``model.fit``, which is always
+        run with ``shuffle=True``.
+        """
+        if reset_weights:
+            self.reset()
+
+        self.history = self.model.fit(x_train, y_train, batch_size=batch_size,
+                                      epochs=epochs,
+                                      validation_data=validation_data,
+                                      class_weight=class_weight, shuffle=True,
+                                      verbose=verbose)
+
     def save_model(self, model_name, path='models/'):
         """
         save model
@@ -81,27 +95,16 @@ def load_model(self, model_name, path='models/'):
         else:
             raise FileNotFoundError("Model does not exists")
 
-    def train(self, x_train, y_train, epochs=100, batch_size=32,
-              validation_split=0.1):
-
-        if not isinstance(x_train, np.ndarray) and\
-                isinstance(y_train, np.ndarray):
-            raise ValueError('Input array should be numpy arrays')
-
-        self.model.fit(x_train, y_train, epochs=epochs, shuffle=True,
-                       batch_size=batch_size,
-                       validation_split=validation_split)
-
     def test(self, x_test):
         return self.model.predict_classes(x_test)
 
 
 class AtlasVarStarClassifier(Classifier):
 
-    def __init__(self, activation='relu', optimizer=Adam(lr=0.01, decay=0.01),
-                 input_size=22, num_classes=9):
+    def __init__(self, activation='relu', input_size=22, num_classes=9):
         self.activation = activation
-        self.optimizer = optimizer
+        self.history = None
+
         model = Sequential()
         model.add(Dense(64, input_shape=(input_size,),
                         activation=self.activation))
@@ -111,13 +114,40 @@ def __init__(self, activation='relu', optimizer=Adam(lr=0.01, decay=0.01),
         model.add(Dense(num_classes, activation='softmax'))
         self.model = model
 
-    def compile(self, loss='mean_squared_error'):
-        """
-        build the model
-        """
+    def compile(self, optimizer=None, loss='mean_squared_error'):
+        """
+        Compile the model with an accuracy metric.
+
+        :param optimizer: Keras optimizer. ``None`` creates a fresh
+            ``Adam(lr=0.01, decay=0.01)`` on every call, so no optimizer
+            object is shared between classifiers.
+        :param loss: Loss passed to ``model.compile``.
+        """
+        if optimizer is None:
+            optimizer = Adam(lr=0.01, decay=0.01)
+        self.optimizer = optimizer
         self.model.compile(self.optimizer,
                            loss=loss, metrics=['accuracy'])
 
+    def train(self, x_train, y_train, epochs=50, batch_size=100,
+              reset_weights=True, class_weight=None, validation_data=None,
+              verbose=1):
+        """
+        Fit the model and keep the result of ``fit`` in ``self.history``.
+
+        When ``reset_weights`` is true ``self.reset()`` is called first.
+        The other arguments are forwarded to ``model.fit``, which is always
+        run with ``shuffle=True``.
+        """
+        if reset_weights:
+            self.reset()
+
+        self.history = self.model.fit(x_train, y_train, batch_size=batch_size,
+                                      epochs=epochs,
+                                      validation_data=validation_data,
+                                      class_weight=class_weight, shuffle=True,
+                                      verbose=verbose)
+
     def save_model(self, model_name, path='models/'):
         """
         save model
@@ -135,19 +152,74 @@ def load_model(self, model_name, path='models/'):
         else:
             raise FileNotFoundError("Model does not exists")
 
-    def train(self, x_train, y_train, epochs=100, batch_size=32,
-              validation_split=0.1):
+    def test(self, x_test):
+        return self.model.predict_classes(x_test)
 
-        if not isinstance(x_train, np.ndarray) and\
-                isinstance(y_train, np.ndarray):
-            raise ValueError('Input array should be numpy arrays')
 
-        self.model.fit(x_train, y_train, epochs=epochs, shuffle=True,
-                       batch_size=batch_size,
-                       validation_split=validation_split)
+class OGLEClassifier(Classifier):
+    """
+    LSTM-based classifier for single-channel sequences.
+
+    The network is LSTM(64) -> Dense(64) -> Dropout(0.2) -> Dense(16) ->
+    softmax over ``num_classes``.
+    """
 
-    def test(self, x_test):
-        return self.model.predict_classes(x_test)
+    def __init__(self, activation='relu', input_size=50, num_classes=5):
+        """
+        Build the (uncompiled) network.
+
+        :param activation: Activation of the two hidden Dense layers.
+        :param input_size: Number of time steps; the input shape is
+            ``(input_size, 1)``.
+        :param num_classes: Width of the softmax output layer.
+        """
+        self.activation = activation
+        # Defined up front so the attribute exists before compile() runs.
+        self.optimizer = None
+        self.history = None
+
+        model = Sequential()
+        model.add(LSTM(units=64, input_shape=(input_size, 1)))
+        model.add(Dense(64, activation=self.activation))
+        model.add(Dropout(0.2))
+        model.add(Dense(16, activation=self.activation))
+        model.add(Dense(num_classes, activation='softmax'))
+        self.model = model
+
+    def compile(self, optimizer='adam', loss='categorical_crossentropy'):
+        """
+        Compile the model with an accuracy metric.
+
+        :param optimizer: Keras optimizer name or instance.
+        :param loss: Loss passed to ``model.compile``.
+        """
+        self.optimizer = optimizer
+        self.model.compile(self.optimizer, loss=loss, metrics=['accuracy'])
+
+    def train(self, x_train, y_train, epochs=50, batch_size=100,
+              reset_weights=True, class_weight=None, validation_data=None,
+              verbose=1):
+        """
+        Fit the model and keep the result of ``fit`` in ``self.history``.
+
+        When ``reset_weights`` is true ``self.reset()`` is called first.
+        The other arguments are forwarded to ``model.fit``, which is always
+        run with ``shuffle=True``.
+        """
+        if reset_weights:
+            self.reset()
+
+        self.history = self.model.fit(x_train, y_train, batch_size=batch_size,
+                                      epochs=epochs,
+                                      validation_data=validation_data,
+                                      class_weight=class_weight, shuffle=True,
+                                      verbose=verbose)
+
+    def predict(self, x):
+        """
+        Return ``model.predict_classes(x)`` for the input batch ``x``.
+        """
+        return self.model.predict_classes(x)
 
 
 class HTRU1Classifier(Classifier):
diff --git a/mirapy/data/load_dataset.py b/mirapy/data/load_dataset.py
@@ -136,6 +136,56 @@ def load_atlas_star_data(path, test_split, standard_scaler=True,
     return x_train, y_train, x_test, y_test
 
 
+# handle class inequality
+def load_ogle_dataset(path, classes, test_split=0.2, time_len=50, pad=False):
+    """
+    Load OGLE light-curve files as fixed-length sequences, split train/test.
+
+    Files are read from ``<path>/<class>/I`` for every name in `classes`.
+    Only the first `time_len` lines of a file are used; each must hold three
+    space-separated fields, of which the middle one is kept as the value.
+
+    :param path: Root directory of the dataset.
+    :param classes: Class folder names; a sample's label is the index of
+        its class in this list.
+    :param test_split: Passed as ``test_size`` to ``train_test_split``.
+    :param time_len: Number of values per sample.
+    :param pad: If true, files shorter than `time_len` are zero-padded;
+        otherwise they are skipped.
+    :return: ``x_train, y_train, x_test, y_test``; the x arrays have shape
+        ``(n_samples, time_len, 1)``.
+    """
+    mag, y = [], []
+    for class_ in classes:
+        folder = os.path.join(path, class_, 'I')
+        label = classes.index(class_)
+        for file in os.listdir(folder):
+            # Read once inside `with` so the handle is closed even on error.
+            with open(os.path.join(folder, file)) as f:
+                lines = f.readlines()
+            # Empty files never yield a sample, padded or not.
+            if not lines or (not pad and len(lines) < time_len):
+                continue
+
+            mag_i = [0] * time_len
+            for j, line in enumerate(lines[:time_len]):
+                fields = line.split(' ')
+                if len(fields) != 3:
+                    break  # malformed line: drop the whole file
+                mag_i[j] = float(fields[1])
+            else:
+                mag.append(np.array(mag_i))
+                y.append(label)
+
+    mag = np.array(mag)
+    y = np.array(y)
+    mag = mag.reshape(mag.shape[0], mag.shape[1], 1)
+    x_train, x_test, y_train, y_test = \
+        train_test_split(mag, y, test_size=test_split, random_state=42)
+    return x_train, y_train, x_test, y_test
+
+
 def load_htru1_data(data_dir='htru1-batches-py'):
     x_train = None
     y_train = []