Skip to content

Commit 219cfb5

Browse files
committed
Merge branch 'devel'
2 parents 5d95178 + 2986078 commit 219cfb5

File tree

7 files changed

+308
-31
lines changed

7 files changed

+308
-31
lines changed

.travis.yml

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,27 @@ jobs:
2222

2323
# command to install dependencies
2424
install:
25-
- pip install -r requirements.txt
25+
- pip install --upgrade pip
26+
- pip uninstall numpy Bottleneck -y
27+
- pip install numpy==1.15.4 --no-cache-dir
28+
- pip install -r requirements.txt --no-cache-dir
2629
script:
2730
- pytest
2831
branches:
2932
only:
3033
- master
31-
- devel
34+
- devel
35+
- /^v\d+\.\d+.+$/
36+
37+
# Deploy to GitHub Releases, making a release of the
38+
# code whenever a new tag is added on the master branch
39+
deploy:
40+
provider: releases
41+
api_key: $GITHUB_TOKEN
42+
skip_cleanup: true
43+
draft: true
44+
on:
45+
branch:
46+
- master
47+
- /^v\d+\.\d+.+$/
48+
tags: true

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
master-v0.1
1+
master-v1.0.9

corrections/cbv_corrector/GOC_code.py

Lines changed: 92 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import os
1010
import numpy as np
11+
import math as m
1112
import logging
1213
from tqdm import tqdm
1314
from bottleneck import nanmean, nanmedian
@@ -17,23 +18,105 @@
1718
import six
1819
from lightkurve import TessLightCurve
1920
from astropy.io import fits
20-
21+
from astropy.stats import LombScargle
2122

2223
from .cbv_main import lc_matrix_calc
2324
from ..plots import plt
2425
from ..quality import CorrectorQualityFlags, TESSQualityFlags
25-
from .manual_filters import manual_exclude
26+
from ..manual_filters import manual_exclude
27+
28+
import matplotlib.pyplot as pl
2629

2730
# =============================================================================
2831
#
2932
# =============================================================================
3033

31-
def wn(ori_flux, corrected_flux):
32-
"""Calculate added white nosie between two light curves"""
34+
def psd_scargle(time, flux, Nsample = 10.):
35+
"""
36+
Calculate the power spectral density using the Lomb-Scargle (L-S) periodogram
37+
38+
Parameters:
39+
time (numpy array, float): time stamps of the light curve
40+
flux (numpy array, float): the flux variations of the light curve
41+
Nsample (optional, float): oversampling rate for the periodogram. Default value = 10.
42+
43+
Returns:
44+
fr (numpy array, float): evaluated frequency values in the domain of the periodogram
45+
sc (numpy array, float): the PSD values of the L-S periodogram
46+
47+
.. codeauthor:: Timothy Van Reeth <timothy.vanreeth@kuleuven.be>
48+
"""
49+
ndata = len(time) # The number of data points
50+
fnyq = 0.5/np.median(time[1:]-time[:-1]) # the Nyquist frequency
51+
fres = 1./(time[-1]-time[0]) # the frequency resolution
52+
fr = np.arange(0.,fnyq,fres/float(Nsample)) # the frequencies
53+
sc1 = LombScargle(time, flux).power(fr, normalization='psd') # The non-normalized Lomb-Scargle "power"
54+
55+
# Computing the appropriate rescaling factors (to convert to astrophysical units)
56+
fct = m.sqrt(4./ndata)
57+
T = time.ptp()
58+
sc = fct**2. * sc1 * T
59+
60+
# Ensuring the output does not contain nans
61+
if(np.isnan(sc).any()):
62+
fr = fr[~np.isnan(sc)]
63+
sc = sc[~np.isnan(sc)]
64+
65+
return fr, sc
66+
67+
68+
69+
def wn(ori_lc, corrected_lc, alpha_n = 1.):
70+
"""
71+
Calculate added white noise between two light curves.
72+
Based on Eq. 8.4-8.5 in the Kepler PDC
73+
74+
Parameters:
75+
ori_lc (lightkurve object): the uncorrected TESS light curve
76+
corrected_lc (lightkurve object): the corrected TESS light curve
77+
alpha_n (optional, float): scaling factor. Default value = 1.
78+
79+
Returns:
80+
Gn (float): goodness metric for the added white noise.
81+
In the limit where ori_lc and corrected_lc are identical, Gn approaches 0.
82+
In the (improbable?) case where noise is removed instead of added, Gn = -1.
83+
84+
85+
.. codeauthor:: Timothy Van Reeth <timothy.vanreeth@kuleuven.be>
86+
"""
87+
88+
# Excluding nans from the input LCs to avoid problems
89+
ori_time0 = ori_lc.time[~np.isnan(ori_lc.flux)]
90+
ori_flux0 = ori_lc.flux[~np.isnan(ori_lc.flux)]
91+
corr_time0 = corrected_lc.time[~np.isnan(corrected_lc.flux)]
92+
corr_flux0 = corrected_lc.flux[~np.isnan(corrected_lc.flux)]
93+
94+
# Calculating the Noise floor of both LCs, defined as the differences between adjacent flux values
95+
ori_time = ori_time0[:-1]
96+
ori_Nf = ori_flux0[1:] - ori_flux0[:-1]
3397

34-
pass
98+
corr_time = corr_time0[:-1]
99+
corr_Nf = corr_flux0[1:] - corr_flux0[:-1]
100+
101+
# Computing the PSDs of the noise floors
102+
corr_fr,corr_psd = psd_scargle(corr_time, corr_Nf - np.mean(corr_Nf))
103+
ori_fr,ori_psd = psd_scargle(ori_time, ori_Nf - np.mean(ori_Nf))
104+
105+
# Ensuring both PSDs are evaluated for the same frequencies
106+
int_corr_psd = np.interp(ori_fr, corr_fr, corr_psd)
107+
108+
# Integrate the log of the ratio of PSDs, ensuring the integral exists
109+
if(np.r_[int_corr_psd < ori_psd].all()):
110+
Gn = -1.
111+
else:
112+
integrand = np.log10(int_corr_psd/ori_psd)
113+
integrand[np.r_[int_corr_psd < ori_psd]] = 0.
114+
Gn = alpha_n * np.trapz(integrand, x=ori_fr)
115+
116+
return Gn
117+
118+
35119

36-
# Check Kepler PDC Eq. 8.4-8.5
37120

38121

39122
class LCValidation(object):
@@ -185,6 +268,7 @@ def load_lightcurve(self, task, ver='RAW'):
185268

186269
return lc
187270

271+
188272
def search_database(self, select=None, search=None, order_by=None, limit=None, distinct=False):
189273
"""
190274
Search list of lightcurves and return a list of tasks/stars matching the given criteria.
@@ -352,6 +436,7 @@ def correlations(self, cbv_area):
352436
def added_noise(self):
353437

354438
#call wn on loaded targets
439+
355440
pass
356441

357442

@@ -453,4 +538,4 @@ def added_noise(self):
453538

454539

455540

456-
541+

corrections/taskmanager.py

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def __init__(self, todo_file, cleanup=False, overwrite=False, summary=None, summ
4343
self.conn = sqlite3.connect(todo_file)
4444
self.conn.row_factory = sqlite3.Row
4545
self.cursor = self.conn.cursor()
46+
self.cursor.execute("PRAGMA foreign_keys=ON;")
4647

4748
self.summary_file = summary
4849
self.summary_interval = summary_interval
@@ -63,6 +64,10 @@ def __init__(self, todo_file, cleanup=False, overwrite=False, summary=None, summ
6364
self.cursor.execute("CREATE INDEX corr_status_idx ON todolist (corr_status);")
6465
self.conn.commit()
6566

67+
# Create indices
68+
self.cursor.execute("CREATE INDEX IF NOT EXISTS datavalidation_raw_approved_idx ON datavalidation_raw (approved);")
69+
self.conn.commit()
70+
6671
# Reset the status of everything for a new run:
6772
if overwrite:
6873
self.cursor.execute("UPDATE todolist SET corr_status=NULL;")
@@ -73,7 +78,7 @@ def __init__(self, todo_file, cleanup=False, overwrite=False, summary=None, summ
7378
self.cursor.execute("""CREATE TABLE IF NOT EXISTS diagnostics_corr (
7479
priority INT PRIMARY KEY NOT NULL,
7580
lightcurve TEXT,
76-
elaptime REAL NOT NULL,
81+
elaptime REAL,
7782
worker_wait_time REAL NOT NULL,
7883
variance DOUBLE PRECISION,
7984
rms_hour DOUBLE PRECISION,
@@ -84,18 +89,22 @@ def __init__(self, todo_file, cleanup=False, overwrite=False, summary=None, summ
8489
self.conn.commit()
8590

8691
# Reset calculations with status STARTED or ABORT:
87-
clear_status = str(STATUS.STARTED.value) + ',' + str(STATUS.ABORT.value)
92+
clear_status = str(STATUS.STARTED.value) + ',' + str(STATUS.ABORT.value) + ',' + str(STATUS.ERROR.value)
8893
self.cursor.execute("DELETE FROM diagnostics_corr WHERE priority IN (SELECT todolist.priority FROM todolist WHERE corr_status IN (" + clear_status + "));")
8994
self.cursor.execute("UPDATE todolist SET corr_status=NULL WHERE corr_status IN (" + clear_status + ");")
9095
self.conn.commit()
91-
96+
97+
# Analyze the tables for better query planning:
98+
self.cursor.execute("ANALYZE;")
99+
92100
# Prepare summary object:
93101
self.summary = {
94102
'slurm_jobid': os.environ.get('SLURM_JOB_ID', None),
95103
'numtasks': 0,
96104
'tasks_run': 0,
97105
'last_error': None,
98-
'mean_elaptime': 0.0
106+
'mean_elaptime': None,
107+
'mean_worker_waittime': None
99108
}
100109
# Make sure to add all the different status to summary:
101110
for s in STATUS: self.summary[s.name] = 0
@@ -154,10 +163,9 @@ def get_number_tasks(self, starid=None, camera=None, ccd=None, datasource=None):
154163
else:
155164
constraints = ''
156165

157-
self.cursor.execute("SELECT COUNT(*) AS num FROM todolist INNER JOIN diagnostics ON todolist.priority=diagnostics.priority INNER JOIN datavalidation_raw ON todolist.priority=datavalidation_raw.priority WHERE status IN (%d,%d) AND (corr_status IS NULL OR corr_status = %d) AND datavalidation_raw.approved=1 %s ORDER BY todolist.priority LIMIT 1;" % (
166+
self.cursor.execute("SELECT COUNT(*) AS num FROM todolist INNER JOIN diagnostics ON todolist.priority=diagnostics.priority INNER JOIN datavalidation_raw ON todolist.priority=datavalidation_raw.priority WHERE status IN (%d,%d) AND corr_status IS NULL AND datavalidation_raw.approved=1 %s ORDER BY todolist.priority LIMIT 1;" % (
158167
STATUS.OK.value,
159168
STATUS.WARNING.value,
160-
STATUS.ERROR.value,
161169
constraints
162170
))
163171

@@ -188,11 +196,9 @@ def get_task(self, starid=None, camera=None, ccd=None, datasource=None):
188196
else:
189197
constraints = ''
190198

191-
192-
self.cursor.execute("SELECT * FROM todolist INNER JOIN diagnostics ON todolist.priority=diagnostics.priority INNER JOIN datavalidation_raw ON todolist.priority=datavalidation_raw.priority WHERE status IN (%d,%d) AND (corr_status IS NULL OR corr_status = %d) AND datavalidation_raw.approved=1 %s ORDER BY todolist.priority LIMIT 1;" % (
199+
self.cursor.execute("SELECT * FROM todolist INNER JOIN diagnostics ON todolist.priority=diagnostics.priority INNER JOIN datavalidation_raw ON todolist.priority=datavalidation_raw.priority WHERE status IN (%d,%d) AND corr_status IS NULL AND datavalidation_raw.approved=1 %s ORDER BY todolist.priority LIMIT 1;" % (
193200
STATUS.OK.value,
194201
STATUS.WARNING.value,
195-
STATUS.ERROR.value,
196202
constraints
197203
))
198204
task = self.cursor.fetchone()
@@ -203,7 +209,7 @@ def save_results(self, result):
203209

204210
# Extract details dictionary:
205211
details = result.get('details', {})
206-
212+
207213
# The status of this target returned by the photometry:
208214
my_status = result['status_corr']
209215

@@ -213,15 +219,14 @@ def save_results(self, result):
213219
result['status_corr'].value,
214220
result['priority']
215221
))
216-
222+
217223
self.summary['tasks_run'] += 1
218224
self.summary[my_status.name] += 1
219225
self.summary['STARTED'] -= 1
220-
226+
221227
# Save additional diagnostics:
222228
error_msg = details.get('errors', None)
223229
if error_msg:
224-
# error_msg = '\n'.join(error_msg)
225230
self.summary['last_error'] = error_msg
226231

227232
# Save additional diagnostics:
@@ -239,13 +244,22 @@ def save_results(self, result):
239244
except:
240245
self.conn.rollback()
241246
raise
242-
243-
# Calculate mean elapsed time using "streaming mean":
244-
self.summary['mean_elaptime'] += (result['elaptime_corr'] - self.summary['mean_elaptime']) / self.summary['tasks_run']
245-
247+
248+
# Calculate mean elapsed time using "streaming weighted mean" with (alpha=0.1):
249+
# https://dev.to/nestedsoftware/exponential-moving-average-on-streaming-data-4hhl
250+
if self.summary['mean_elaptime'] is None:
251+
self.summary['mean_elaptime'] = result['elaptime_corr']
252+
elif result.get('elaptime_corr') is not None:
253+
self.summary['mean_elaptime'] += 0.1 * (result['elaptime_corr'] - self.summary['mean_elaptime'])
254+
255+
if self.summary['mean_worker_waittime'] is None:
256+
self.summary['mean_worker_waittime'] = result['worker_wait_time']
257+
elif result.get('worker_wait_time') is not None:
258+
self.summary['mean_worker_waittime'] += 0.1 * (result['worker_wait_time'] - self.summary['mean_worker_waittime'])
259+
246260
# Write summary file:
247261
if self.summary_file and self.summary['tasks_run'] % self.summary_interval == 0:
248-
self.write_summary()
262+
self.write_summary()
249263

250264
def start_task(self, taskid):
251265
"""
@@ -254,8 +268,8 @@ def start_task(self, taskid):
254268
self.cursor.execute("UPDATE todolist SET corr_status=? WHERE priority=?;", (STATUS.STARTED.value, taskid))
255269
self.conn.commit()
256270
self.summary['STARTED'] += 1
257-
258-
271+
272+
259273
def get_random_task(self):
260274
"""
261275
Get random task to be processed.

corrections/version.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,9 @@ def get_version(pep440=False, include_branch=True):
151151
return read_release_version()
152152

153153
if include_branch:
154-
git_version = call_git_getbranch() + '-' + git_version
154+
git_branch = call_git_getbranch()
155+
if not git_branch is None:
156+
git_version = git_branch + '-' + git_version
155157

156158
return git_version
157159

0 commit comments

Comments
 (0)