Skip to content

push stats on demand #91

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion BRB/PushButton.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ def GetResults(config, project, libraries):
)
log.info(f"Processing {dataPath}")
except:
print("external data")
print(f"GetResults with ignore=True, {project} is external data.")
ignore = True
validLibraryTypes = {v: i for i, v in enumerate(config.get('Options', 'validLibraryTypes').split(','))}
pipelines = config.get('Options', 'pipelines').split(',')
Expand Down
136 changes: 100 additions & 36 deletions BRB/run.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
import glob
import sys
import os
import BRB.getConfig
Expand All @@ -13,6 +14,36 @@
from pathlib import Path
from rich import print


def process_data(config, ParkourDict):
    """Run PushButton.GetResults for every project present in the current run and mail the outcome.

    The run directory is built from the ``Paths/baseData`` and
    ``Options/runID`` config values. Projects without a ``Project_<name>``
    directory below it are logged and skipped.

    Args:
        config: configuration object providing ``get(section, option)``.
        ParkourDict: mapping whose items are passed to
            ``BRB.PushButton.GetResults`` as its second and third arguments.

    Raises:
        Exception: anything raised by ``GetResults`` is re-raised after an
            error e-mail, a critical log entry and a message on stderr.
    """
    run_dir = "{}/{}".format(config.get('Paths', 'baseData'), config.get('Options', 'runID'))
    collected = []
    for project, libraries in ParkourDict.items():
        if os.path.exists("{}/Project_{}".format(run_dir, BRB.misc.pacifier(project))):
            try:
                collected = collected + BRB.PushButton.GetResults(config, project, libraries)
            except Exception:
                # Report the failure on every channel, then let it propagate.
                failure = "Received an error running PushButton.GetResults() with {} and {}".format(project, libraries)
                BRB.email.errorEmail(config, sys.exc_info(), failure)
                log.critical(failure)
                print(failure, file=sys.stderr)
                raise
        else:
            log.info("{}/Project_{} doesn't exist, probably lives on another lane.".format(run_dir, BRB.misc.pacifier(project)))

    # Email finished message
    log.info('Create e-mail')
    log.info(collected)
    BRB.email.finishedEmail(config, collected)


def validate_fcid_with_stats(ctx, param, value):
    """Option callback that rejects a missing ``--fcid`` when ``--stats`` is set.

    Args:
        ctx: context object; only ``ctx.params.get('stats')`` is read.
        param: the parameter being validated (unused).
        value: the value supplied for ``--fcid``.

    Returns:
        ``value`` unchanged when the check passes.

    Raises:
        click.UsageError: if ``stats`` is truthy in ``ctx.params`` and
            ``value`` is falsy.
    """
    stats_active = ctx.params.get('stats')
    if not stats_active or value:
        return value
    raise click.UsageError('--fcid is required when --stats standalone run is active.')


@click.command(
context_settings=dict(
help_option_names=["-h", "--help"]
Expand All @@ -24,49 +55,82 @@
type=click.Path(exists=True),
required=False,
default=os.path.expanduser('~/configs/BigRedButton.ini'),
help='specify a custom ini file.',
help='Specify a custom ini file.',
show_default=True
)
def run_brb(configfile):
@click.option(
"-s",
"--stats",
required=False,
is_flag=True,
help='Standalone run, will not run any pipelines. Requires --fcid to indicate target.'
)
@click.option('--fcid', callback=validate_fcid_with_stats, help='Flowcell ID to push stats.')
def run_brb(configfile, stats, fcid):

while True:
#Read the config file
# Read the config file
config = BRB.getConfig.getConfig(configfile)

if not stats:
# Get the next flow cell to process, or sleep
config, ParkourDict = BRB.findFinishedFlowCells.newFlowCell(config)
if (config.get('Options','runID') == '') or ParkourDict is None:
sleep(60*60)
continue

#Get the next flow cell to process, or sleep
config, ParkourDict = BRB.findFinishedFlowCells.newFlowCell(config)
if(config.get('Options','runID') == '') or ParkourDict is None:
sleep(60*60)
continue
# Open log file
logFile = Path(
config['Paths']['logPath'],
config.get('Options','runID') + '.log'
)
print(f"Logging into: {logFile}")
setLog(logFile)

# Open log file
logFile = Path(
config['Paths']['logPath'],
config.get('Options','runID') + '.log'
)
print(f"Logging into: {logFile}")
setLog(logFile)
else:
# Push stats on-demand
log.info(f"Pushing stats for flowcell: {fcid}")
d = [d for d in glob.glob("{}/*/fastq.made".format(config.get('Paths', 'baseData'))) if fcid in d]
dual_lane = len(d) == 2
if len(d) == 0:
log.error(f"No fastq.made files found for {fcid}")
return # Exit BRB if no files found.
elif len(d) > 2:
log.error(f"How many lanes does {fcid} have?!")
return # Exit BRB this error shouldn't happen at all.

config.set('Options','runID',d[0].split("/")[-2])
ParkourDict = BRB.findFinishedFlowCells.queryParkour(config)

if dual_lane:
config1 = BRB.getConfig.getConfig(configfile)
config1.set('Options','runID',d[1].split("/")[-2])
ParkourDict1 = BRB.findFinishedFlowCells.queryParkour(config)

#Process each group's data, ignore cases where the project isn't in the lanes being processed
bdir = "{}/{}".format(config.get('Paths', 'baseData'), config.get('Options', 'runID'))
msg = []
for k, v in ParkourDict.items():
if not os.path.exists("{}/Project_{}".format(bdir, BRB.misc.pacifier(k))):
log.info("{}/Project_{} doesn't exist, probably lives on another lane.".format(bdir, BRB.misc.pacifier(k)))
continue
try:
msg = msg + BRB.PushButton.GetResults(config, k, v)
except Exception as e:
BRB.email.errorEmail(config, sys.exc_info(), "Received an error running PushButton.GetResults() with {} and {}".format(k, v))
log.critical("Received an error running PushButton.GetResults() with {} and {}".format(k, v))
print("Received an error running PushButton.GetResults() with {} and {}".format(k, v), file=sys.stderr)
raise
# Open log file
if not dual_lane:
logFile = Path(config['Paths']['logPath'], config.get('Options','runID') + '.log')
else:
logFile = Path(config['Paths']['logPath'], config.get('Options','runID') + '_2' + '.log')
print(f"Logging into: {logFile}")
setLog(logFile)

if dual_lane:
log.info("Same log-file is being used for both lanes. Hopefully this is not too confusing :$")


# Process each group's data, ignore cases where the project isn't in the lanes being processed
process_data(config, ParkourDict)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I still fail to understand the logic here.

stats not set -> only set a log file
stats set -> infer some lane status and set some other paths ?

Afterwards, all the data gets processed anyway. For me this is effectively the same as running BRB twice (with the added disadvantage that we are now hard-coded for two lanes).

What if you don't take the FCID as an argument, but an actual path to a processed flowcell (which might be split per lane or not, but at least you don't have to infer it here), and subsequently have:
if not stats:
process_data
else:
if analysis was actually done already (because analysis worked, parkour was just dead) -> run phoneHome
else no analysis was actually done (because non-std libtype, or external data) -> run telegraphHome


if stats and dual_lane:
process_data(config1, ParkourDict1)


if not stats:
# Mark the flow cell as having been processed
BRB.findFinishedFlowCells.markFinished(config)
log.info('=== finished flowcell ===')

#Email finished message
log.info('Create e-mail')
log.info(msg)
BRB.email.finishedEmail(config, msg)

#Mark the flow cell as having been processed
BRB.findFinishedFlowCells.markFinished(config)
log.info('=== finished flowcell ===')
if stats:
return # don't do anything else.