Skip to content

Commit 236fbaa

Browse files
authored
feat: Add pyhf.Workspace.sorted to give ability to sort Workspace (#1042)
* Add utility to sort a workspace
* Add CLI for sorting a workspace
* Add tests for sorting a workspace
1 parent f2e2db9 commit 236fbaa

File tree

5 files changed

+112
-0
lines changed

5 files changed

+112
-0
lines changed

src/pyhf/cli/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def pyhf():
2626
pyhf.add_command(spec.rename)
2727
pyhf.add_command(spec.combine)
2828
pyhf.add_command(spec.digest)
29+
pyhf.add_command(spec.sort)
2930

3031
# pyhf.add_command(infer.cli)
3132
pyhf.add_command(infer.cls)

src/pyhf/cli/spec.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,3 +353,44 @@ def digest(workspace, algorithm, output_json):
353353
)
354354

355355
click.echo(output)
356+
357+
358+
@cli.command()
@click.argument('workspace', default='-')
@click.option(
    '--output-file',
    help='The location of the output json file. If not specified, prints to screen.',
    default=None,
)
def sort(workspace, output_file):
    """
    Sort the workspace.

    See :func:`pyhf.workspace.Workspace.sorted` for more information.

    Example:

    .. code-block:: shell

        $ curl -sL https://raw.githubusercontent.com/scikit-hep/pyhf/master/docs/examples/json/2-bin_1-channel.json | pyhf sort | jq '.' | md5
        8be5186ec249d2704e14dd29ef05ffb0

    .. code-block:: shell

        $ curl -sL https://raw.githubusercontent.com/scikit-hep/pyhf/master/docs/examples/json/2-bin_1-channel.json | jq -S '.channels|=sort_by(.name)|.channels[].samples|=sort_by(.name)|.channels[].samples[].modifiers|=sort_by(.name,.type)|.observations|=sort_by(.name)' | md5
        8be5186ec249d2704e14dd29ef05ffb0


    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is user-facing output and is kept verbatim.

    # ``workspace`` is the path argument (default '-'); it is handed to
    # click.open_file and the stream is parsed as JSON.
    with click.open_file(workspace, 'r') as specstream:
        raw_spec = json.load(specstream)

    sorted_ws = Workspace.sorted(Workspace(raw_spec))

    # No output file requested: print the sorted spec and stop.
    if output_file is None:
        click.echo(json.dumps(sorted_ws, indent=4, sort_keys=True))
        return

    with open(output_file, 'w+') as out_file:
        json.dump(sorted_ws, out_file, indent=4, sort_keys=True)
    log.debug(f"Written to {output_file}")

src/pyhf/workspace.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,3 +676,31 @@ def combine(cls, left, right, join='none'):
676676
'version': new_version,
677677
}
678678
return cls(newspec)
679+
680+
@classmethod
def sorted(cls, workspace):
    """
    Build a sorted copy of a workspace.

    The input is deep-copied first, so ``workspace`` itself is not modified.
    On the copy, channels, the samples of each channel, measurements, the
    config parameters of each measurement, and observations are ordered by
    their ``name``; the modifiers of each sample are ordered by
    ``(name, type)``.

    Args:
        workspace (~pyhf.workspace.Workspace): A workspace to sort

    Returns:
        ~pyhf.workspace.Workspace: A new object, constructed via ``cls`` from the sorted specification

    """

    def by_name(entry):
        return entry['name']

    def by_name_then_type(entry):
        return (entry['name'], entry['type'])

    # Work on an independent copy so the caller's workspace keeps its order.
    newspec = copy.deepcopy(dict(workspace))

    channels = newspec['channels']
    channels.sort(key=by_name)
    for channel in channels:
        samples = channel['samples']
        samples.sort(key=by_name)
        for sample in samples:
            sample['modifiers'].sort(key=by_name_then_type)

    measurements = newspec['measurements']
    measurements.sort(key=by_name)
    for measurement in measurements:
        measurement['config']['parameters'].sort(key=by_name)

    newspec['observations'].sort(key=by_name)

    return cls(newspec)

tests/test_scripts.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,3 +539,30 @@ def test_patchset_apply(datadir, tmpdir, script_runner, output_file):
539539
"hi": 1.2,
540540
"lo": 0.8,
541541
}
542+
543+
544+
def test_sort(tmpdir, script_runner):
    """Check that ``pyhf sort`` succeeds on a workspace produced by ``pyhf xml2json``."""
    temp = tmpdir.join("parsed_output.json")
    command = f'pyhf xml2json validation/xmlimport_input/config/example.xml --basedir validation/xmlimport_input/ --output-file {temp.strpath:s} --hide-progress'
    ret = script_runner.run(*shlex.split(command))
    # Fail here if the fixture workspace could not be generated, rather than
    # letting the problem show up as a misleading ``pyhf sort`` failure.
    assert ret.success

    command = f'pyhf sort {temp.strpath}'

    ret = script_runner.run(*shlex.split(command))
    assert ret.success
555+
556+
557+
def test_sort_outfile(tmpdir, script_runner):
    """Check that ``pyhf sort --output-file`` succeeds and writes a sorted JSON workspace."""
    # Imported locally so this test does not depend on the module's import block.
    import json

    temp = tmpdir.join("parsed_output.json")
    command = f'pyhf xml2json validation/xmlimport_input/config/example.xml --basedir validation/xmlimport_input/ --output-file {temp.strpath:s} --hide-progress'
    ret = script_runner.run(*shlex.split(command))
    # Fail here if the fixture workspace could not be generated, rather than
    # letting the problem show up as a misleading ``pyhf sort`` failure.
    assert ret.success

    tempout = tmpdir.join("sort_output.json")
    command = f'pyhf sort {temp.strpath} --output-file {tempout.strpath}'

    ret = script_runner.run(*shlex.split(command))
    assert ret.success

    # The point of --output-file is the file itself: it must contain valid
    # JSON whose channels are ordered by name.
    sorted_spec = json.loads(tempout.read())
    channel_names = [channel['name'] for channel in sorted_spec['channels']]
    assert channel_names == sorted(channel_names)

tests/test_workspace.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,3 +746,18 @@ class FooWorkspace(pyhf.Workspace):
746746

747747
combined = FooWorkspace.combine(ws, new_ws)
748748
assert isinstance(combined, FooWorkspace)
749+
750+
751+
def test_sorted(workspace_factory):
    """Check that ``Workspace.sorted`` orders samples in the copy and leaves the input as is."""
    marker = 'zzzzlast'
    ws = workspace_factory()
    # rename the leading sample of every channel so that it must sort to the end
    for channel in ws['channels']:
        channel['samples'][0]['name'] = marker

    new_ws = pyhf.Workspace.sorted(ws)

    # the input workspace keeps the renamed sample in first position
    leading_in_original = [channel['samples'][0]['name'] for channel in ws['channels']]
    assert leading_in_original == [marker] * len(ws['channels'])

    # the sorted workspace has the renamed sample in last position
    trailing_in_sorted = [
        channel['samples'][-1]['name'] for channel in new_ws['channels']
    ]
    assert trailing_in_sorted == [marker] * len(new_ws['channels'])

0 commit comments

Comments
 (0)