Skip to content

Commit 76e3d1a

Browse files
committed
comments
1 parent da45b2d commit 76e3d1a

File tree

5 files changed

+61
-16
lines changed

5 files changed

+61
-16
lines changed

workflow/src/export-settings.lib.tengo

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
ll := import("@platforma-sdk/workflow-tengo:ll")
22
text := import("text")
33

4+
// ==============================================
5+
//
6+
// Every function in this file will return:
7+
// pfconvParams - params to run xsv.importFileMap on exported files
8+
// cmdArgs - additional args for MiXCR to specify what fields to extract
9+
//
10+
// ==============================================
11+
12+
// export of trees without nodes
413
shmTreeTableOptions := func(dataDescription, runWithSingleCell) {
514
// TODO add forChain if runWithSingleCell
615

@@ -211,6 +220,7 @@ shmTreeTableOptions := func(dataDescription, runWithSingleCell) {
211220
}
212221
}
213222

223+
// export data that is unique for a node
214224
shmTreeNodesTableOptions := func(dataDescription, runWithSingleCell) {
215225
// TODO add forChain if runWithSingleCell
216226

@@ -409,6 +419,8 @@ shmTreeNodesTableOptions := func(dataDescription, runWithSingleCell) {
409419
}
410420
}
411421

422+
// export data that is unique for clones, but not unique for a node
423+
// (different clones could be in the same topology node, for example, different time points)
412424
shmTreeNodesWithClonesTableOptions := func(dataDescription, donorColumn, runWithSingleCell) {
413425
// TODO add forChain if runWithSingleCell
414426
donorColumnSpec := donorColumn.get("spec").getDataAsJson()
@@ -643,6 +655,7 @@ shmTreeNodesWithClonesTableOptions := func(dataDescription, donorColumn, runWith
643655
}
644656
}
645657

658+
// to use the file as a library, we should explicitly export functions
646659
export ll.toStrict({
647660
shmTreeTableOptions: shmTreeTableOptions,
648661
shmTreeNodesTableOptions: shmTreeNodesTableOptions,

workflow/src/main.tpl.tengo

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ wf.body(func(args) {
1616
ll.panic("No datasets to process")
1717
}
1818

19+
// we could not use array as request for waiting (see below), so we store datasets in a dictionary
1920
datasets := {}
2021
for datasetRef in args.datasetColumns {
2122
if is_undefined(datasetRef) {
@@ -26,6 +27,9 @@ wf.body(func(args) {
2627

2728
donorColumn := wf.resolve(args.donorColumn)
2829

30+
// The problem is that refs for data are not resolved.
31+
// To deal with it, we should call resolve, which will return a feature.
32+
// Then to resolve the feature we should call another template where we will describe what to wait for
2933
results := render.createEphemeral(processTpl, {
3034
datasets: datasets,
3135
donorColumn: donorColumn
@@ -40,6 +44,8 @@ wf.body(func(args) {
4044
"allelesLogs": results.output("allelesLogs"),
4145
"treesLogs": results.output("treesLogs"),
4246

47+
// files should be explicitly published, otherwise they will not be accessible from the GUI
48+
// TODO it should be automated
4349
"allelesReports": pframes.exportColumnData(results.output("allelesReports")),
4450
"treesReports": pframes.exportColumnData(results.output("treesReports"))
4551
},

workflow/src/prepare-donor-column.lib.tengo

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,21 @@ groupDataByDonorId := func(donorColumn, datasets) {
5151

5252
sampleToDonor := {}
5353

54+
// columns with meta can be fetched as data directly
5455
for k, v in donorColumn.get("data").getDataAsJson()["data"] {
5556
sampleId := json.decode(k)[0]
5657
sampleToDonor[sampleId] = v
5758
}
5859

59-
dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({
60-
keyLength: 3
61-
}))
60+
// build pColumn by hand
61+
dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
6262

63+
// collect all the clns files that we have into pColumn
6364
for blockId, dataset in datasets {
6465
for sKey, fileRef in dataset.get("data").inputs() {
6566
sampleId := json.decode(sKey)[0]
66-
dataBuilder.createInputField(json.encode([sampleToDonor[sampleId], sampleId, blockId])).set(fileRef)
67+
donor := sampleToDonor[sampleId]
68+
dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
6769
}
6870
}
6971

@@ -73,6 +75,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
7375
}
7476
}
7577

78+
// to use the file as a library, we should explicitly export functions
7679
export ll.toStrict({
7780
groupDataByDonorId: groupDataByDonorId
7881
})

workflow/src/process.tpl.tengo

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
1111

1212
reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
1313

14+
// this template should run only after all inputs are resolved
1415
self.awaitState("datasets", { wildcard: "*" }, "ResourceReady")
1516
self.awaitState("donorColumn", "ResourceReady")
1617

1718
self.body(func(inputs) {
19+
// overall description of data that we have.
1820
dataDescription := {
1921
"hasUmiTags": false,
2022
"hasCellTags": false,
23+
// will be filled
2124
"coveredFeatures": []
2225
}
2326

@@ -32,23 +35,30 @@ self.body(func(inputs) {
3235
dataDescription["hasUmiTags"] = true
3336
}
3437
dataDescription["coveredFeatures"] = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
38+
// check that the assemblingFeature is the same. If so, coveredFeatures will be the same too
3539
if (assemblingFeature == "") {
3640
assemblingFeature = dataDescription["mixcr.com/assemblingFeature"]
3741
} else if (assemblingFeature != dataDescription["mixcr.com/assemblingFeature"]) {
3842
ll.panic("Assmble features should be the same for process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
3943
}
4044
}
4145

46+
// there should be a call to join on pFrames, but it's not implemented, so we will do it by hand
4247
dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
4348

49+
// collect params for running export commands and to parse result tsv files into pColumns
4450
shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription, false)
4551
shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription, false)
4652
shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn, false)
4753

54+
// TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
4855
mixcrResults := llPFrames.aggregate(
56+
// files to iterate through
4957
dataGroupedByDonorId["data"],
58+
// columns not to combine - sampleId and mixcrBlockId
5059
[1, 2],
5160
reconstructShmTreesTpl,
61+
// all the outputs that should be gathered
5262
[
5363
{
5464
"name": "trees",
@@ -74,43 +84,46 @@ self.body(func(inputs) {
7484
}
7585
],
7686
false,
87+
// inputs
7788
{
7889
"shmTreeTableOptions": shmTreeTableOptions["cmdArgs"],
7990
"shmTreeNodesTableOptions": shmTreeNodesTableOptions["cmdArgs"],
8091
"shmTreeNodesWithClonesTableOptions": shmTreeNodesWithClonesTableOptions["cmdArgs"]
8192
}
8293
)
8394

95+
// donorId axis is inherited from dataGroupedByDonorId and we should specify it explicitly (other axes will be supplied by pfconvParams)
96+
additionalArgsForImportTsv := {
97+
additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
98+
}
99+
84100
trees := xsv.importFileMap(
85101
mixcrResults.output("trees"),
86102
"tsv",
87103
shmTreeTableOptions["pfconvParams"],
88-
{
89-
additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
90-
}
104+
additionalArgsForImportTsv
91105
)
92106

93107
treeNodes := xsv.importFileMap(
94108
mixcrResults.output("treeNodes"),
95109
"tsv",
96110
shmTreeNodesTableOptions["pfconvParams"],
97-
{
98-
additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
99-
}
111+
additionalArgsForImportTsv
100112
)
101113

102114
treeNodesWithClones := xsv.importFileMap(
103115
mixcrResults.output("treeNodesWithClones"),
104116
"tsv",
105117
shmTreeNodesWithClonesTableOptions["pfconvParams"],
106-
{
107-
additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
108-
}
118+
additionalArgsForImportTsv
109119
)
110120

111121
return {
122+
// combine columns into pFrame
112123
"trees": pframes.exportFrame(trees),
124+
// combine columns into pFrame
113125
"treeNodes": pframes.exportFrame(treeNodes),
126+
// combine columns into pFrame
114127
"treeNodesWithClones": pframes.exportFrame(treeNodesWithClones),
115128

116129
"allelesLogs": mixcrResults.output("allelesLog"),

workflow/src/reconstruct-shm-trees.tpl.tengo

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,16 @@ exec := import("@platforma-sdk/workflow-tengo:exec")
66

77
json := import("json")
88

9+
// for usage in aggregate function, we should specify all outputs that will be used
910
self.defineOutputs(
1011
"trees", "treeNodes", "treeNodesWithClones",
1112
"allelesLog", "treesLog",
1213
"allelesReport", "treesReport"
1314
)
1415

16+
// import MiXCR as a software to use
1517
mixcrSw := assets.importSoftware("@milaboratory/mixcr:main")
18+
// env for MiXCR to format progress messages
1619
progressPrefix := "[==PROGRESS==]"
1720

1821
self.body(func(inputs) {
@@ -25,19 +28,20 @@ self.body(func(inputs) {
2528
arg("findAlleles").
2629
arg("--report").arg("report.txt").
2730
saveFile("report.txt").
31+
// template specifies where result files will be written
2832
arg("--output-template").arg("alleles/{file_name}.clns")
2933

3034
toProcess := []
3135
for sKey, inputFile in inputData.inputs() {
3236
key := json.decode(sKey)
3337
sampleId := key[0]
3438
clonotypingBlockId := key[1]
39+
// file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
3540
fileName := sampleId + "___" + clonotypingBlockId + ".clns"
36-
element := {
41+
toProcess = append(toProcess, {
3742
"fileName": fileName,
3843
"input": inputFile
39-
}
40-
toProcess = append(toProcess, element)
44+
})
4145
}
4246

4347
for input in toProcess {
@@ -69,6 +73,7 @@ self.body(func(inputs) {
6973
shmTrees := shmTreesCmdBuilder.run()
7074

7175

76+
// export trees without nodes
7277
shmTreeExportsCmdBuilder := exec.builder().
7378
printErrStreamToStdout().
7479
env("MI_PROGRESS_PREFIX", progressPrefix).
@@ -89,6 +94,7 @@ self.body(func(inputs) {
8994

9095

9196

97+
// export tree nodes with data unique for the node
9298
shmTreeNodesExportsCmdBuilder := exec.builder().
9399
printErrStreamToStdout().
94100
env("MI_PROGRESS_PREFIX", progressPrefix).
@@ -107,11 +113,15 @@ self.body(func(inputs) {
107113

108114
shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
109115

116+
117+
118+
// export nodes with clones. Each node can contain several clones
110119
shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
111120
printErrStreamToStdout().
112121
env("MI_PROGRESS_PREFIX", progressPrefix).
113122
software(mixcrSw).
114123
arg("exportShmTreesWithNodes").
124+
// don't export nodes that don't have clones
115125
arg("--only-observed")
116126

117127
for arg in inputs.shmTreeNodesWithClonesTableOptions {

0 commit comments

Comments
 (0)