Skip to content

Commit 76e3d1a

Browse files
committed
comments
1 parent da45b2d commit 76e3d1a

File tree

5 files changed

+61
-16
lines changed

5 files changed

+61
-16
lines changed

workflow/src/export-settings.lib.tengo

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
ll := import("@platforma-sdk/workflow-tengo:ll")
22
text := import("text")
33

4+
// ==============================================
5+
//
6+
// Every function in this file will return:
7+
// pfconvParams - params to run xsv.importFileMap on exported files
8+
// cmdArgs - additional args for MiXCR to specify what fields to extract
9+
//
10+
// ==============================================
11+
12+
// export of trees without nodes
413
shmTreeTableOptions := func(dataDescription, runWithSingleCell) {
514
// TODO add forChain if runWithSingleCell
615

@@ -211,6 +220,7 @@ shmTreeTableOptions := func(dataDescription, runWithSingleCell) {
211220
}
212221
}
213222

223+
// export data that is unique for a node
214224
shmTreeNodesTableOptions := func(dataDescription, runWithSingleCell) {
215225
// TODO add forChain if runWithSingleCell
216226

@@ -409,6 +419,8 @@ shmTreeNodesTableOptions := func(dataDescription, runWithSingleCell) {
409419
}
410420
}
411421

422+
// export data that is unique for clones, but not unique for a node
423+
// (different clones could be in the same topology node, for example, different time points)
412424
shmTreeNodesWithClonesTableOptions := func(dataDescription, donorColumn, runWithSingleCell) {
413425
// TODO add forChain if runWithSingleCell
414426
donorColumnSpec := donorColumn.get("spec").getDataAsJson()
@@ -643,6 +655,7 @@ shmTreeNodesWithClonesTableOptions := func(dataDescription, donorColumn, runWith
643655
}
644656
}
645657

658+
// to use the file as a library, we should explicitly export functions
646659
export ll.toStrict({
647660
shmTreeTableOptions: shmTreeTableOptions,
648661
shmTreeNodesTableOptions: shmTreeNodesTableOptions,

workflow/src/main.tpl.tengo

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ wf.body(func(args) {
1616
ll.panic("No datasets to process")
1717
}
1818

19+
// we could not use array as request for waiting (see below), so we store datasets in a dictionary
1920
datasets := {}
2021
for datasetRef in args.datasetColumns {
2122
if is_undefined(datasetRef) {
@@ -26,6 +27,9 @@ wf.body(func(args) {
2627

2728
donorColumn := wf.resolve(args.donorColumn)
2829

30+
// The problem is that refs for data are not resolved.
31+
// To deal with it, we should call resolve, which will return a feature.
32+
// Then to resolve the feature we should call another template where we will describe what to wait for
2933
results := render.createEphemeral(processTpl, {
3034
datasets: datasets,
3135
donorColumn: donorColumn
@@ -40,6 +44,8 @@ wf.body(func(args) {
4044
"allelesLogs": results.output("allelesLogs"),
4145
"treesLogs": results.output("treesLogs"),
4246

47+
// files should be explicitly published, otherwise they will not be accessible from the GUI
48+
// TODO it should be automated
4349
"allelesReports": pframes.exportColumnData(results.output("allelesReports")),
4450
"treesReports": pframes.exportColumnData(results.output("treesReports"))
4551
},

workflow/src/prepare-donor-column.lib.tengo

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,21 @@ groupDataByDonorId := func(donorColumn, datasets) {
5151

5252
sampleToDonor := {}
5353

54+
// columns with meta can be fetched as data directly
5455
for k, v in donorColumn.get("data").getDataAsJson()["data"] {
5556
sampleId := json.decode(k)[0]
5657
sampleToDonor[sampleId] = v
5758
}
5859

59-
dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({
60-
keyLength: 3
61-
}))
60+
// build pColumn by hand
61+
dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
6262

63+
// collect all the clns files that we have into pColumn
6364
for blockId, dataset in datasets {
6465
for sKey, fileRef in dataset.get("data").inputs() {
6566
sampleId := json.decode(sKey)[0]
66-
dataBuilder.createInputField(json.encode([sampleToDonor[sampleId], sampleId, blockId])).set(fileRef)
67+
donor := sampleToDonor[sampleId]
68+
dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
6769
}
6870
}
6971

@@ -73,6 +75,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
7375
}
7476
}
7577

78+
// to use the file as a library, we should explicitly export functions
7679
export ll.toStrict({
7780
groupDataByDonorId: groupDataByDonorId
7881
})

workflow/src/process.tpl.tengo

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
1111

1212
reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
1313

14+
// this template should run only after all inputs are resolved
1415
self.awaitState("datasets", { wildcard: "*" }, "ResourceReady")
1516
self.awaitState("donorColumn", "ResourceReady")
1617

1718
self.body(func(inputs) {
19+
// overall description of data that we have.
1820
dataDescription := {
1921
"hasUmiTags": false,
2022
"hasCellTags": false,
23+
// will be filled
2124
"coveredFeatures": []
2225
}
2326

@@ -32,23 +35,30 @@ self.body(func(inputs) {
3235
dataDescription["hasUmiTags"] = true
3336
}
3437
dataDescription["coveredFeatures"] = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
38+
// check that the assemblingFeature is the same. If so, coveredFeatures will be the same too
3539
if (assemblingFeature == "") {
3640
assemblingFeature = dataDescription["mixcr.com/assemblingFeature"]
3741
} else if (assemblingFeature != dataDescription["mixcr.com/assemblingFeature"]) {
3842
ll.panic("Assmble features should be the same for process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
3943
}
4044
}
4145

46+
// there should be a call to join on pFrames, but it's not implemented, so we will do it by hand
4247
dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
4348

49+
// collect params for running export commands and to parse result tsv files into pColumns
4450
shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription, false)
4551
shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription, false)
4652
shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn, false)
4753

54+
// TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
4855
mixcrResults := llPFrames.aggregate(
56+
// files to iterate through
4957
dataGroupedByDonorId["data"],
58+
// columns not to combine - sampleId and mixcrBlockId
5059
[1, 2],
5160
reconstructShmTreesTpl,
61+
// all the outputs that should be gathered
5262
[
5363
{
5464
"name": "trees",
@@ -74,43 +84,46 @@ self.body(func(inputs) {
7484
}
7585
],
7686
false,
87+
// inputs
7788
{
7889
"shmTreeTableOptions": shmTreeTableOptions["cmdArgs"],
7990
"shmTreeNodesTableOptions": shmTreeNodesTableOptions["cmdArgs"],
8091
"shmTreeNodesWithClonesTableOptions": shmTreeNodesWithClonesTableOptions["cmdArgs"]
8192
}
8293
)
8394

95+
// donorId axis is inherited from dataGroupedByDonorId and we should specify it explicitly (other axes will be supplied by pfconvParams)
96+
additionalArgsForImportTsv := {
97+
additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
98+
}
99+
84100
trees := xsv.importFileMap(
85101
mixcrResults.output("trees"),
86102
"tsv",
87103
shmTreeTableOptions["pfconvParams"],
88-
{
89-
additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
90-
}
104+
additionalArgsForImportTsv
91105
)
92106

93107
treeNodes := xsv.importFileMap(
94108
mixcrResults.output("treeNodes"),
95109
"tsv",
96110
shmTreeNodesTableOptions["pfconvParams"],
97-
{
98-
additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
99-
}
111+
additionalArgsForImportTsv
100112
)
101113

102114
treeNodesWithClones := xsv.importFileMap(
103115
mixcrResults.output("treeNodesWithClones"),
104116
"tsv",
105117
shmTreeNodesWithClonesTableOptions["pfconvParams"],
106-
{
107-
additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
108-
}
118+
additionalArgsForImportTsv
109119
)
110120

111121
return {
122+
// combine columns into pFrame
112123
"trees": pframes.exportFrame(trees),
124+
// combine columns into pFrame
113125
"treeNodes": pframes.exportFrame(treeNodes),
126+
// combine columns into pFrame
114127
"treeNodesWithClones": pframes.exportFrame(treeNodesWithClones),
115128

116129
"allelesLogs": mixcrResults.output("allelesLog"),

workflow/src/reconstruct-shm-trees.tpl.tengo

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,16 @@ exec := import("@platforma-sdk/workflow-tengo:exec")
66

77
json := import("json")
88

9+
// for usage in aggregate function, we should specify all outputs that will be used
910
self.defineOutputs(
1011
"trees", "treeNodes", "treeNodesWithClones",
1112
"allelesLog", "treesLog",
1213
"allelesReport", "treesReport"
1314
)
1415

16+
// import MiXCR as a software to use
1517
mixcrSw := assets.importSoftware("@milaboratory/mixcr:main")
18+
// env for MiXCR to format progress messages
1619
progressPrefix := "[==PROGRESS==]"
1720

1821
self.body(func(inputs) {
@@ -25,19 +28,20 @@ self.body(func(inputs) {
2528
arg("findAlleles").
2629
arg("--report").arg("report.txt").
2730
saveFile("report.txt").
31+
// template specifies where result files will be written
2832
arg("--output-template").arg("alleles/{file_name}.clns")
2933

3034
toProcess := []
3135
for sKey, inputFile in inputData.inputs() {
3236
key := json.decode(sKey)
3337
sampleId := key[0]
3438
clonotypingBlockId := key[1]
39+
// file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
3540
fileName := sampleId + "___" + clonotypingBlockId + ".clns"
36-
element := {
41+
toProcess = append(toProcess, {
3742
"fileName": fileName,
3843
"input": inputFile
39-
}
40-
toProcess = append(toProcess, element)
44+
})
4145
}
4246

4347
for input in toProcess {
@@ -69,6 +73,7 @@ self.body(func(inputs) {
6973
shmTrees := shmTreesCmdBuilder.run()
7074

7175

76+
// export trees without nodes
7277
shmTreeExportsCmdBuilder := exec.builder().
7378
printErrStreamToStdout().
7479
env("MI_PROGRESS_PREFIX", progressPrefix).
@@ -89,6 +94,7 @@ self.body(func(inputs) {
8994

9095

9196

97+
// export tree nodes with data unique for the node
9298
shmTreeNodesExportsCmdBuilder := exec.builder().
9399
printErrStreamToStdout().
94100
env("MI_PROGRESS_PREFIX", progressPrefix).
@@ -107,11 +113,15 @@ self.body(func(inputs) {
107113

108114
shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
109115

116+
117+
118+
// export nodes with clones. Each node can contain several clones
110119
shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
111120
printErrStreamToStdout().
112121
env("MI_PROGRESS_PREFIX", progressPrefix).
113122
software(mixcrSw).
114123
arg("exportShmTreesWithNodes").
124+
// don't export nodes that don't have clones
115125
arg("--only-observed")
116126

117127
for arg in inputs.shmTreeNodesWithClonesTableOptions {

0 commit comments

Comments
 (0)