From 4dd9409f951bd0d92a5b703bc970f98946abc678 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 19 May 2025 10:20:42 +0200 Subject: [PATCH 001/100] WIP: Read zarr agglomerate files --- .../services/AgglomerateService.scala | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index ed970194131..2e0bfbd8b5e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -24,7 +24,18 @@ import scala.annotation.tailrec import scala.collection.compat.immutable.ArraySeq import scala.concurrent.duration.DurationInt -class AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { +class ZarrAgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { + def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo(data) + +} + +class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { + // TODO +} + +class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateService: ZarrAgglomerateService) + extends DataConverter + with LazyLogging { private val agglomerateDir = "agglomerates" private val agglomerateFileExtension = "hdf5" private val datasetName = "/segment_to_agglomerate" @@ -47,7 +58,12 @@ class AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverte .toSet } - def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo { + def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = + if (true) { + zarrAgglomerateService.applyAgglomerateHdf5(request)(data) + } else applyAgglomerateHdf5(request)(data) + + private def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo { val agglomerateFileKey = AgglomerateFileKey.fromDataRequest(request) From 380bd69ed47cc23bafca221798a714ca0d98fae7 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 19 May 2025 11:27:05 +0200 Subject: [PATCH 002/100] zarr group path --- .../datastore/services/AgglomerateService.scala | 13 ++++++++++++- .../datastore/storage/AgglomerateFileCache.scala | 8 ++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 2e0bfbd8b5e..3f84333d6e4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -25,7 +25,18 @@ import scala.collection.compat.immutable.ArraySeq import scala.concurrent.duration.DurationInt class ZarrAgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { - def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo(data) + private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) + private val agglomerateDir = "agglomerates" + private val agglomerateFileExtension = "" + + 
def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo { + + val agglomerateFileKey = AgglomerateFileKey.fromDataRequest(request) + + val zarrGroupPath = agglomerateFileKey.zarrGroupPath(dataBaseDir, agglomerateDir) + + data + } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala index 018bd27e9f9..9a9e2106ba4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala @@ -32,6 +32,14 @@ case class AgglomerateFileKey( .resolve(layerName) .resolve(agglomerateDir) .resolve(s"$mappingName.$agglomerateFileExtension") + + def zarrGroupPath(dataBaseDir: Path, agglomerateDir: String): Path = + dataBaseDir + .resolve(organizationId) + .resolve(datasetDirectoryName) + .resolve(layerName) + .resolve(agglomerateDir) + .resolve(mappingName) } object AgglomerateFileKey { From 7fb643fe19ad2be8037b34bcb85f91cc7f1246f0 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 19 May 2025 13:24:57 +0200 Subject: [PATCH 003/100] test reading from zarr array --- .../datastore/controllers/Application.scala | 22 ++++-- .../datastore/datareaders/DatasetArray.scala | 5 +- .../datareaders/zarr3/Zarr3Array.scala | 7 +- .../services/AgglomerateService.scala | 68 ++++++++++++++++--- .../services/BinaryDataService.scala | 2 +- .../services/mesh/AdHocMeshService.scala | 10 +-- ....scalableminds.webknossos.datastore.routes | 1 + 7 files changed, 91 insertions(+), 24 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala index 693b917d7dd..3ede25cb42f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala @@ -3,8 +3,13 @@ package com.scalableminds.webknossos.datastore.controllers import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.helpers.NativeBucketScanner -import com.scalableminds.webknossos.datastore.models.datasource.ElementClass -import com.scalableminds.webknossos.datastore.services.ApplicationHealthService +import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass} +import com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest +import com.scalableminds.webknossos.datastore.services.{ + AgglomerateService, + ApplicationHealthService, + ZarrAgglomerateService +} import com.scalableminds.webknossos.datastore.storage.DataStoreRedisStore import net.liftweb.common.Box.tryo @@ -13,8 +18,9 @@ import play.api.mvc.{Action, AnyContent} import scala.concurrent.ExecutionContext -class Application @Inject()(redisClient: DataStoreRedisStore, applicationHealthService: ApplicationHealthService)( - implicit ec: ExecutionContext) +class Application @Inject()(redisClient: DataStoreRedisStore, + applicationHealthService: ApplicationHealthService, + agglomerateService: ZarrAgglomerateService)(implicit ec: ExecutionContext) extends Controller { override def allowRemoteOrigin: Boolean = true @@ -34,6 +40,14 @@ class Application 
@Inject()(redisClient: DataStoreRedisStore, applicationHealthS } } + def testAgglomerateZarr: Action[AnyContent] = Action.async { implicit request => + log() { + for { + data <- agglomerateService.readFromSegmentToAgglomerate + } yield Ok(s"got ${data.length} bytes") + } + } + // Test that the NativeBucketScanner works. // The result is stored in a val because we expect that this continues to work if it works on startup. private lazy val testNativeBucketScanner = tryo { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index 5c17139b01e..2a00dd70a7b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -116,8 +116,9 @@ class DatasetArray(vaultPath: VaultPath, } // returns byte array in fortran-order with little-endian values - private def readBytes(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Array[Byte]] = + // TODO should possibly be private again + def readBytes(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Array[Byte]] = for { typedMultiArray <- readAsFortranOrder(shape, offset) asBytes <- BytesConverter.toByteArray(typedMultiArray, header.resolvedDataType, ByteOrder.LITTLE_ENDIAN).toFox diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala index 40ea0c2e934..5a7d0c7b807 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala @@ -22,10 +22,10 @@ object Zarr3Array extends LazyLogging with FoxImplicits { channelIndex: Option[Int], additionalAxes: Option[Seq[AdditionalAxis]], sharedChunkContentsCache: AlfuCache[String, MultiArray])(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Zarr3Array] = + tc: TokenContext): Fox[Zarr3Array] = { + val headerPath = path / Zarr3ArrayHeader.FILENAME_ZARR_JSON for { - headerBytes <- (path / Zarr3ArrayHeader.FILENAME_ZARR_JSON) - .readBytes() ?~> s"Could not read header at ${Zarr3ArrayHeader.FILENAME_ZARR_JSON}" + headerBytes <- headerPath.readBytes() ?~> s"Could not read header at $headerPath" header <- JsonHelper.parseAs[Zarr3ArrayHeader](headerBytes).toFox ?~> "Could not parse array header" array <- tryo( new Zarr3Array(path, @@ -37,6 +37,7 @@ object Zarr3Array extends LazyLogging with FoxImplicits { additionalAxes, sharedChunkContentsCache)).toFox ?~> "Could not open zarr3 array" } yield array + } } class Zarr3Array(vaultPath: VaultPath, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 3f84333d6e4..e098be3c369 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -1,41 +1,89 @@ package com.scalableminds.webknossos.datastore.services import ch.systemsx.cisd.hdf5._ +import 
com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.io.PathUtils import com.scalableminds.util.time.Instant +import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} +import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NodeDefaults, SkeletonTracingDefaults} -import com.scalableminds.webknossos.datastore.models.datasource.ElementClass +import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass} import com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest import com.scalableminds.webknossos.datastore.storage._ import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.{Box, Failure, Full} import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils +import ucar.ma2.{Array => MultiArray} +import java.net.URI import java.nio._ import java.nio.file.{Files, Paths} import javax.inject.Inject import scala.annotation.tailrec import scala.collection.compat.immutable.ArraySeq +import scala.concurrent.ExecutionContext import scala.concurrent.duration.DurationInt -class ZarrAgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { +class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService: DataVaultService) + extends DataConverter + with LazyLogging { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private val agglomerateDir = "agglomerates" private val agglomerateFileExtension = "" - def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo { + private lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { + // Used by DatasetArray-based datasets. 
Measure item weight in kilobytes because the weigher can only return int, not long + + val maxSizeKiloBytes = Math.floor(config.Datastore.Cache.ImageArrayChunks.maxSizeBytes.toDouble / 1000.0).toInt + + def cacheWeight(key: String, arrayBox: Box[MultiArray]): Int = + arrayBox match { + case Full(array) => + (array.getSizeBytes / 1000L).toInt + case _ => 0 + } + + AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) + } + + def readFromSegmentToAgglomerate(implicit ec: ExecutionContext): Fox[Array[Byte]] = { + val zarrGroupPath = + dataBaseDir + .resolve("sample_organization/test-agglomerate-file-zarr/segmentation/agglomerates/agglomerate_view_5") + .toAbsolutePath + for { + groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(s"file://$zarrGroupPath"), None)) + segmentToAgglomeratePath = groupVaultPath / "segment_to_agglomerate" + zarrArray <- Zarr3Array.open(segmentToAgglomeratePath, + DataSourceId("zarr", "test"), + "layer", + None, + None, + None, + sharedChunkContentsCache)(ec, TokenContext(None)) + read <- zarrArray.readBytes(Array(5), Array(0))(ec, TokenContext(None)) + _ = logger.info(s"read ${read.length} bytes from agglomerate file") + } yield read + } + + def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte])( + implicit ec: ExecutionContext): Fox[Array[Byte]] = { val agglomerateFileKey = AgglomerateFileKey.fromDataRequest(request) - val zarrGroupPath = agglomerateFileKey.zarrGroupPath(dataBaseDir, agglomerateDir) + val zarrGroupPath = agglomerateFileKey.zarrGroupPath(dataBaseDir, agglomerateDir).toAbsolutePath + + for { + _ <- readFromSegmentToAgglomerate + } yield data - data } } @@ -46,7 +94,8 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateService: ZarrAgglomerateService) extends DataConverter - with LazyLogging { + with LazyLogging + with FoxImplicits { private val agglomerateDir = "agglomerates" private val agglomerateFileExtension = "hdf5" private val datasetName = "/segment_to_agglomerate" @@ -66,13 +115,14 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi } .toOption .getOrElse(Nil) - .toSet + .toSet ++ Set("agglomerate_view_5") // TODO } - def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = + def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])( + implicit ec: ExecutionContext): Fox[Array[Byte]] = if (true) { zarrAgglomerateService.applyAgglomerateHdf5(request)(data) - } else applyAgglomerateHdf5(request)(data) + } else applyAgglomerateHdf5(request)(data).toFox private def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala index 55837d02665..6d82ad62077 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala @@ -148,7 +148,7 @@ class BinaryDataService(val dataBaseDir: Path, convertIfNecessary( request.settings.appliedAgglomerate.isDefined && request.dataLayer.category == Category.segmentation && request.cuboid.mag.maxDim <= MaxMagForAgglomerateMapping, clippedData, - data => 
agglomerateService.applyAgglomerate(request)(data).toFox, + data => agglomerateService.applyAgglomerate(request)(data), request ) }.toFox.fillEmpty(Fox.successful(clippedData)) ?~> "Failed to apply agglomerate mapping" diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala index 36e01971f44..cf8f5c2a135 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala @@ -111,7 +111,7 @@ class AdHocMeshService(binaryDataService: BinaryDataService, Fox.successful(data) } - def applyAgglomerate(data: Array[Byte]): Box[Array[Byte]] = + def applyAgglomerate(data: Array[Byte]): Fox[Array[Byte]] = request.mapping match { case Some(_) => request.mappingType match { @@ -124,12 +124,12 @@ class AdHocMeshService(binaryDataService: BinaryDataService, DataServiceRequestSettings(halfByte = false, request.mapping, None) ) agglomerateService.applyAgglomerate(dataRequest)(data) - }.getOrElse(Full(data)) + }.getOrElse(Fox.successful(data)) case _ => - Full(data) + Fox.successful(data) } case _ => - Full(data) + Fox.successful(data) } def convertData(data: Array[Byte]): Array[T] = { @@ -193,7 +193,7 @@ class AdHocMeshService(binaryDataService: BinaryDataService, for { data <- binaryDataService.handleDataRequest(dataRequest) - agglomerateMappedData <- applyAgglomerate(data).toFox ?~> "failed to apply agglomerate for ad-hoc meshing" + agglomerateMappedData <- applyAgglomerate(data) ?~> "failed to apply agglomerate for ad-hoc meshing" typedData = convertData(agglomerateMappedData) mappedData <- applyMapping(typedData) mappedSegmentId <- applyMapping(Array(typedSegmentId)).map(_.head) diff --git a/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes b/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes index ef176f001ce..ea1aaa70266 100644 --- a/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes +++ b/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes @@ -3,6 +3,7 @@ # Health endpoint GET /health @com.scalableminds.webknossos.datastore.controllers.Application.health +GET /testAgglomerateZarr @com.scalableminds.webknossos.datastore.controllers.Application.testAgglomerateZarr # Read image data POST /datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.BinaryDataController.requestViaWebknossos(organizationId: String, datasetDirectoryName: String, dataLayerName: String) From f987ebfafa06487921746768e63349e3725800d9 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 19 May 2025 13:49:32 +0200 Subject: [PATCH 004/100] axisOrder: make y optional --- .../datastore/datareaders/AxisOrder.scala | 22 +++++++++++++------ .../datastore/datareaders/DatasetHeader.scala | 10 ++++++--- .../datastore/datareaders/wkw/WKWArray.scala | 4 ++-- .../N5CompactMultiscalesExplorer.scala | 2 +- .../datastore/explore/N5Explorer.scala | 6 ++--- .../explore/NgffExplorationUtils.scala | 10 ++++----- .../services/AgglomerateService.scala | 3 ++- 7 files changed, 35 insertions(+), 22 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala index 809d4d5e5cf..9d44931e647 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala @@ -5,14 +5,19 @@ import play.api.libs.json.{JsValue, Json, OFormat} // Defines the axis order of a DatasetArray. Note that this ignores transpose codecs/ArrayOrder.F/C. // Those will have to be applied on individual chunk’s contents. -case class AxisOrder(x: Int, y: Int, z: Option[Int], c: Option[Int] = None) { +case class AxisOrder(x: Int, y: Option[Int], z: Option[Int], c: Option[Int] = None) { def hasZAxis: Boolean = z.isDefined + def yWithFallback: Int = y match { + case Some(value) => value + case None => Math.max(x, c.getOrElse(-1)) + 1 + } + def zWithFallback: Int = z match { case Some(value) => value // z is appended to the end of the array (this is reflected in DatasetArray adding 1 at the end of header datasetShape and chunkShape) - case None => Math.max(Math.max(x, y), c.getOrElse(-1)) + 1 + case None => Math.max(Math.max(x, yWithFallback), c.getOrElse(-1)) + 1 } def length: Int = { @@ -27,21 +32,22 @@ object AxisOrder { // assumes that the last three elements of the shape are z,y,x (standard in OME NGFF) def asZyxFromRank(rank: Int): AxisOrder = AxisOrder.xyz(rank - 1, rank - 2, rank - 3) - def xyz(x: Int, y: Int, z: Int): AxisOrder = AxisOrder(x, y, Some(z)) + def xyz(x: Int, y: Int, z: Int): AxisOrder = AxisOrder(x, Some(y), Some(z)) - def xyz: AxisOrder = AxisOrder(0, 1, Some(2)) + def xyz: AxisOrder = AxisOrder(0, Some(1), Some(2)) // assumes that the last three elements of the shape are (c),x,y,z (which is what WEBKNOSSOS sends to the frontend) def asCxyzFromRank(rank: Int): AxisOrder = if (rank == 3) AxisOrder.xyz(rank - 3, rank - 2, rank - 1) else - AxisOrder(rank - 3, rank - 2, Some(rank - 1), Some(rank - 4)) + AxisOrder(rank - 3, Some(rank - 2), Some(rank - 1), Some(rank - 4)) def cxyz: AxisOrder = asCxyzFromRank(rank = 4) // Additional coordinates are inserted between c and xyz - def cAdditionalxyz(rank: Int): AxisOrder = AxisOrder(c = Some(0), x = rank - 3, y = rank - 2, z = Some(rank - 1)) + def cAdditionalxyz(rank: Int): AxisOrder = + AxisOrder(c = Some(0), x = rank - 3, y = Some(rank - 2), z = Some(rank - 1)) implicit val jsonFormat: OFormat[AxisOrder] = Json.format[AxisOrder] } @@ -111,7 +117,9 @@ object FullAxisOrder { additionalAxes: Option[Seq[AdditionalAxis]]): FullAxisOrder = { val asArray: Array[Axis] = Array.fill(rank)(Axis("")) asArray(axisOrder.x) = Axis("x") - asArray(axisOrder.y) = Axis("y") + axisOrder.y.foreach { y => + asArray(y) = Axis("y") + } axisOrder.c.foreach { c => asArray(c) = Axis("c") } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala index 6907d49ecae..095f23a4609 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala @@ -40,13 +40,17 @@ trait DatasetHeader { def boundingBox(axisOrder: AxisOrder): Option[BoundingBox] = datasetShape.flatMap { shape => - if (Math.max(Math.max(axisOrder.x, axisOrder.y), axisOrder.zWithFallback) >= rank && axisOrder.hasZAxis) + if (Math.max(Math.max(axisOrder.x, 
axisOrder.yWithFallback), axisOrder.zWithFallback) >= rank && axisOrder.hasZAxis) None else { if (axisOrder.hasZAxis) { - Some(BoundingBox(Vec3Int.zeros, shape(axisOrder.x), shape(axisOrder.y), shape(axisOrder.zWithFallback))) + Some( + BoundingBox(Vec3Int.zeros, + shape(axisOrder.x), + shape(axisOrder.yWithFallback), + shape(axisOrder.zWithFallback))) } else { - Some(BoundingBox(Vec3Int.zeros, shape(axisOrder.x), shape(axisOrder.y), 1)) + Some(BoundingBox(Vec3Int.zeros, shape(axisOrder.x), shape(axisOrder.yWithFallback), 1)) } } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala index f5e7232f9f1..a9dddafdde8 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala @@ -112,7 +112,7 @@ class WKWArray(vaultPath: VaultPath, private def getChunkIndexInShardIndex(chunkIndex: Array[Int]): Box[Int] = { val x = chunkIndex(axisOrder.x) - val y = chunkIndex(axisOrder.y) + val y = chunkIndex(axisOrder.y.getOrElse(2)) val z = chunkIndex(axisOrder.z.getOrElse(3)) val chunkOffsetX = x % header.numChunksPerShardDimension val chunkOffsetY = y % header.numChunksPerShardDimension @@ -122,7 +122,7 @@ class WKWArray(vaultPath: VaultPath, override protected def getChunkFilename(chunkIndex: Array[Int]): String = { val x = chunkIndex(axisOrder.x) - val y = chunkIndex(axisOrder.y) + val y = chunkIndex(axisOrder.y.getOrElse(2)) val z = chunkIndex(axisOrder.z.getOrElse(3)) wkwFilePath(x, y, z) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5CompactMultiscalesExplorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5CompactMultiscalesExplorer.scala index 6f42482ecac..91c03aff9e4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5CompactMultiscalesExplorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5CompactMultiscalesExplorer.scala @@ -54,7 +54,7 @@ class N5CompactMultiscalesExplorer(implicit val ec: ExecutionContext) extends N5 for { mag <- tryo( Vec3Int(downsamplingFactor(axisOrder.x), - downsamplingFactor(axisOrder.y), + downsamplingFactor(axisOrder.yWithFallback), downsamplingFactor(axisOrder.zWithFallback))).toFox magPath = remotePath / s"s$magIndex" headerPath = magPath / N5Header.FILENAME_ATTRIBUTES_JSON diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5Explorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5Explorer.scala index b17f7e1184f..700fde5ca99 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5Explorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5Explorer.scala @@ -19,7 +19,7 @@ trait N5Explorer extends RemoteLayerExplorer { case Some(units) => for { xUnitFactor <- spaceUnitToNmFactor(units(axisOrder.x)) - yUnitFactor <- spaceUnitToNmFactor(units(axisOrder.y)) + yUnitFactor <- spaceUnitToNmFactor(units(axisOrder.yWithFallback)) zUnitFactor <- spaceUnitToNmFactor(units(axisOrder.zWithFallback)) } yield Vec3Double(xUnitFactor, yUnitFactor, zUnitFactor) case None => Fox.successful(Vec3Double(1e3, 1e3, 1e3)) // assume default micrometers @@ -52,11 +52,11 @@ trait N5Explorer extends RemoteLayerExplorer { 
val cOpt = if (c == -1) None else Some(c) for { _ <- Fox.fromBool(x >= 0 && y >= 0 && z >= 0) ?~> s"invalid xyz axis order: $x,$y,$z." - } yield AxisOrder(x, y, Some(z), cOpt) + } yield AxisOrder(x, Some(y), Some(z), cOpt) } protected def extractVoxelSizeInAxisUnits(scale: List[Double], axisOrder: AxisOrder): Fox[Vec3Double] = - tryo(Vec3Double(scale(axisOrder.x), scale(axisOrder.y), scale(axisOrder.zWithFallback))).toFox + tryo(Vec3Double(scale(axisOrder.x), scale(axisOrder.yWithFallback), scale(axisOrder.zWithFallback))).toFox protected def layerFromMagsWithAttributes(magsWithAttributes: List[MagWithAttributes], remotePath: VaultPath): Fox[N5Layer] = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala index c62c1ddd5b2..948c23f2283 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala @@ -106,9 +106,9 @@ trait NgffExplorationUtils extends FoxImplicits { _ <- Fox.fromBool(x >= 0 && y >= 0) ?~> s"invalid xyz axis order: $x,$y,$z. ${x >= 0 && y >= 0}" } yield if (z >= 0) { - AxisOrder(x, y, Some(z), cOpt) + AxisOrder(x, Some(y), Some(z), cOpt) } else { - AxisOrder(x, y, None, cOpt) + AxisOrder(x, Some(y), None, cOpt) } } @@ -116,7 +116,7 @@ trait NgffExplorationUtils extends FoxImplicits { implicit ec: ExecutionContext): Fox[LengthUnit] = for { xUnit <- axes(axisOrder.x).lengthUnit.toFox - yUnit <- axes(axisOrder.y).lengthUnit.toFox + yUnit <- axes(axisOrder.yWithFallback).lengthUnit.toFox zUnitOpt <- Fox.runIf(axisOrder.hasZAxis)(axes(axisOrder.zWithFallback).lengthUnit.toFox) units: List[LengthUnit] = List(Some(xUnit), Some(yUnit), zUnitOpt).flatten } yield units.minBy(LengthUnit.toNanometer) @@ -125,7 +125,7 @@ trait NgffExplorationUtils extends FoxImplicits { implicit ec: ExecutionContext): Fox[Vec3Double] = for { xUnitToNm <- axes(axisOrder.x).lengthUnit.map(LengthUnit.toNanometer).toFox - yUnitToNm <- axes(axisOrder.y).lengthUnit.map(LengthUnit.toNanometer).toFox + yUnitToNm <- axes(axisOrder.yWithFallback).lengthUnit.map(LengthUnit.toNanometer).toFox zUnitToNmOpt <- Fox.runIf(axisOrder.hasZAxis)( axes(axisOrder.zWithFallback).lengthUnit.map(LengthUnit.toNanometer).toFox) xUnitToTarget = xUnitToNm / LengthUnit.toNanometer(unifiedAxisUnit) @@ -177,7 +177,7 @@ trait NgffExplorationUtils extends FoxImplicits { val filtered = coordinateTransforms.filter(_.`type` == "scale") val scalesFromTransforms = filtered.flatMap(_.scale) val xFactors = scalesFromTransforms.map(_(axisOrder.x)) - val yFactors = scalesFromTransforms.map(_(axisOrder.y)) + val yFactors = scalesFromTransforms.map(_(axisOrder.yWithFallback)) val zFactors = if (axisOrder.hasZAxis) scalesFromTransforms.map(_(axisOrder.zWithFallback)) else Seq(1.0, 1.0) Vec3Double(xFactors.product, yFactors.product, zFactors.product) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index e098be3c369..c05f74d4c49 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -10,6 +10,7 @@ import 
com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} +import com.scalableminds.webknossos.datastore.datareaders.AxisOrder import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NodeDefaults, SkeletonTracingDefaults} @@ -64,7 +65,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService zarrArray <- Zarr3Array.open(segmentToAgglomeratePath, DataSourceId("zarr", "test"), "layer", - None, + Some(AxisOrder(0, None, None)), None, None, sharedChunkContentsCache)(ec, TokenContext(None)) From 7c1cc8bf033606775b79889d59f87adc333371c1 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 22 May 2025 16:08:22 +0200 Subject: [PATCH 005/100] undo attempt to make axisOrder.y optional --- .../datastore/datareaders/AxisOrder.scala | 22 ++++++------------- .../datastore/datareaders/DatasetHeader.scala | 10 +++------ .../datastore/datareaders/wkw/WKWArray.scala | 4 ++-- .../N5CompactMultiscalesExplorer.scala | 2 +- .../datastore/explore/N5Explorer.scala | 6 ++--- .../explore/NgffExplorationUtils.scala | 10 ++++----- 6 files changed, 21 insertions(+), 33 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala index 9d44931e647..809d4d5e5cf 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala @@ -5,19 +5,14 @@ import play.api.libs.json.{JsValue, Json, OFormat} // Defines the axis order of a DatasetArray. Note that this ignores transpose codecs/ArrayOrder.F/C. // Those will have to be applied on individual chunk’s contents. 
-case class AxisOrder(x: Int, y: Option[Int], z: Option[Int], c: Option[Int] = None) { +case class AxisOrder(x: Int, y: Int, z: Option[Int], c: Option[Int] = None) { def hasZAxis: Boolean = z.isDefined - def yWithFallback: Int = y match { - case Some(value) => value - case None => Math.max(x, c.getOrElse(-1)) + 1 - } - def zWithFallback: Int = z match { case Some(value) => value // z is appended to the end of the array (this is reflected in DatasetArray adding 1 at the end of header datasetShape and chunkShape) - case None => Math.max(Math.max(x, yWithFallback), c.getOrElse(-1)) + 1 + case None => Math.max(Math.max(x, y), c.getOrElse(-1)) + 1 } def length: Int = { @@ -32,22 +27,21 @@ object AxisOrder { // assumes that the last three elements of the shape are z,y,x (standard in OME NGFF) def asZyxFromRank(rank: Int): AxisOrder = AxisOrder.xyz(rank - 1, rank - 2, rank - 3) - def xyz(x: Int, y: Int, z: Int): AxisOrder = AxisOrder(x, Some(y), Some(z)) + def xyz(x: Int, y: Int, z: Int): AxisOrder = AxisOrder(x, y, Some(z)) - def xyz: AxisOrder = AxisOrder(0, Some(1), Some(2)) + def xyz: AxisOrder = AxisOrder(0, 1, Some(2)) // assumes that the last three elements of the shape are (c),x,y,z (which is what WEBKNOSSOS sends to the frontend) def asCxyzFromRank(rank: Int): AxisOrder = if (rank == 3) AxisOrder.xyz(rank - 3, rank - 2, rank - 1) else - AxisOrder(rank - 3, Some(rank - 2), Some(rank - 1), Some(rank - 4)) + AxisOrder(rank - 3, rank - 2, Some(rank - 1), Some(rank - 4)) def cxyz: AxisOrder = asCxyzFromRank(rank = 4) // Additional coordinates are inserted between c and xyz - def cAdditionalxyz(rank: Int): AxisOrder = - AxisOrder(c = Some(0), x = rank - 3, y = Some(rank - 2), z = Some(rank - 1)) + def cAdditionalxyz(rank: Int): AxisOrder = AxisOrder(c = Some(0), x = rank - 3, y = rank - 2, z = Some(rank - 1)) implicit val jsonFormat: OFormat[AxisOrder] = Json.format[AxisOrder] } @@ -117,9 +111,7 @@ object FullAxisOrder { additionalAxes: Option[Seq[AdditionalAxis]]): FullAxisOrder = { val asArray: Array[Axis] = Array.fill(rank)(Axis("")) asArray(axisOrder.x) = Axis("x") - axisOrder.y.foreach { y => - asArray(y) = Axis("y") - } + asArray(axisOrder.y) = Axis("y") axisOrder.c.foreach { c => asArray(c) = Axis("c") } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala index 095f23a4609..6907d49ecae 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala @@ -40,17 +40,13 @@ trait DatasetHeader { def boundingBox(axisOrder: AxisOrder): Option[BoundingBox] = datasetShape.flatMap { shape => - if (Math.max(Math.max(axisOrder.x, axisOrder.yWithFallback), axisOrder.zWithFallback) >= rank && axisOrder.hasZAxis) + if (Math.max(Math.max(axisOrder.x, axisOrder.y), axisOrder.zWithFallback) >= rank && axisOrder.hasZAxis) None else { if (axisOrder.hasZAxis) { - Some( - BoundingBox(Vec3Int.zeros, - shape(axisOrder.x), - shape(axisOrder.yWithFallback), - shape(axisOrder.zWithFallback))) + Some(BoundingBox(Vec3Int.zeros, shape(axisOrder.x), shape(axisOrder.y), shape(axisOrder.zWithFallback))) } else { - Some(BoundingBox(Vec3Int.zeros, shape(axisOrder.x), shape(axisOrder.yWithFallback), 1)) + Some(BoundingBox(Vec3Int.zeros, shape(axisOrder.x), shape(axisOrder.y), 1)) } } } diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala index a9dddafdde8..f5e7232f9f1 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala @@ -112,7 +112,7 @@ class WKWArray(vaultPath: VaultPath, private def getChunkIndexInShardIndex(chunkIndex: Array[Int]): Box[Int] = { val x = chunkIndex(axisOrder.x) - val y = chunkIndex(axisOrder.y.getOrElse(2)) + val y = chunkIndex(axisOrder.y) val z = chunkIndex(axisOrder.z.getOrElse(3)) val chunkOffsetX = x % header.numChunksPerShardDimension val chunkOffsetY = y % header.numChunksPerShardDimension @@ -122,7 +122,7 @@ class WKWArray(vaultPath: VaultPath, override protected def getChunkFilename(chunkIndex: Array[Int]): String = { val x = chunkIndex(axisOrder.x) - val y = chunkIndex(axisOrder.y.getOrElse(2)) + val y = chunkIndex(axisOrder.y) val z = chunkIndex(axisOrder.z.getOrElse(3)) wkwFilePath(x, y, z) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5CompactMultiscalesExplorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5CompactMultiscalesExplorer.scala index 91c03aff9e4..6f42482ecac 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5CompactMultiscalesExplorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5CompactMultiscalesExplorer.scala @@ -54,7 +54,7 @@ class N5CompactMultiscalesExplorer(implicit val ec: ExecutionContext) extends N5 for { mag <- tryo( Vec3Int(downsamplingFactor(axisOrder.x), - downsamplingFactor(axisOrder.yWithFallback), + downsamplingFactor(axisOrder.y), downsamplingFactor(axisOrder.zWithFallback))).toFox magPath = remotePath / s"s$magIndex" headerPath = magPath / N5Header.FILENAME_ATTRIBUTES_JSON diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5Explorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5Explorer.scala index 700fde5ca99..b17f7e1184f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5Explorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/N5Explorer.scala @@ -19,7 +19,7 @@ trait N5Explorer extends RemoteLayerExplorer { case Some(units) => for { xUnitFactor <- spaceUnitToNmFactor(units(axisOrder.x)) - yUnitFactor <- spaceUnitToNmFactor(units(axisOrder.yWithFallback)) + yUnitFactor <- spaceUnitToNmFactor(units(axisOrder.y)) zUnitFactor <- spaceUnitToNmFactor(units(axisOrder.zWithFallback)) } yield Vec3Double(xUnitFactor, yUnitFactor, zUnitFactor) case None => Fox.successful(Vec3Double(1e3, 1e3, 1e3)) // assume default micrometers @@ -52,11 +52,11 @@ trait N5Explorer extends RemoteLayerExplorer { val cOpt = if (c == -1) None else Some(c) for { _ <- Fox.fromBool(x >= 0 && y >= 0 && z >= 0) ?~> s"invalid xyz axis order: $x,$y,$z." 
- } yield AxisOrder(x, Some(y), Some(z), cOpt) + } yield AxisOrder(x, y, Some(z), cOpt) } protected def extractVoxelSizeInAxisUnits(scale: List[Double], axisOrder: AxisOrder): Fox[Vec3Double] = - tryo(Vec3Double(scale(axisOrder.x), scale(axisOrder.yWithFallback), scale(axisOrder.zWithFallback))).toFox + tryo(Vec3Double(scale(axisOrder.x), scale(axisOrder.y), scale(axisOrder.zWithFallback))).toFox protected def layerFromMagsWithAttributes(magsWithAttributes: List[MagWithAttributes], remotePath: VaultPath): Fox[N5Layer] = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala index 948c23f2283..c62c1ddd5b2 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala @@ -106,9 +106,9 @@ trait NgffExplorationUtils extends FoxImplicits { _ <- Fox.fromBool(x >= 0 && y >= 0) ?~> s"invalid xyz axis order: $x,$y,$z. ${x >= 0 && y >= 0}" } yield if (z >= 0) { - AxisOrder(x, Some(y), Some(z), cOpt) + AxisOrder(x, y, Some(z), cOpt) } else { - AxisOrder(x, Some(y), None, cOpt) + AxisOrder(x, y, None, cOpt) } } @@ -116,7 +116,7 @@ trait NgffExplorationUtils extends FoxImplicits { implicit ec: ExecutionContext): Fox[LengthUnit] = for { xUnit <- axes(axisOrder.x).lengthUnit.toFox - yUnit <- axes(axisOrder.yWithFallback).lengthUnit.toFox + yUnit <- axes(axisOrder.y).lengthUnit.toFox zUnitOpt <- Fox.runIf(axisOrder.hasZAxis)(axes(axisOrder.zWithFallback).lengthUnit.toFox) units: List[LengthUnit] = List(Some(xUnit), Some(yUnit), zUnitOpt).flatten } yield units.minBy(LengthUnit.toNanometer) @@ -125,7 +125,7 @@ trait NgffExplorationUtils extends FoxImplicits { implicit ec: ExecutionContext): Fox[Vec3Double] = for { xUnitToNm <- axes(axisOrder.x).lengthUnit.map(LengthUnit.toNanometer).toFox - yUnitToNm <- axes(axisOrder.yWithFallback).lengthUnit.map(LengthUnit.toNanometer).toFox + yUnitToNm <- axes(axisOrder.y).lengthUnit.map(LengthUnit.toNanometer).toFox zUnitToNmOpt <- Fox.runIf(axisOrder.hasZAxis)( axes(axisOrder.zWithFallback).lengthUnit.map(LengthUnit.toNanometer).toFox) xUnitToTarget = xUnitToNm / LengthUnit.toNanometer(unifiedAxisUnit) @@ -177,7 +177,7 @@ trait NgffExplorationUtils extends FoxImplicits { val filtered = coordinateTransforms.filter(_.`type` == "scale") val scalesFromTransforms = filtered.flatMap(_.scale) val xFactors = scalesFromTransforms.map(_(axisOrder.x)) - val yFactors = scalesFromTransforms.map(_(axisOrder.yWithFallback)) + val yFactors = scalesFromTransforms.map(_(axisOrder.y)) val zFactors = if (axisOrder.hasZAxis) scalesFromTransforms.map(_(axisOrder.zWithFallback)) else Seq(1.0, 1.0) Vec3Double(xFactors.product, yFactors.product, zFactors.product) } From 5af14fcbb83f78971eeea7c86c89353f96b3a16c Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 22 May 2025 16:33:35 +0200 Subject: [PATCH 006/100] read multi array, ignoring underlying storage and axis order --- .../datastore/controllers/Application.scala | 2 +- .../datastore/datareaders/DatasetArray.scala | 63 ++++++++++++++++--- .../services/AgglomerateService.scala | 10 +-- 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala index 3ede25cb42f..f22b572a55d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala @@ -44,7 +44,7 @@ class Application @Inject()(redisClient: DataStoreRedisStore, log() { for { data <- agglomerateService.readFromSegmentToAgglomerate - } yield Ok(s"got ${data.length} bytes") + } yield Ok(s"got ${data.getSize} elements of type ${data.getDataType}: ${data.toString}") } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index 2a00dd70a7b..242fc71453a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -116,9 +116,8 @@ class DatasetArray(vaultPath: VaultPath, } // returns byte array in fortran-order with little-endian values - // TODO should possibly be private again - def readBytes(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Array[Byte]] = + private def readBytes(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Array[Byte]] = for { typedMultiArray <- readAsFortranOrder(shape, offset) asBytes <- BytesConverter.toByteArray(typedMultiArray, header.resolvedDataType, ByteOrder.LITTLE_ENDIAN).toFox @@ -158,7 +157,7 @@ class DatasetArray(vaultPath: VaultPath, fullAxisOrder.permuteIndicesArrayToWk(chunkShape), shape, totalOffset) - if (partialCopyingIsNotNeeded(shape, totalOffset, chunkIndices)) { + if (partialCopyingIsNotNeededForWkOrder(shape, totalOffset, chunkIndices)) { for { chunkIndex <- chunkIndices.headOption.toFox sourceChunk: MultiArray <- getSourceChunkDataWithCache(fullAxisOrder.permuteIndicesWkToArray(chunkIndex), @@ -185,10 +184,44 @@ class DatasetArray(vaultPath: VaultPath, } } + def readAsMultiArray(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext, + tc: TokenContext): Fox[MultiArray] = { + val totalOffset: Array[Int] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }.padTo(offset.length, 0) + val chunkIndices = ChunkUtils.computeChunkIndices(datasetShape, chunkShape, shape, totalOffset) + if (partialCopyingIsNotNeededForMultiArray(shape, totalOffset, chunkIndices)) { + for { + chunkIndex <- chunkIndices.headOption.toFox + sourceChunk: MultiArray <- getSourceChunkDataWithCache(chunkIndex, useSkipTypingShortcut = true) + } yield sourceChunk + } else { + val targetBuffer = MultiArrayUtils.createDataBuffer(header.resolvedDataType, shape) + val targetMultiArray = MultiArrayUtils.createArrayWithGivenStorage(targetBuffer, shape.reverse) + val copiedFuture = Fox.combined(chunkIndices.map { chunkIndex: Array[Int] => + for { + sourceChunk: MultiArray <- getSourceChunkDataWithCache(chunkIndex) + offsetInChunk = computeOffsetInChunkIgnoringAxisOrder(chunkIndex, totalOffset).reverse + _ <- tryo(MultiArrayUtils.copyRange(offsetInChunk, sourceChunk, targetMultiArray)).toFox ?~> formatCopyRangeErrorWithoutAxisOrder( + offsetInChunk, + sourceChunk, + targetMultiArray) + } yield () + }) + for { + _ <- copiedFuture + } yield targetMultiArray + } + } + private def formatCopyRangeError(offsetInChunk: Array[Int], 
sourceChunk: MultiArray, target: MultiArray): String = s"Copying data from dataset chunk failed. Chunk shape (F): ${printAsOuterF(sourceChunk.getShape)}, target shape (F): ${printAsOuterF( target.getShape)}, offsetInChunk: ${printAsOuterF(offsetInChunk)}. Axis order (C): $fullAxisOrder (outer: ${fullAxisOrder.toStringWk})" + private def formatCopyRangeErrorWithoutAxisOrder(offsetInChunk: Array[Int], + sourceChunk: MultiArray, + target: MultiArray): String = + s"Copying data from dataset chunk failed. Chunk shape ${sourceChunk.getShape.mkString(",")}, target shape ${target.getShape + .mkString(",")}, offsetInChunk: ${offsetInChunk.mkString(",")}" + protected def getShardedChunkPathAndRange( chunkIndex: Array[Int])(implicit ec: ExecutionContext, tc: TokenContext): Fox[(VaultPath, NumericRange[Long])] = ??? // Defined in subclass @@ -225,9 +258,20 @@ class DatasetArray(vaultPath: VaultPath, chunkIndex.drop(1).mkString(header.dimension_separator.toString) // (c),x,y,z -> z is dropped in 2d case } - private def partialCopyingIsNotNeeded(bufferShape: Array[Int], - globalOffset: Array[Int], - chunkIndices: List[Array[Int]]): Boolean = + private def partialCopyingIsNotNeededForMultiArray(bufferShape: Array[Int], + globalOffset: Array[Int], + chunkIndices: List[Array[Int]]): Boolean = + chunkIndices match { + case chunkIndex :: Nil => + val offsetInChunk = computeOffsetInChunkIgnoringAxisOrder(chunkIndex, globalOffset) + isZeroOffset(offsetInChunk) && + isBufferShapeEqualChunkShape(bufferShape) + case _ => false + } + + private def partialCopyingIsNotNeededForWkOrder(bufferShape: Array[Int], + globalOffset: Array[Int], + chunkIndices: List[Array[Int]]): Boolean = chunkIndices match { case chunkIndex :: Nil => val offsetInChunk = computeOffsetInChunk(chunkIndex, globalOffset) @@ -249,6 +293,11 @@ class DatasetArray(vaultPath: VaultPath, globalOffset(dim) - (chunkIndex(dim) * fullAxisOrder.permuteIndicesArrayToWk(chunkShape)(dim)) }.toArray + private def computeOffsetInChunkIgnoringAxisOrder(chunkIndex: Array[Int], globalOffset: Array[Int]): Array[Int] = + chunkIndex.indices.map { dim => + globalOffset(dim) - (chunkIndex(dim) * chunkShape(dim)) + }.toArray + override def toString: String = s"${getClass.getCanonicalName} fullAxisOrder=$fullAxisOrder shape=${header.datasetShape.map(s => printAsInner(s))} chunkShape=${printAsInner( header.chunkShape)} dtype=${header.resolvedDataType} fillValue=${header.fillValueNumber}, ${header.compressorImpl}, byteOrder=${header.byteOrder}, vault=${vaultPath.summary}}" diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index c05f74d4c49..a50489364b4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -54,10 +54,10 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) } - def readFromSegmentToAgglomerate(implicit ec: ExecutionContext): Fox[Array[Byte]] = { + def readFromSegmentToAgglomerate(implicit ec: ExecutionContext): Fox[ucar.ma2.Array] = { val zarrGroupPath = dataBaseDir - .resolve("sample_organization/test-agglomerate-file-zarr/segmentation/agglomerates/agglomerate_view_5") + 
.resolve("sample_organization/test-agglomerate-file-zarr/segmentation/agglomerates/agglomerate_view_55") .toAbsolutePath for { groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(s"file://$zarrGroupPath"), None)) @@ -65,12 +65,12 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService zarrArray <- Zarr3Array.open(segmentToAgglomeratePath, DataSourceId("zarr", "test"), "layer", - Some(AxisOrder(0, None, None)), + None, None, None, sharedChunkContentsCache)(ec, TokenContext(None)) - read <- zarrArray.readBytes(Array(5), Array(0))(ec, TokenContext(None)) - _ = logger.info(s"read ${read.length} bytes from agglomerate file") + read <- zarrArray.readAsMultiArray(Array(10), Array(2))(ec, TokenContext(None)) + _ = logger.info(s"read ${read.getSize} bytes from agglomerate file") } yield read } From f055c1e6970f41d273e5fec736c88d7161390ada Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 27 May 2025 10:07:00 +0200 Subject: [PATCH 007/100] apply agglomerate --- .../datastore/datareaders/DatasetArray.scala | 5 +- .../datareaders/zarr3/Zarr3Array.scala | 2 +- .../services/AgglomerateService.scala | 76 +++++++++++++++---- 3 files changed, 68 insertions(+), 15 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index 242fc71453a..d1080732ec5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -8,6 +8,7 @@ import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.models.AdditionalCoordinate import com.scalableminds.webknossos.datastore.models.datasource.AdditionalAxis +import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo import ucar.ma2.{Array => MultiArray} @@ -26,7 +27,8 @@ class DatasetArray(vaultPath: VaultPath, channelIndex: Option[Int], additionalAxes: Option[Seq[AdditionalAxis]], sharedChunkContentsCache: AlfuCache[String, MultiArray]) - extends FoxImplicits { + extends FoxImplicits + with LazyLogging { protected lazy val fullAxisOrder: FullAxisOrder = FullAxisOrder.fromAxisOrderAndAdditionalAxes(rank, axisOrder, additionalAxes) @@ -242,6 +244,7 @@ class DatasetArray(vaultPath: VaultPath, if (header.isSharded) { for { (shardPath, chunkRange) <- getShardedChunkPathAndRange(chunkIndex) ?~> "chunk.getShardedPathAndRange.failed" + _ = logger.info(s"chunk cache miss for $shardPath chunk ${chunkIndex.mkString(",")} ") chunkShape = chunkShapeAtIndex(chunkIndex) multiArray <- chunkReader.read(shardPath, chunkShape, Some(chunkRange), useSkipTypingShortcut) } yield multiArray diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala index 5a7d0c7b807..ce417854119 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala @@ -121,7 +121,7 @@ class Zarr3Array(vaultPath: VaultPath, private def readAndParseShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext, tc: 
TokenContext): Fox[Array[(Long, Long)]] = for { - shardIndexRaw <- readShardIndex(shardPath) + shardIndexRaw <- readShardIndex(shardPath) ?~> "readShardIndex.failed" parsed = parseShardIndex(shardIndexRaw) } yield parsed diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index a50489364b4..a6a97c0aad0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -10,7 +10,7 @@ import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} -import com.scalableminds.webknossos.datastore.datareaders.AxisOrder +import com.scalableminds.webknossos.datastore.datareaders.{AxisOrder, DatasetArray} import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NodeDefaults, SkeletonTracingDefaults} @@ -39,6 +39,9 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService private val agglomerateDir = "agglomerates" private val agglomerateFileExtension = "" + private lazy val openArraysCache = AlfuCache[String, DatasetArray]() + + // TODO unify with existing chunkContentsCache from binaryDataService private lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { // Used by DatasetArray-based datasets. 
Measure item weight in kilobytes because the weigher can only return int, not long @@ -54,14 +57,32 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) } - def readFromSegmentToAgglomerate(implicit ec: ExecutionContext): Fox[ucar.ma2.Array] = { + def readFromSegmentToAgglomerate(implicit ec: ExecutionContext, tc: TokenContext): Fox[ucar.ma2.Array] = + for { + zarrArray <- openZarrArrayCached("segment_to_agglomerate") + read <- zarrArray.readAsMultiArray(Array(10), Array(2)) + _ = logger.info(s"read ${read.getSize} elements from agglomerate file segmentToAgglomerate") + } yield read + + private def mapSingleSegment(segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = + for { + zarrArray <- openZarrArrayCached("segment_to_agglomerate") + // TODO remove the toInt + asMultiArray <- zarrArray.readAsMultiArray(offset = Array(segmentId.toInt), shape = Array(1)) + } yield asMultiArray.getLong(0) + + private def openZarrArrayCached(zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext) = + openArraysCache.getOrLoad(zarrArrayName, zarrArrayName => openZarrArray(zarrArrayName)) + + private def openZarrArray(zarrArrayName: String)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[DatasetArray] = { val zarrGroupPath = dataBaseDir .resolve("sample_organization/test-agglomerate-file-zarr/segmentation/agglomerates/agglomerate_view_55") .toAbsolutePath for { groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(s"file://$zarrGroupPath"), None)) - segmentToAgglomeratePath = groupVaultPath / "segment_to_agglomerate" + segmentToAgglomeratePath = groupVaultPath / zarrArrayName zarrArray <- Zarr3Array.open(segmentToAgglomeratePath, DataSourceId("zarr", "test"), "layer", @@ -69,21 +90,50 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService None, None, sharedChunkContentsCache)(ec, TokenContext(None)) - read <- zarrArray.readAsMultiArray(Array(10), Array(2))(ec, TokenContext(None)) - _ = logger.info(s"read ${read.getSize} bytes from agglomerate file") - } yield read + } yield zarrArray } - def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte])( - implicit ec: ExecutionContext): Fox[Array[Byte]] = { + def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Array[Byte]] = { val agglomerateFileKey = AgglomerateFileKey.fromDataRequest(request) - val zarrGroupPath = agglomerateFileKey.zarrGroupPath(dataBaseDir, agglomerateDir).toAbsolutePath - for { - _ <- readFromSegmentToAgglomerate - } yield data + def convertToAgglomerate(segmentIds: Array[Long], + bytesPerElement: Int, + putToBufferFunction: (ByteBuffer, Long) => ByteBuffer): Fox[Array[Byte]] = + for { + agglomerateIds <- Fox.serialCombined(segmentIds)(mapSingleSegment) + mappedBytes = agglomerateIds + .foldLeft(ByteBuffer.allocate(bytesPerElement * segmentIds.length).order(ByteOrder.LITTLE_ENDIAN))( + putToBufferFunction) + .array + } yield mappedBytes + + val bytesPerElement = ElementClass.bytesPerElement(request.dataLayer.elementClass) + /* Every value of the segmentation data needs to be converted to Long to then look up the + agglomerate id in the segment-to-agglomerate array. + The value is first converted to the primitive signed number types, and then converted + to Long via uByteToLong, uShortToLong etc, which perform bitwise and to take care of + the unsigned semantics. 
Using functions avoids allocating intermediate SegmentInteger objects. + Allocating a fixed-length LongBuffer first is a further performance optimization. + */ + convertData(data, request.dataLayer.elementClass) match { + case data: Array[Byte] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uByteToLong(e))) + convertToAgglomerate(longBuffer.array, bytesPerElement, putByte) + case data: Array[Short] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uShortToLong(e))) + convertToAgglomerate(longBuffer.array, bytesPerElement, putShort) + case data: Array[Int] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uIntToLong(e))) + convertToAgglomerate(longBuffer.array, bytesPerElement, putInt) + case data: Array[Long] => convertToAgglomerate(data, bytesPerElement, putLong) + case _ => Fox.successful(data) + } } @@ -122,7 +172,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])( implicit ec: ExecutionContext): Fox[Array[Byte]] = if (true) { - zarrAgglomerateService.applyAgglomerateHdf5(request)(data) + zarrAgglomerateService.applyAgglomerate(request)(data)(ec, TokenContext(None)) } else applyAgglomerateHdf5(request)(data).toFox private def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo { From 13ff0e3ca4588119940fcc507c9b955873d6ca73 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 27 May 2025 10:13:28 +0200 Subject: [PATCH 008/100] offset can be long; pass tokencontext --- .../datastore/datareaders/ChunkUtils.scala | 6 +++--- .../datastore/datareaders/DatasetArray.scala | 14 +++++++------- .../datastore/services/AgglomerateService.scala | 9 ++++----- .../datastore/services/BinaryDataService.scala | 3 ++- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala index 60378c05c7b..f2917923e4a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala @@ -4,7 +4,7 @@ object ChunkUtils { def computeChunkIndices(arrayShapeOpt: Option[Array[Int]], arrayChunkShape: Array[Int], selectedShape: Array[Int], - selectedOffset: Array[Int]): List[Array[Int]] = { + selectedOffset: Array[Long]): List[Array[Int]] = { val nDims = arrayChunkShape.length val start = new Array[Int](nDims) val end = new Array[Int](nDims) @@ -12,10 +12,10 @@ object ChunkUtils { for (dim <- 0 until nDims) { val largestPossibleIndex = arrayShapeOpt.map(arrayShape => (arrayShape(dim) - 1) / arrayChunkShape(dim)) val smallestPossibleIndex = 0 - val startIndexRaw = selectedOffset(dim) / arrayChunkShape(dim) + val startIndexRaw = (selectedOffset(dim) / arrayChunkShape(dim)).toInt val startIndexClamped = Math.max(smallestPossibleIndex, Math.min(largestPossibleIndex.getOrElse(startIndexRaw), startIndexRaw)) - val endIndexRaw = (selectedOffset(dim) + selectedShape(dim) - 1) / arrayChunkShape(dim) + val endIndexRaw = ((selectedOffset(dim) + selectedShape(dim) - 1) / arrayChunkShape(dim)).toInt val endIndexClampedToBbox = Math.max(smallestPossibleIndex, Math.min(largestPossibleIndex.getOrElse(endIndexRaw), endIndexRaw)) val 
endIndexClamped = Math.max(startIndexClamped, endIndexClampedToBbox) // end index must be greater or equal to start index diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index d1080732ec5..76ab19b7e84 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -158,7 +158,7 @@ class DatasetArray(vaultPath: VaultPath, val chunkIndices = ChunkUtils.computeChunkIndices(datasetShape.map(fullAxisOrder.permuteIndicesArrayToWk), fullAxisOrder.permuteIndicesArrayToWk(chunkShape), shape, - totalOffset) + totalOffset.map(_.toLong)) if (partialCopyingIsNotNeededForWkOrder(shape, totalOffset, chunkIndices)) { for { chunkIndex <- chunkIndices.headOption.toFox @@ -186,9 +186,9 @@ class DatasetArray(vaultPath: VaultPath, } } - def readAsMultiArray(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext, - tc: TokenContext): Fox[MultiArray] = { - val totalOffset: Array[Int] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }.padTo(offset.length, 0) + def readAsMultiArray(shape: Array[Int], offset: Array[Long])(implicit ec: ExecutionContext, + tc: TokenContext): Fox[MultiArray] = { + val totalOffset: Array[Long] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }.padTo(offset.length, 0) val chunkIndices = ChunkUtils.computeChunkIndices(datasetShape, chunkShape, shape, totalOffset) if (partialCopyingIsNotNeededForMultiArray(shape, totalOffset, chunkIndices)) { for { @@ -262,7 +262,7 @@ class DatasetArray(vaultPath: VaultPath, } private def partialCopyingIsNotNeededForMultiArray(bufferShape: Array[Int], - globalOffset: Array[Int], + globalOffset: Array[Long], chunkIndices: List[Array[Int]]): Boolean = chunkIndices match { case chunkIndex :: Nil => @@ -296,9 +296,9 @@ class DatasetArray(vaultPath: VaultPath, globalOffset(dim) - (chunkIndex(dim) * fullAxisOrder.permuteIndicesArrayToWk(chunkShape)(dim)) }.toArray - private def computeOffsetInChunkIgnoringAxisOrder(chunkIndex: Array[Int], globalOffset: Array[Int]): Array[Int] = + private def computeOffsetInChunkIgnoringAxisOrder(chunkIndex: Array[Int], globalOffset: Array[Long]): Array[Int] = chunkIndex.indices.map { dim => - globalOffset(dim) - (chunkIndex(dim) * chunkShape(dim)) + (globalOffset(dim) - (chunkIndex(dim).toLong * chunkShape(dim).toLong)).toInt }.toArray override def toString: String = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index a6a97c0aad0..db34ed3c269 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -67,8 +67,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService private def mapSingleSegment(segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { zarrArray <- openZarrArrayCached("segment_to_agglomerate") - // TODO remove the toInt - asMultiArray <- zarrArray.readAsMultiArray(offset = Array(segmentId.toInt), shape = Array(1)) + asMultiArray <- zarrArray.readAsMultiArray(offset = Array(segmentId), shape = Array(1)) } 
yield asMultiArray.getLong(0) private def openZarrArrayCached(zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext) = @@ -169,10 +168,10 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi .toSet ++ Set("agglomerate_view_5") // TODO } - def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])( - implicit ec: ExecutionContext): Fox[Array[Byte]] = + def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Array[Byte]] = if (true) { - zarrAgglomerateService.applyAgglomerate(request)(data)(ec, TokenContext(None)) + zarrAgglomerateService.applyAgglomerate(request)(data) } else applyAgglomerateHdf5(request)(data).toFox private def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala index 6d82ad62077..58d4dd006e1 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala @@ -136,7 +136,8 @@ class BinaryDataService(val dataBaseDir: Path, Full(outputArray) } - private def convertAccordingToRequest(request: DataServiceDataRequest, inputArray: Array[Byte]): Fox[Array[Byte]] = + private def convertAccordingToRequest(request: DataServiceDataRequest, inputArray: Array[Byte])( + implicit tc: TokenContext): Fox[Array[Byte]] = for { clippedData <- convertIfNecessary( !request.cuboid.toMag1BoundingBox.isFullyContainedIn(request.dataLayer.boundingBox), From d8533899ed1abe0a850c6ba48faae305603f69a6 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 27 May 2025 11:54:00 +0200 Subject: [PATCH 009/100] WIP read agglomerate skeleton --- conf/application.conf | 2 +- .../controllers/DataSourceController.scala | 8 +- .../datastore/datareaders/DatasetArray.scala | 4 +- .../services/AgglomerateService.scala | 328 +++++++++++------- 4 files changed, 218 insertions(+), 124 deletions(-) diff --git a/conf/application.conf b/conf/application.conf index aac6419d24e..c295317e578 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -347,4 +347,4 @@ pidfile.path = "/dev/null" # uncomment these lines for faster restart during local backend development (but beware the then-missing features): -#slick.checkSchemaOnStartup = false +slick.checkSchemaOnStartup = false diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index c6f84eaf0e0..c7bcdb229de 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -277,9 +277,11 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - skeleton <- agglomerateService - .generateSkeleton(organizationId, datasetDirectoryName, dataLayerName, mappingName, agglomerateId) - .toFox ?~> "agglomerateSkeleton.failed" + skeleton <- 
agglomerateService.generateSkeleton(organizationId, + datasetDirectoryName, + dataLayerName, + mappingName, + agglomerateId) ?~> "agglomerateSkeleton.failed" } yield Ok(skeleton.toByteArray).as(protobufMimeType) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index 76ab19b7e84..4efc89f0c81 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -3,6 +3,7 @@ package com.scalableminds.webknossos.datastore.datareaders import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int +import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId @@ -201,7 +202,7 @@ class DatasetArray(vaultPath: VaultPath, val copiedFuture = Fox.combined(chunkIndices.map { chunkIndex: Array[Int] => for { sourceChunk: MultiArray <- getSourceChunkDataWithCache(chunkIndex) - offsetInChunk = computeOffsetInChunkIgnoringAxisOrder(chunkIndex, totalOffset).reverse + offsetInChunk = computeOffsetInChunkIgnoringAxisOrder(chunkIndex, totalOffset) _ <- tryo(MultiArrayUtils.copyRange(offsetInChunk, sourceChunk, targetMultiArray)).toFox ?~> formatCopyRangeErrorWithoutAxisOrder( offsetInChunk, sourceChunk, @@ -244,7 +245,6 @@ class DatasetArray(vaultPath: VaultPath, if (header.isSharded) { for { (shardPath, chunkRange) <- getShardedChunkPathAndRange(chunkIndex) ?~> "chunk.getShardedPathAndRange.failed" - _ = logger.info(s"chunk cache miss for $shardPath chunk ${chunkIndex.mkString(",")} ") chunkShape = chunkShapeAtIndex(chunkIndex) multiArray <- chunkReader.read(shardPath, chunkShape, Some(chunkRange), useSkipTypingShortcut) } yield multiArray diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index db34ed3c269..10eedf6e8b5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -10,10 +10,10 @@ import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} -import com.scalableminds.webknossos.datastore.datareaders.{AxisOrder, DatasetArray} +import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto -import com.scalableminds.webknossos.datastore.helpers.{NodeDefaults, SkeletonTracingDefaults} +import com.scalableminds.webknossos.datastore.helpers.{NativeBucketScanner, NodeDefaults, SkeletonTracingDefaults} import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass} import 
com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest import com.scalableminds.webknossos.datastore.storage._ @@ -21,7 +21,7 @@ import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.{Box, Failure, Full} import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils -import ucar.ma2.{Array => MultiArray} +import ucar.ma2.{DataType, Index2D, Array => MultiArray} import java.net.URI import java.nio._ @@ -37,17 +37,16 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService with LazyLogging { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private val agglomerateDir = "agglomerates" - private val agglomerateFileExtension = "" private lazy val openArraysCache = AlfuCache[String, DatasetArray]() - // TODO unify with existing chunkContentsCache from binaryDataService + // TODO unify with existing chunkContentsCache from binaryDataService? private lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { // Used by DatasetArray-based datasets. Measure item weight in kilobytes because the weigher can only return int, not long val maxSizeKiloBytes = Math.floor(config.Datastore.Cache.ImageArrayChunks.maxSizeBytes.toDouble / 1000.0).toInt - def cacheWeight(key: String, arrayBox: Box[MultiArray]): Int = + def cacheWeight(_key: String, arrayBox: Box[MultiArray]): Int = arrayBox match { case Full(array) => (array.getSizeBytes / 1000L).toInt @@ -57,6 +56,8 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) } + protected lazy val bucketScanner = new NativeBucketScanner() + def readFromSegmentToAgglomerate(implicit ec: ExecutionContext, tc: TokenContext): Fox[ucar.ma2.Array] = for { zarrArray <- openZarrArrayCached("segment_to_agglomerate") @@ -64,10 +65,10 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService _ = logger.info(s"read ${read.getSize} elements from agglomerate file segmentToAgglomerate") } yield read - private def mapSingleSegment(segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = + private def mapSingleSegment(zarrArray: DatasetArray, segmentId: Long)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Long] = for { - zarrArray <- openZarrArrayCached("segment_to_agglomerate") - asMultiArray <- zarrArray.readAsMultiArray(offset = Array(segmentId), shape = Array(1)) + asMultiArray <- zarrArray.readAsMultiArray(shape = Array(1), offset = Array(segmentId)) } yield asMultiArray.getLong(0) private def openZarrArrayCached(zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext) = @@ -88,7 +89,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService None, None, None, - sharedChunkContentsCache)(ec, TokenContext(None)) + sharedChunkContentsCache) } yield zarrArray } @@ -99,43 +100,126 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService val zarrGroupPath = agglomerateFileKey.zarrGroupPath(dataBaseDir, agglomerateDir).toAbsolutePath def convertToAgglomerate(segmentIds: Array[Long], + relevantAgglomerateMap: Map[Long, Long], bytesPerElement: Int, - putToBufferFunction: (ByteBuffer, Long) => ByteBuffer): Fox[Array[Byte]] = - for { - agglomerateIds <- Fox.serialCombined(segmentIds)(mapSingleSegment) - mappedBytes = agglomerateIds - .foldLeft(ByteBuffer.allocate(bytesPerElement * segmentIds.length).order(ByteOrder.LITTLE_ENDIAN))( - putToBufferFunction) - .array - } yield 
mappedBytes + putToBufferFunction: (ByteBuffer, Long) => ByteBuffer): Array[Byte] = { + val agglomerateIds = segmentIds.map(relevantAgglomerateMap) + agglomerateIds + .foldLeft(ByteBuffer.allocate(bytesPerElement * segmentIds.length).order(ByteOrder.LITTLE_ENDIAN))( + putToBufferFunction) + .array + } val bytesPerElement = ElementClass.bytesPerElement(request.dataLayer.elementClass) - /* Every value of the segmentation data needs to be converted to Long to then look up the - agglomerate id in the segment-to-agglomerate array. - The value is first converted to the primitive signed number types, and then converted - to Long via uByteToLong, uShortToLong etc, which perform bitwise and to take care of - the unsigned semantics. Using functions avoids allocating intermediate SegmentInteger objects. - Allocating a fixed-length LongBuffer first is a further performance optimization. - */ - convertData(data, request.dataLayer.elementClass) match { - case data: Array[Byte] => - val longBuffer = LongBuffer.allocate(data.length) - data.foreach(e => longBuffer.put(uByteToLong(e))) - convertToAgglomerate(longBuffer.array, bytesPerElement, putByte) - case data: Array[Short] => - val longBuffer = LongBuffer.allocate(data.length) - data.foreach(e => longBuffer.put(uShortToLong(e))) - convertToAgglomerate(longBuffer.array, bytesPerElement, putShort) - case data: Array[Int] => - val longBuffer = LongBuffer.allocate(data.length) - data.foreach(e => longBuffer.put(uIntToLong(e))) - convertToAgglomerate(longBuffer.array, bytesPerElement, putInt) - case data: Array[Long] => convertToAgglomerate(data, bytesPerElement, putLong) - case _ => Fox.successful(data) - } + val distinctSegmentIds = + bucketScanner.collectSegmentIds(data, bytesPerElement, isSigned = false, skipZeroes = false) + for { + zarrArray <- openZarrArrayCached("segment_to_agglomerate") + beforeBuildMap = Instant.now + relevantAgglomerateMap: Map[Long, Long] <- Fox + .serialCombined(distinctSegmentIds) { segmentId => + mapSingleSegment(zarrArray, segmentId).map((segmentId, _)) + } + .map(_.toMap) + _ = Instant.logSince(beforeBuildMap, "build map") + mappedBytes: Array[Byte] = convertData(data, request.dataLayer.elementClass) match { + case data: Array[Byte] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uByteToLong(e))) + convertToAgglomerate(longBuffer.array, relevantAgglomerateMap, bytesPerElement, putByte) + case data: Array[Short] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uShortToLong(e))) + convertToAgglomerate(longBuffer.array, relevantAgglomerateMap, bytesPerElement, putShort) + case data: Array[Int] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uIntToLong(e))) + convertToAgglomerate(longBuffer.array, relevantAgglomerateMap, bytesPerElement, putInt) + case data: Array[Long] => convertToAgglomerate(data, relevantAgglomerateMap, bytesPerElement, putLong) + case _ => data + } + } yield mappedBytes } + def generateSkeleton(organizationId: String, + datasetDirectoryName: String, + dataLayerName: String, + mappingName: String, + agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = + for { + before <- Instant.nowFox + agglomerate_to_segments_offsets <- openZarrArrayCached("agglomerate_to_segments_offsets") + agglomerate_to_edges_offsets <- openZarrArrayCached("agglomerate_to_edges_offsets") + + positionsRange: MultiArray <- 
agglomerate_to_segments_offsets.readAsMultiArray(shape = Array(2), + offset = Array(agglomerateId)) + edgesRange: MultiArray <- agglomerate_to_edges_offsets.readAsMultiArray(shape = Array(2), + offset = Array(agglomerateId)) + nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) + edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) + edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges + _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" + _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" + positions: MultiArray <- if (nodeCount == 0L) { + Fox.successful(MultiArray.factory(DataType.LONG, Array(0, 0))) + } else { + for { + agglomerate_to_positions <- openZarrArrayCached("agglomerate_to_positions") + positions <- agglomerate_to_positions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), + shape = Array(nodeCount.toInt, 3)) + } yield positions + } + edges: MultiArray <- if (edgeCount == 0L) { + Fox.successful(MultiArray.factory(DataType.LONG, Array(0, 0))) + } else { + for { + agglomerate_to_edges <- openZarrArrayCached("agglomerate_to_edges") + edges <- agglomerate_to_edges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), + shape = Array(edgeCount.toInt, 2)) + } yield edges + } + + nodeIdStartAtOneOffset = 1 + + nodes = (0 until nodeCount.toInt).map { nodeIdx => + NodeDefaults.createInstance.copy( + id = nodeIdx + nodeIdStartAtOneOffset, + position = Vec3IntProto(positions.getInt(new Index2D(Array(nodeIdx, 0))), + positions.getInt(new Index2D(Array(nodeIdx, 1))), + positions.getInt(new Index2D(Array(nodeIdx, 2)))) + ) + } + + skeletonEdges = (0 until edges.getShape()(1)).map { edgeIdx => + Edge(source = edges.getInt(new Index2D(Array(edgeIdx, 0))) + nodeIdStartAtOneOffset, + target = edges.getInt(new Index2D(Array(edgeIdx, 1))) + nodeIdStartAtOneOffset) + } + + trees = Seq( + Tree( + treeId = math.abs(agglomerateId.toInt), // used only to deterministically select tree color + createdTimestamp = System.currentTimeMillis(), + // unsafeWrapArray is fine, because the underlying arrays are never mutated + nodes = nodes, + edges = skeletonEdges, + name = s"agglomerate $agglomerateId ($mappingName)", + `type` = Some(TreeTypeProto.AGGLOMERATE) + )) + + skeleton = SkeletonTracingDefaults.createInstance.copy( + datasetName = datasetDirectoryName, + trees = trees + ) + + _ = if (Instant.since(before) > (100 milliseconds)) { + Instant.logSince( + before, + s"Generating skeleton from agglomerate file with ${skeletonEdges.length} edges, ${nodes.length} nodes", + logger) + } + + } yield skeleton } class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { @@ -229,7 +313,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi // We don't need to differentiate between the data types because the underlying library does the conversion for us reader.uint64().readArrayBlockWithOffset(hdf5Dataset, blockSize.toInt, segmentId) - // This uses the datasetDirectoryName, which allows us to call it on the same hdf file in parallel. + // This uses the datasetName, which allows us to call it on the same hdf file in parallel. 
private def readHDF(reader: IHDF5Reader, segmentId: Long, blockSize: Long) = reader.uint64().readArrayBlockWithOffset(datasetName, blockSize.toInt, segmentId) @@ -268,89 +352,97 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi datasetDirectoryName: String, dataLayerName: String, mappingName: String, - agglomerateId: Long): Box[SkeletonTracing] = - try { - val before = Instant.now - val hdfFile = - dataBaseDir - .resolve(organizationId) - .resolve(datasetDirectoryName) - .resolve(dataLayerName) - .resolve(agglomerateDir) - .resolve(s"$mappingName.$agglomerateFileExtension") - .toFile - - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) - val positionsRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) - val edgesRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) - - val nodeCount = positionsRange(1) - positionsRange(0) - val edgeCount = edgesRange(1) - edgesRange(0) - val edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges - if (nodeCount > edgeLimit) { - throw new Exception(s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)") - } - if (edgeCount > edgeLimit) { - throw new Exception(s"Agglomerate has too many edges ($edgeCount > $edgeLimit)") - } - val positions: Array[Array[Long]] = - if (nodeCount == 0L) { - Array.empty[Array[Long]] - } else { - reader - .uint64() - .readMatrixBlockWithOffset("/agglomerate_to_positions", nodeCount.toInt, 3, positionsRange(0), 0) + agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = + if (true) { + zarrAgglomerateService.generateSkeleton(organizationId, + datasetDirectoryName, + dataLayerName, + mappingName, + agglomerateId) + } else { + (try { + val before = Instant.now + val hdfFile = + dataBaseDir + .resolve(organizationId) + .resolve(datasetDirectoryName) + .resolve(dataLayerName) + .resolve(agglomerateDir) + .resolve(s"$mappingName.$agglomerateFileExtension") + .toFile + + val reader = HDF5FactoryProvider.get.openForReading(hdfFile) + val positionsRange: Array[Long] = + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) + val edgesRange: Array[Long] = + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) + + val nodeCount = positionsRange(1) - positionsRange(0) + val edgeCount = edgesRange(1) - edgesRange(0) + val edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges + if (nodeCount > edgeLimit) { + throw new Exception(s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)") } - val edges: Array[Array[Long]] = { - if (edgeCount == 0L) { - Array.empty[Array[Long]] - } else { - reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) + if (edgeCount > edgeLimit) { + throw new Exception(s"Agglomerate has too many edges ($edgeCount > $edgeLimit)") + } + val positions: Array[Array[Long]] = + if (nodeCount == 0L) { + Array.empty[Array[Long]] + } else { + reader + .uint64() + .readMatrixBlockWithOffset("/agglomerate_to_positions", nodeCount.toInt, 3, positionsRange(0), 0) + } + val edges: Array[Array[Long]] = { + if (edgeCount == 0L) { + Array.empty[Array[Long]] + } else { + reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) + } } - } - - val nodeIdStartAtOneOffset = 1 - - val nodes = positions.zipWithIndex.map { - case (pos, idx) => - 
NodeDefaults.createInstance.copy( - id = idx + nodeIdStartAtOneOffset, - position = Vec3IntProto(pos(0).toInt, pos(1).toInt, pos(2).toInt) - ) - } - val skeletonEdges = edges.map { e => - Edge(source = e(0).toInt + nodeIdStartAtOneOffset, target = e(1).toInt + nodeIdStartAtOneOffset) - } + val nodeIdStartAtOneOffset = 1 - val trees = Seq( - Tree( - treeId = math.abs(agglomerateId.toInt), // used only to deterministically select tree color - createdTimestamp = System.currentTimeMillis(), - // unsafeWrapArray is fine, because the underlying arrays are never mutated - nodes = ArraySeq.unsafeWrapArray(nodes), - edges = ArraySeq.unsafeWrapArray(skeletonEdges), - name = s"agglomerate $agglomerateId ($mappingName)", - `type` = Some(TreeTypeProto.AGGLOMERATE) - )) + val nodes = positions.zipWithIndex.map { + case (pos, idx) => + NodeDefaults.createInstance.copy( + id = idx + nodeIdStartAtOneOffset, + position = Vec3IntProto(pos(0).toInt, pos(1).toInt, pos(2).toInt) + ) + } - val skeleton = SkeletonTracingDefaults.createInstance.copy( - datasetName = datasetDirectoryName, - trees = trees - ) + val skeletonEdges = edges.map { e => + Edge(source = e(0).toInt + nodeIdStartAtOneOffset, target = e(1).toInt + nodeIdStartAtOneOffset) + } - if (Instant.since(before) > (100 milliseconds)) { - Instant.logSince( - before, - s"Generating skeleton from agglomerate file with ${skeletonEdges.length} edges, ${nodes.length} nodes", - logger) - } + val trees = Seq( + Tree( + treeId = math.abs(agglomerateId.toInt), // used only to deterministically select tree color + createdTimestamp = System.currentTimeMillis(), + // unsafeWrapArray is fine, because the underlying arrays are never mutated + nodes = ArraySeq.unsafeWrapArray(nodes), + edges = ArraySeq.unsafeWrapArray(skeletonEdges), + name = s"agglomerate $agglomerateId ($mappingName)", + `type` = Some(TreeTypeProto.AGGLOMERATE) + )) + + val skeleton = SkeletonTracingDefaults.createInstance.copy( + datasetName = datasetDirectoryName, + trees = trees + ) + + if (Instant.since(before) > (100 milliseconds)) { + Instant.logSince( + before, + s"Generating skeleton from agglomerate file with ${skeletonEdges.length} edges, ${nodes.length} nodes", + logger) + } - Full(skeleton) - } catch { - case e: Exception => Failure(e.getMessage) + Full(skeleton) + } catch { + case e: Exception => Failure(e.getMessage) + }).toFox } def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey): Box[Long] = { From 56ce08ba40f59cacb42290dc8320fd0cb5e88538 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 27 May 2025 13:53:58 +0200 Subject: [PATCH 010/100] fix reading agglomerate skeleton --- .../datastore/datareaders/DatasetArray.scala | 2 +- .../services/AgglomerateService.scala | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index 4efc89f0c81..c09f0500d75 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -198,7 +198,7 @@ class DatasetArray(vaultPath: VaultPath, } yield sourceChunk } else { val targetBuffer = MultiArrayUtils.createDataBuffer(header.resolvedDataType, shape) - val targetMultiArray = MultiArrayUtils.createArrayWithGivenStorage(targetBuffer, shape.reverse) + val targetMultiArray = 
MultiArrayUtils.createArrayWithGivenStorage(targetBuffer, shape) val copiedFuture = Fox.combined(chunkIndices.map { chunkIndex: Array[Int] => for { sourceChunk: MultiArray <- getSourceChunkDataWithCache(chunkIndex) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 10eedf6e8b5..7fed7a1a999 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -21,7 +21,7 @@ import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.{Box, Failure, Full} import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils -import ucar.ma2.{DataType, Index2D, Array => MultiArray} +import ucar.ma2.{DataType, Array => MultiArray} import java.net.URI import java.nio._ @@ -182,18 +182,23 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService nodeIdStartAtOneOffset = 1 + // TODO use multiarray index iterators? nodes = (0 until nodeCount.toInt).map { nodeIdx => NodeDefaults.createInstance.copy( id = nodeIdx + nodeIdStartAtOneOffset, - position = Vec3IntProto(positions.getInt(new Index2D(Array(nodeIdx, 0))), - positions.getInt(new Index2D(Array(nodeIdx, 1))), - positions.getInt(new Index2D(Array(nodeIdx, 2)))) + position = Vec3IntProto( + positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) + ) ) } - skeletonEdges = (0 until edges.getShape()(1)).map { edgeIdx => - Edge(source = edges.getInt(new Index2D(Array(edgeIdx, 0))) + nodeIdStartAtOneOffset, - target = edges.getInt(new Index2D(Array(edgeIdx, 1))) + nodeIdStartAtOneOffset) + skeletonEdges = (0 until edges.getShape()(0)).map { edgeIdx => + Edge( + source = edges.getInt(edges.getIndex.set(Array(edgeIdx, 0))) + nodeIdStartAtOneOffset, + target = edges.getInt(edges.getIndex.set(Array(edgeIdx, 1))) + nodeIdStartAtOneOffset + ) } trees = Seq( From 2855d2b31468c88c7fd09550f0fd2f1a6635ab89 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 27 May 2025 14:20:04 +0200 Subject: [PATCH 011/100] Change DatasetArray shape from Int to Long. 
Implement reading largestAgglomerateId --- .../datastore/dataformats/wkw/WKWHeader.scala | 2 +- .../datastore/datareaders/AxisOrder.scala | 3 +++ .../datastore/datareaders/ChunkUtils.scala | 8 ++++---- .../datastore/datareaders/DatasetArray.scala | 19 +++++++++++-------- .../datastore/datareaders/DatasetHeader.scala | 10 +++++++--- .../datastore/datareaders/n5/N5Header.scala | 4 ++-- .../precomputed/PrecomputedHeader.scala | 6 +++--- .../datastore/datareaders/wkw/WKWArray.scala | 2 +- .../datareaders/zarr/ZarrHeader.scala | 6 +++--- .../datareaders/zarr3/Zarr3ArrayHeader.scala | 8 ++++---- .../explore/NgffExplorationUtils.scala | 6 +++--- .../datastore/explore/NgffV0_4Explorer.scala | 4 ++-- .../datastore/explore/NgffV0_5Explorer.scala | 2 +- .../explore/PrecomputedExplorer.scala | 2 +- .../services/AgglomerateService.scala | 12 +++++++++++- ...VolumeTracingZarrStreamingController.scala | 6 +++--- 16 files changed, 60 insertions(+), 40 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/wkw/WKWHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/wkw/WKWHeader.scala index d694ef96163..adde6bb2817 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/wkw/WKWHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/wkw/WKWHeader.scala @@ -78,7 +78,7 @@ case class WKWHeader( } } - override def datasetShape: Option[Array[Int]] = None + override def datasetShape: Option[Array[Long]] = None override def chunkShape: Array[Int] = Array(numChannels, numVoxelsPerChunkDimension, numVoxelsPerChunkDimension, numVoxelsPerChunkDimension) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala index 809d4d5e5cf..b8deb810cd6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/AxisOrder.scala @@ -96,6 +96,9 @@ case class FullAxisOrder(axes: Seq[Axis]) { def permuteIndicesArrayToWk(indices: Array[Int]): Array[Int] = arrayToWkPermutation.map(indices(_)) + def permuteIndicesArrayToWkLong(indices: Array[Long]): Array[Long] = + arrayToWkPermutation.map(indices(_)) + def toWkLibsJson: JsValue = Json.toJson(axes.zipWithIndex.collect { case (axis, index) if axis.name == "x" || axis.name == "y" || axis.name == "z" => diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala index f2917923e4a..8959b15e2bc 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala @@ -1,16 +1,16 @@ package com.scalableminds.webknossos.datastore.datareaders object ChunkUtils { - def computeChunkIndices(arrayShapeOpt: Option[Array[Int]], + def computeChunkIndices(arrayShapeOpt: Option[Array[Long]], arrayChunkShape: Array[Int], selectedShape: Array[Int], - selectedOffset: Array[Long]): List[Array[Int]] = { + selectedOffset: Array[Long]): Seq[Array[Int]] = { val nDims = arrayChunkShape.length val start = new Array[Int](nDims) val end = new Array[Int](nDims) var numChunks = 1 for (dim <- 0 until nDims) { - val 
largestPossibleIndex = arrayShapeOpt.map(arrayShape => (arrayShape(dim) - 1) / arrayChunkShape(dim)) + val largestPossibleIndex = arrayShapeOpt.map(arrayShape => ((arrayShape(dim) - 1) / arrayChunkShape(dim)).toInt) val smallestPossibleIndex = 0 val startIndexRaw = (selectedOffset(dim) / arrayChunkShape(dim)).toInt val startIndexClamped = @@ -38,6 +38,6 @@ object ChunkUtils { dimIndex = -1 } } - chunkIndices.toList + chunkIndices.toSeq } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index c09f0500d75..c4b5309caab 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -51,7 +51,7 @@ class DatasetArray(vaultPath: VaultPath, header.rank + 1 } - lazy val datasetShape: Option[Array[Int]] = if (axisOrder.hasZAxis) { + lazy val datasetShape: Option[Array[Long]] = if (axisOrder.hasZAxis) { header.datasetShape } else { header.datasetShape.map(shape => shape :+ 1) @@ -156,10 +156,12 @@ class DatasetArray(vaultPath: VaultPath, private def readAsFortranOrder(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext, tc: TokenContext): Fox[MultiArray] = { val totalOffset: Array[Int] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }.padTo(offset.length, 0) - val chunkIndices = ChunkUtils.computeChunkIndices(datasetShape.map(fullAxisOrder.permuteIndicesArrayToWk), - fullAxisOrder.permuteIndicesArrayToWk(chunkShape), - shape, - totalOffset.map(_.toLong)) + val chunkIndices = ChunkUtils.computeChunkIndices( + datasetShape.map(fullAxisOrder.permuteIndicesArrayToWkLong), + fullAxisOrder.permuteIndicesArrayToWk(chunkShape), + shape, + totalOffset.map(_.toLong) + ) if (partialCopyingIsNotNeededForWkOrder(shape, totalOffset, chunkIndices)) { for { chunkIndex <- chunkIndices.headOption.toFox @@ -263,7 +265,7 @@ class DatasetArray(vaultPath: VaultPath, private def partialCopyingIsNotNeededForMultiArray(bufferShape: Array[Int], globalOffset: Array[Long], - chunkIndices: List[Array[Int]]): Boolean = + chunkIndices: Seq[Array[Int]]): Boolean = chunkIndices match { case chunkIndex :: Nil => val offsetInChunk = computeOffsetInChunkIgnoringAxisOrder(chunkIndex, globalOffset) @@ -274,7 +276,7 @@ class DatasetArray(vaultPath: VaultPath, private def partialCopyingIsNotNeededForWkOrder(bufferShape: Array[Int], globalOffset: Array[Int], - chunkIndices: List[Array[Int]]): Boolean = + chunkIndices: Seq[Array[Int]]): Boolean = chunkIndices match { case chunkIndex :: Nil => val offsetInChunk = computeOffsetInChunk(chunkIndex, globalOffset) @@ -301,8 +303,9 @@ class DatasetArray(vaultPath: VaultPath, (globalOffset(dim) - (chunkIndex(dim).toLong * chunkShape(dim).toLong)).toInt }.toArray + // TODO works only for wk dataet arrays, not agglomerate files override def toString: String = - s"${getClass.getCanonicalName} fullAxisOrder=$fullAxisOrder shape=${header.datasetShape.map(s => printAsInner(s))} chunkShape=${printAsInner( + s"${getClass.getCanonicalName} fullAxisOrder=$fullAxisOrder shape=${header.datasetShape.map(s => printAsInner(s.map(_.toInt)))} chunkShape=${printAsInner( header.chunkShape)} dtype=${header.resolvedDataType} fillValue=${header.fillValueNumber}, ${header.compressorImpl}, byteOrder=${header.byteOrder}, vault=${vaultPath.summary}}" } diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala index 6907d49ecae..3935b42ea6b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetHeader.scala @@ -11,7 +11,7 @@ import java.nio.ByteOrder trait DatasetHeader { // Note that in DatasetArray, datasetShape and chunkShape are adapted for 2d datasets - def datasetShape: Option[Array[Int]] // shape of the entire array + def datasetShape: Option[Array[Long]] // shape of the entire array def chunkShape: Array[Int] // shape of each chunk, def dimension_separator: DimensionSeparator @@ -44,9 +44,13 @@ trait DatasetHeader { None else { if (axisOrder.hasZAxis) { - Some(BoundingBox(Vec3Int.zeros, shape(axisOrder.x), shape(axisOrder.y), shape(axisOrder.zWithFallback))) + Some( + BoundingBox(Vec3Int.zeros, + shape(axisOrder.x).toInt, + shape(axisOrder.y).toInt, + shape(axisOrder.zWithFallback).toInt)) } else { - Some(BoundingBox(Vec3Int.zeros, shape(axisOrder.x), shape(axisOrder.y), 1)) + Some(BoundingBox(Vec3Int.zeros, shape(axisOrder.x).toInt, shape(axisOrder.y).toInt, 1)) } } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/n5/N5Header.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/n5/N5Header.scala index 7cc5542b940..8e178636971 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/n5/N5Header.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/n5/N5Header.scala @@ -16,7 +16,7 @@ object N5BlockHeader { } case class N5Header( - dimensions: Array[Int], // shape of the entire array + dimensions: Array[Long], // shape of the entire array blockSize: Array[Int], // shape of each chunk compression: Option[Map[String, CompressionSetting]] = None, // specifies compressor to use, with parameters dataType: String, @@ -25,7 +25,7 @@ case class N5Header( val fill_value: Either[String, Number] = Right(0) val order: ArrayOrder = ArrayOrder.F - override lazy val datasetShape: Option[Array[Int]] = Some(dimensions) + override lazy val datasetShape: Option[Array[Long]] = Some(dimensions) lazy val chunkShape: Array[Int] = blockSize diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala index 430547d5ea0..ac05bd26556 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/precomputed/PrecomputedHeader.scala @@ -28,7 +28,7 @@ case class PrecomputedHeader(`type`: String, } case class PrecomputedScale(key: String, - size: Array[Int], + size: Array[Long], resolution: Array[Double], chunk_sizes: Array[Array[Int]], encoding: String, @@ -45,7 +45,7 @@ case class PrecomputedScale(key: String, case class PrecomputedScaleHeader(precomputedScale: PrecomputedScale, precomputedHeader: PrecomputedHeader) extends DatasetHeader { - override def datasetShape: Option[Array[Int]] = Some(precomputedScale.size) + override def datasetShape: Option[Array[Long]] = Some(precomputedScale.size) override def chunkShape: Array[Int] = 
precomputedScale.chunk_sizes.head @@ -72,7 +72,7 @@ case class PrecomputedScaleHeader(precomputedScale: PrecomputedScale, precompute val (chunkIndexAtDim, dim) = chunkIndexWithDim val beginOffset = voxelOffset(dim) + chunkIndexAtDim * precomputedScale.primaryChunkShape(dim) val endOffset = voxelOffset(dim) + ((chunkIndexAtDim + 1) * precomputedScale.primaryChunkShape(dim)) - .min(precomputedScale.size(dim)) + .min(precomputedScale.size(dim).toInt) (beginOffset, endOffset) }) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala index f5e7232f9f1..31242525413 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/wkw/WKWArray.scala @@ -129,7 +129,7 @@ class WKWArray(vaultPath: VaultPath, private def chunkIndexToShardIndex(chunkIndex: Array[Int]) = ChunkUtils.computeChunkIndices( - header.datasetShape.map(fullAxisOrder.permuteIndicesArrayToWk), + header.datasetShape.map(fullAxisOrder.permuteIndicesArrayToWkLong), fullAxisOrder.permuteIndicesArrayToWk(header.shardShape), header.chunkShape, chunkIndex.zip(header.chunkShape).map { case (i, s) => i * s } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr/ZarrHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr/ZarrHeader.scala index 66fe7090deb..6aaf01e431e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr/ZarrHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr/ZarrHeader.scala @@ -21,7 +21,7 @@ import play.api.libs.json._ case class ZarrHeader( zarr_format: Int, // format version number - shape: Array[Int], // shape of the entire array + shape: Array[Long], // shape of the entire array chunks: Array[Int], // shape of each chunk compressor: Option[Map[String, CompressionSetting]] = None, // specifies compressor to use, with parameters filters: Option[List[Map[String, String]]] = None, // specifies filters to use, with parameters @@ -31,7 +31,7 @@ case class ZarrHeader( override val order: ArrayOrder ) extends DatasetHeader { - override lazy val datasetShape: Option[Array[Int]] = Some(shape) + override lazy val datasetShape: Option[Array[Long]] = Some(shape) override lazy val chunkShape: Array[Int] = chunks override lazy val byteOrder: ByteOrder = @@ -77,7 +77,7 @@ object ZarrHeader extends JsonImplicits { val chunks = Array(channels) ++ additionalAxesChunksEntries ++ Array(cubeLength, cubeLength, cubeLength) ZarrHeader(zarr_format = 2, - shape = shape, + shape = shape.map(_.toLong), chunks = chunks, compressor = compressor, dtype = dtype, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3ArrayHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3ArrayHeader.scala index 262b462abf0..adee8ddcd2c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3ArrayHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3ArrayHeader.scala @@ -25,7 +25,7 @@ import java.nio.ByteOrder case class Zarr3ArrayHeader( zarr_format: Int, // must be 3 node_type: String, // must be "array" - shape: Array[Int], + shape: 
Array[Long], data_type: Either[String, ExtensionDataType], chunk_grid: Either[ChunkGridSpecification, ExtensionChunkGridSpecification], chunk_key_encoding: ChunkKeyEncoding, @@ -36,7 +36,7 @@ case class Zarr3ArrayHeader( dimension_names: Option[Array[String]] ) extends DatasetHeader { - override def datasetShape: Option[Array[Int]] = Some(shape) + override def datasetShape: Option[Array[Long]] = Some(shape) override def chunkShape: Array[Int] = getChunkSize @@ -168,7 +168,7 @@ object Zarr3ArrayHeader extends JsonImplicits { for { zarr_format <- (json \ "zarr_format").validate[Int] node_type <- (json \ "node_type").validate[String] - shape <- (json \ "shape").validate[Array[Int]] + shape <- (json \ "shape").validate[Array[Long]] data_type <- (json \ "data_type").validate[String] chunk_grid <- (json \ "chunk_grid").validate[ChunkGridSpecification] chunk_key_encoding <- (json \ "chunk_key_encoding").validate[ChunkKeyEncoding] @@ -271,7 +271,7 @@ object Zarr3ArrayHeader extends JsonImplicits { zarr_format = 3, node_type = "array", // channel, additional axes, XYZ - shape = Array(1) ++ additionalAxes.map(_.highestValue).toArray ++ xyzBBounds, + shape = (Array(1) ++ additionalAxes.map(_.highestValue).toArray ++ xyzBBounds).map(_.toLong), data_type = Left(dataLayer.elementClass.toString), chunk_grid = Left( ChunkGridSpecification( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala index c62c1ddd5b2..32a67a0d798 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffExplorationUtils.scala @@ -182,7 +182,7 @@ trait NgffExplorationUtils extends FoxImplicits { Vec3Double(xFactors.product, yFactors.product, zFactors.product) } - protected def getShape(dataset: NgffDataset, path: VaultPath)(implicit tc: TokenContext): Fox[Array[Int]] + protected def getShape(dataset: NgffDataset, path: VaultPath)(implicit tc: TokenContext): Fox[Array[Long]] protected def createAdditionalAxis(name: String, index: Int, bounds: Array[Int]): Box[AdditionalAxis] = for { @@ -203,7 +203,7 @@ trait NgffExplorationUtils extends FoxImplicits { .filter(axis => !defaultAxes.contains(axis.name)) .zipWithIndex .map(axisAndIndex => - createAdditionalAxis(axisAndIndex._1.name, axisAndIndex._2, Array(0, shape(axisAndIndex._2))).toFox)) + createAdditionalAxis(axisAndIndex._1.name, axisAndIndex._2, Array(0, shape(axisAndIndex._2).toInt)).toFox)) duplicateNames = axes.map(_.name).diff(axes.map(_.name).distinct).distinct _ <- Fox.fromBool(duplicateNames.isEmpty) ?~> s"Additional axes names (${duplicateNames.mkString("", ", ", "")}) are not unique." 
} yield axes @@ -220,7 +220,7 @@ trait NgffExplorationUtils extends FoxImplicits { case Some(channeAxislIndex) => shape(channeAxislIndex) case _ => 1 } - } yield channelCount + } yield channelCount.toInt protected def createLayer(remotePath: VaultPath, credentialId: Option[String], diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_4Explorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_4Explorer.scala index 7f6991e1ae3..9b40b90427f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_4Explorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_4Explorer.scala @@ -96,7 +96,7 @@ class NgffV0_4Explorer(implicit val ec: ExecutionContext) parsedHeader <- zarrayPath.parseAsJson[ZarrHeader] ?~> s"failed to read zarr header at $zarrayPath" header = parsedHeader.shape.length match { case 2 => - parsedHeader.copy(shape = parsedHeader.shape ++ Array(1), chunks = parsedHeader.chunks ++ Array(1)) + parsedHeader.copy(shape = parsedHeader.shape ++ Array(1L), chunks = parsedHeader.chunks ++ Array(1)) case _ => parsedHeader } } yield header @@ -125,7 +125,7 @@ class NgffV0_4Explorer(implicit val ec: ExecutionContext) elementClass, boundingBox) - protected def getShape(dataset: NgffDataset, path: VaultPath)(implicit tc: TokenContext): Fox[Array[Int]] = + protected def getShape(dataset: NgffDataset, path: VaultPath)(implicit tc: TokenContext): Fox[Array[Long]] = for { zarrHeader <- getZarrHeader(dataset, path) shape = zarrHeader.shape diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_5Explorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_5Explorer.scala index 3b67e6902bb..6ec2421e76a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_5Explorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_5Explorer.scala @@ -122,7 +122,7 @@ class NgffV0_5Explorer(implicit val ec: ExecutionContext) elementClass, boundingBox) - protected def getShape(dataset: NgffDataset, path: VaultPath)(implicit tc: TokenContext): Fox[Array[Int]] = + protected def getShape(dataset: NgffDataset, path: VaultPath)(implicit tc: TokenContext): Fox[Array[Long]] = for { zarrHeader <- getZarrHeader(dataset, path) shape = zarrHeader.shape diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala index 02f15f564a7..7077ccc5e3f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala @@ -36,7 +36,7 @@ class PrecomputedExplorer(implicit val ec: ExecutionContext) extends RemoteLayer name <- Fox.successful(guessNameFromPath(remotePath)) firstScale <- precomputedHeader.scales.headOption.toFox boundingBox <- BoundingBox - .fromTopLeftAndSize(firstScale.voxel_offset.getOrElse(Array(0, 0, 0)), firstScale.size) + .fromTopLeftAndSize(firstScale.voxel_offset.getOrElse(Array(0, 0, 0)), firstScale.size.map(_.toInt)) .toFox elementClass: ElementClass.Value <- elementClassFromPrecomputedDataType(precomputedHeader.data_type).toFox ?~> s"Unknown data type ${precomputedHeader.data_type}" smallestResolution = firstScale.resolution diff 
--git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 7fed7a1a999..43cb8e85930 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -38,6 +38,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private val agglomerateDir = "agglomerates" + // TODO clear on dataset reload private lazy val openArraysCache = AlfuCache[String, DatasetArray]() // TODO unify with existing chunkContentsCache from binaryDataService? @@ -46,7 +47,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService val maxSizeKiloBytes = Math.floor(config.Datastore.Cache.ImageArrayChunks.maxSizeBytes.toDouble / 1000.0).toInt - def cacheWeight(_key: String, arrayBox: Box[MultiArray]): Int = + def cacheWeight(key: String, arrayBox: Box[MultiArray]): Int = arrayBox match { case Full(array) => (array.getSizeBytes / 1000L).toInt @@ -225,6 +226,15 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService } } yield skeleton + + def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Long] = + for { + array <- openZarrArrayCached("agglomerate_to_segments_offsets") + shape <- array.datasetShape.toFox ?~> "Could not determine array shape" + shapeFirstElement <- tryo(shape(0)).toFox + } yield shapeFirstElement + } class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { diff --git a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala index 8330a386eb3..9c24db0cb66 100644 --- a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala +++ b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala @@ -159,7 +159,7 @@ class VolumeTracingZarrStreamingController @Inject()( chunks = Array(channels, cubeLength, cubeLength, cubeLength) zarrHeader = ZarrHeader(zarr_format = 2, - shape = shape, + shape = shape.map(_.toLong), chunks = chunks, compressor = compressor, dtype = dtype, @@ -188,11 +188,11 @@ class VolumeTracingZarrStreamingController @Inject()( zarr_format = 3, node_type = "array", // channel, additional axes, XYZ - shape = Array(1) ++ additionalAxes.map(_.highestValue).toArray ++ Array( + shape = (Array(1) ++ additionalAxes.map(_.highestValue).toArray ++ Array( (tracing.boundingBox.width + tracing.boundingBox.topLeft.x) / magParsed.x, (tracing.boundingBox.height + tracing.boundingBox.topLeft.y) / magParsed.y, (tracing.boundingBox.depth + tracing.boundingBox.topLeft.z) / magParsed.z - ), + )).map(_.toLong), data_type = Left(tracing.elementClass.toString), chunk_grid = Left( ChunkGridSpecification( From 4ed54836428fbceffc4f01d813ad2807f940a7fb Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 27 May 2025 14:27:07 +0200 Subject: [PATCH 012/100] remove unused agglomeratesForAllSegments --- 
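A note on the Int-to-Long change in the previous patch, as a minimal sketch (not part of the series; names and the chunk shape below are illustrative): offsets into arrays such as segment_to_agglomerate are segment ids and can exceed Int.MaxValue, so selectedOffset becomes Array[Long], while the chunk index obtained by dividing by the per-dimension chunk shape still fits into an Int, which is why the computed chunk indices remain Int-valued.

// Sketch only -- mirrors the per-dimension arithmetic of ChunkUtils.computeChunkIndices.
object LongOffsetSketch {
  def chunkIndexFor(selectedOffset: Long, chunkShapeInDim: Int): Int =
    (selectedOffset / chunkShapeInDim).toInt

  def main(args: Array[String]): Unit = {
    val segmentId = 5000000000L // larger than Int.MaxValue (2147483647)
    println(chunkIndexFor(segmentId, 4096)) // 1220703, still well within Int range
  }
}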
.../controllers/DataSourceController.scala | 24 ------------------- .../services/AgglomerateService.scala | 9 ------- ....scalableminds.webknossos.datastore.routes | 1 - 3 files changed, 34 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index c7bcdb229de..b49ebb2bd72 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -374,30 +374,6 @@ class DataSourceController @Inject()( } } - def agglomerateIdsForAllSegmentIds( - organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - mappingName: String - ): Action[ListOfLong] = Action.async(validateProto[ListOfLong]) { implicit request => - accessTokenService.validateAccessFromTokenContext( - UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { - for { - agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateIds: Array[Long] <- agglomerateService - .agglomerateIdsForAllSegmentIds( - AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ) - ) - .toFox - } yield Ok(Json.toJson(agglomerateIds)) - } - } - def update(organizationId: String, datasetDirectoryName: String): Action[DataSource] = Action.async(validateJson[DataSource]) { implicit request => accessTokenService.validateAccessFromTokenContext( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 43cb8e85930..88d497c7fb3 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -509,15 +509,6 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi } - def agglomerateIdsForAllSegmentIds(agglomerateFileKey: AgglomerateFileKey): Box[Array[Long]] = { - val file = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile - tryo { - val reader = HDF5FactoryProvider.get.openForReading(file) - val agglomerateIds: Array[Long] = reader.uint64().readArray("/segment_to_agglomerate") - agglomerateIds - } - } - def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long): Box[Vec3Int] = { val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile val reader: IHDF5Reader = HDF5FactoryProvider.get.openForReading(hdfFile) diff --git a/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes b/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes index ea1aaa70266..bcaf26dc3f6 100644 --- a/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes +++ b/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes @@ -76,7 +76,6 @@ GET /datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerN GET /datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/agglomerates/:mappingName/agglomerateGraph/:agglomerateId 
@com.scalableminds.webknossos.datastore.controllers.DataSourceController.agglomerateGraph(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String, agglomerateId: Long) GET /datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/agglomerates/:mappingName/largestAgglomerateId @com.scalableminds.webknossos.datastore.controllers.DataSourceController.largestAgglomerateId(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) POST /datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/agglomerates/:mappingName/agglomeratesForSegments @com.scalableminds.webknossos.datastore.controllers.DataSourceController.agglomerateIdsForSegmentIds(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) -GET /datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/agglomerates/:mappingName/agglomeratesForAllSegments @com.scalableminds.webknossos.datastore.controllers.DataSourceController.agglomerateIdsForAllSegmentIds(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) GET /datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/agglomerates/:mappingName/positionForSegment @com.scalableminds.webknossos.datastore.controllers.DataSourceController.positionForSegmentViaAgglomerateFile(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String, segmentId: Long) # Mesh files From 291aab5ed133fd15866855d6582501a6e2fda5c8 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 27 May 2025 16:14:12 +0200 Subject: [PATCH 013/100] add shortcut for shape.product==0; implement segmentIdsForAgglomerateId --- .../controllers/DSMeshController.scala | 2 +- .../controllers/DataSourceController.scala | 24 +- .../datastore/datareaders/DatasetArray.scala | 49 ++-- .../datareaders/MultiArrayUtils.scala | 4 + .../services/AgglomerateService.scala | 267 +++++++++++------- .../services/SegmentIndexFileService.scala | 33 ++- .../services/mesh/MeshFileService.scala | 15 +- .../services/mesh/MeshMappingHelper.scala | 42 +-- ...uroglancerPrecomputedMeshFileService.scala | 2 +- 9 files changed, 252 insertions(+), 186 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index 9bc8d02096a..9d838421d4e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -66,7 +66,7 @@ class DSMeshController @Inject()( datasetDirectoryName, dataLayerName, request.body.meshFile.name) - segmentIds: List[Long] <- segmentIdsForAgglomerateIdIfNeeded( + segmentIds: Seq[Long] <- segmentIdsForAgglomerateIdIfNeeded( organizationId, datasetDirectoryName, dataLayerName, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index b49ebb2bd72..06508d7c469 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -297,11 +297,9 @@ class 
DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateGraph <- agglomerateService - .generateAgglomerateGraph( - AgglomerateFileKey(organizationId, datasetDirectoryName, dataLayerName, mappingName), - agglomerateId) - .toFox ?~> "agglomerateGraph.failed" + agglomerateGraph <- agglomerateService.generateAgglomerateGraph( + AgglomerateFileKey(organizationId, datasetDirectoryName, dataLayerName, mappingName), + agglomerateId) ?~> "agglomerateGraph.failed" } yield Ok(agglomerateGraph.toByteArray).as(protobufMimeType) } } @@ -335,16 +333,14 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - largestAgglomerateId: Long <- agglomerateService - .largestAgglomerateId( - AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ) + largestAgglomerateId: Long <- agglomerateService.largestAgglomerateId( + AgglomerateFileKey( + organizationId, + datasetDirectoryName, + dataLayerName, + mappingName ) - .toFox + ) } yield Ok(Json.toJson(largestAgglomerateId)) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index c4b5309caab..dfa848800f4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -190,32 +190,35 @@ class DatasetArray(vaultPath: VaultPath, } def readAsMultiArray(shape: Array[Int], offset: Array[Long])(implicit ec: ExecutionContext, - tc: TokenContext): Fox[MultiArray] = { - val totalOffset: Array[Long] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }.padTo(offset.length, 0) - val chunkIndices = ChunkUtils.computeChunkIndices(datasetShape, chunkShape, shape, totalOffset) - if (partialCopyingIsNotNeededForMultiArray(shape, totalOffset, chunkIndices)) { - for { - chunkIndex <- chunkIndices.headOption.toFox - sourceChunk: MultiArray <- getSourceChunkDataWithCache(chunkIndex, useSkipTypingShortcut = true) - } yield sourceChunk + tc: TokenContext): Fox[MultiArray] = + if (shape.product == 0) { + Fox.successful(MultiArrayUtils.createEmpty(rank)) } else { - val targetBuffer = MultiArrayUtils.createDataBuffer(header.resolvedDataType, shape) - val targetMultiArray = MultiArrayUtils.createArrayWithGivenStorage(targetBuffer, shape) - val copiedFuture = Fox.combined(chunkIndices.map { chunkIndex: Array[Int] => + val totalOffset: Array[Long] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }.padTo(offset.length, 0) + val chunkIndices = ChunkUtils.computeChunkIndices(datasetShape, chunkShape, shape, totalOffset) + if (partialCopyingIsNotNeededForMultiArray(shape, totalOffset, chunkIndices)) { for { - sourceChunk: MultiArray <- getSourceChunkDataWithCache(chunkIndex) - offsetInChunk = computeOffsetInChunkIgnoringAxisOrder(chunkIndex, totalOffset) - _ <- tryo(MultiArrayUtils.copyRange(offsetInChunk, sourceChunk, targetMultiArray)).toFox ?~> formatCopyRangeErrorWithoutAxisOrder( - offsetInChunk, - sourceChunk, - targetMultiArray) - } yield () - }) - for { - _ <- copiedFuture - } yield 
targetMultiArray + chunkIndex <- chunkIndices.headOption.toFox + sourceChunk: MultiArray <- getSourceChunkDataWithCache(chunkIndex, useSkipTypingShortcut = true) + } yield sourceChunk + } else { + val targetBuffer = MultiArrayUtils.createDataBuffer(header.resolvedDataType, shape) + val targetMultiArray = MultiArrayUtils.createArrayWithGivenStorage(targetBuffer, shape) + val copiedFuture = Fox.combined(chunkIndices.map { chunkIndex: Array[Int] => + for { + sourceChunk: MultiArray <- getSourceChunkDataWithCache(chunkIndex) + offsetInChunk = computeOffsetInChunkIgnoringAxisOrder(chunkIndex, totalOffset) + _ <- tryo(MultiArrayUtils.copyRange(offsetInChunk, sourceChunk, targetMultiArray)).toFox ?~> formatCopyRangeErrorWithoutAxisOrder( + offsetInChunk, + sourceChunk, + targetMultiArray) + } yield () + }) + for { + _ <- copiedFuture + } yield targetMultiArray + } } - } private def formatCopyRangeError(offsetInChunk: Array[Int], sourceChunk: MultiArray, target: MultiArray): String = s"Copying data from dataset chunk failed. Chunk shape (F): ${printAsOuterF(sourceChunk.getShape)}, target shape (F): ${printAsOuterF( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala index f1af69b890c..a9a2160b7a6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala @@ -45,6 +45,10 @@ object MultiArrayUtils extends LazyLogging { } } + def createEmpty(rank: Int): MultiArray = { + MultiArray.factory(MADataType.FLOAT, Array.fill(rank)(0)) + } + /** * Offset describes the displacement between source and target array.
*
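Note on the change above: DatasetArray.readAsMultiArray now returns MultiArrayUtils.createEmpty(rank) whenever the requested shape has shape.product == 0, so zero-length reads succeed with an empty MultiArray instead of needing caller-side guards. The generateSkeleton rewrite in the next diff relies on exactly this to drop its nodeCount == 0 / edgeCount == 0 branches. A minimal sketch of the resulting calling pattern, reusing names that appear in the surrounding diffs (edgesRange and edgeCount stand in for values computed there, so this is an illustration rather than part of the patch):

    for {
      agglomerateToEdges <- openZarrArrayCached("agglomerate_to_edges")
      // edgeCount may legitimately be 0; shape.product == 0 then yields an empty MultiArray
      edges <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0),
                                                   shape = Array(edgeCount.toInt, 2))
    } yield edges
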
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 88d497c7fb3..88a76e099f0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -150,37 +150,24 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = for { before <- Instant.nowFox - agglomerate_to_segments_offsets <- openZarrArrayCached("agglomerate_to_segments_offsets") - agglomerate_to_edges_offsets <- openZarrArrayCached("agglomerate_to_edges_offsets") + agglomerateToSegmentsOffsets <- openZarrArrayCached("agglomerate_to_segments_offsets") + agglomerateToEdgesOffsets <- openZarrArrayCached("agglomerate_to_edges_offsets") - positionsRange: MultiArray <- agglomerate_to_segments_offsets.readAsMultiArray(shape = Array(2), - offset = Array(agglomerateId)) - edgesRange: MultiArray <- agglomerate_to_edges_offsets.readAsMultiArray(shape = Array(2), - offset = Array(agglomerateId)) + positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(shape = Array(2), + offset = Array(agglomerateId)) + edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(shape = Array(2), + offset = Array(agglomerateId)) nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" - positions: MultiArray <- if (nodeCount == 0L) { - Fox.successful(MultiArray.factory(DataType.LONG, Array(0, 0))) - } else { - for { - agglomerate_to_positions <- openZarrArrayCached("agglomerate_to_positions") - positions <- agglomerate_to_positions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), - shape = Array(nodeCount.toInt, 3)) - } yield positions - } - edges: MultiArray <- if (edgeCount == 0L) { - Fox.successful(MultiArray.factory(DataType.LONG, Array(0, 0))) - } else { - for { - agglomerate_to_edges <- openZarrArrayCached("agglomerate_to_edges") - edges <- agglomerate_to_edges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), - shape = Array(edgeCount.toInt, 2)) - } yield edges - } - + agglomerateToPositions <- openZarrArrayCached("agglomerate_to_positions") + positions <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), + shape = Array(nodeCount.toInt, 3)) + agglomerateToEdges <- openZarrArrayCached("agglomerate_to_edges") + edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), + shape = Array(edgeCount.toInt, 2)) nodeIdStartAtOneOffset = 1 // TODO use multiarray index iterators? 
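The offsets arrays read above and in the following hunk share a cumulative layout: for a given agglomerateId, the two consecutive entries of agglomerate_to_segments_offsets (or agglomerate_to_edges_offsets) delimit the half-open row range [start, end) of that agglomerate in the corresponding data array, so end - start is the node or edge count. A hedged helper sketch of that lookup, using only the readAsMultiArray signature introduced in this series (the helper itself is illustrative and not part of the patches):

    // Reads the [start, end) row range for one agglomerate id from an offsets array.
    private def rowRange(offsetsArray: DatasetArray, agglomerateId: Long)(
        implicit ec: ExecutionContext, tc: TokenContext): Fox[(Long, Long)] =
      for {
        range <- offsetsArray.readAsMultiArray(offset = Array(agglomerateId), shape = Array(2))
      } yield (range.getLong(0), range.getLong(1)) // element count = end - start
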
@@ -235,6 +222,69 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService shapeFirstElement <- tryo(shape(0)).toFox } yield shapeFirstElement + def generateAgglomerateGraph(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[AgglomerateGraph] = + for { + agglomerateToSegmentsOffsets <- openZarrArrayCached("agglomerate_to_segments_offsets") + agglomerateToEdgesOffsets <- openZarrArrayCached("agglomerate_to_edges_offsets") + + positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(shape = Array(2), + offset = Array(agglomerateId)) + edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(shape = Array(2), + offset = Array(agglomerateId)) + nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) + edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) + edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges + _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" + _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" + agglomerateToPositions <- openZarrArrayCached("agglomerate_to_positions") + positions: MultiArray <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), + shape = Array(nodeCount.toInt, 3)) + agglomerateToSegments <- openZarrArrayCached("agglomerate_to_segments") + segmentIds: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = Array(positionsRange.getInt(0)), + shape = Array(nodeCount.toInt)) + agglomerateToEdges <- openZarrArrayCached("agglomerate_to_edges") + edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), + shape = Array(edgeCount.toInt, 2)) + agglomerateToAffinities <- openZarrArray("agglomerate_to_affinities") + affinities: MultiArray <- agglomerateToAffinities.readAsMultiArray(offset = Array(edgesRange.getLong(0)), + shape = Array(edgeCount.toInt)) + + agglomerateGraph = AgglomerateGraph( + // unsafeWrapArray is fine, because the underlying arrays are never mutated + segments = ArraySeq.unsafeWrapArray(segmentIds.getStorage.asInstanceOf[Array[Long]]), + edges = (0 until edges.getShape()(0)).map { edgeIdx: Int => + AgglomerateEdge( + source = segmentIds.getLong(edges.getInt(edges.getIndex.set(Array(edgeIdx, 0)))), + target = segmentIds.getLong(edges.getInt(edges.getIndex.set(Array(edgeIdx, 1)))) + ) + }, + positions = (0 until nodeCount.toInt).map { nodeIdx: Int => + Vec3IntProto( + positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) + ) + }, + affinities = ArraySeq.unsafeWrapArray(affinities.getStorage.asInstanceOf[Array[Float]]) + ) + } yield agglomerateGraph + + def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, + agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = + for { + agglomerateToSegmentsOffsets <- openZarrArrayCached("agglomerate_to_segments_offsets") + agglomerateToSegments <- openZarrArrayCached("agglomerate_to_segments") + segmentRange <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = Array(agglomerateId), shape = Array(2)) + segmentCount = segmentRange.getLong(1) - segmentRange.getLong(0) + segmentIds <- if (segmentCount == 0) + Fox.successful(MultiArray.factory(DataType.LONG, Array(0, 0))) + else + 
agglomerateToSegments.readAsMultiArray(offset = Array(segmentRange.getLong(0)), + shape = Array(segmentCount.toInt)) + } yield segmentIds.getStorage.asInstanceOf[Array[Long]].toSeq + } class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { @@ -251,6 +301,9 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private val cumsumFileName = "cumsum.json" + // TODO remove + private val useZarr = true + lazy val agglomerateFileCache = new AgglomerateFileCache(config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) def exploreAgglomerates(organizationId: String, datasetDirectoryName: String, dataLayerName: String): Set[String] = { @@ -269,7 +322,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = - if (true) { + if (useZarr) { zarrAgglomerateService.applyAgglomerate(request)(data) } else applyAgglomerateHdf5(request)(data).toFox @@ -368,7 +421,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi dataLayerName: String, mappingName: String, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = - if (true) { + if (useZarr) { zarrAgglomerateService.generateSkeleton(organizationId, datasetDirectoryName, dataLayerName, @@ -460,39 +513,45 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi }).toFox } - def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey): Box[Long] = { - val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile - - tryo { - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) - reader.`object`().getNumberOfElements("/agglomerate_to_segments_offsets") - 1L + def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Long] = + if (useZarr) zarrAgglomerateService.largestAgglomerateId(agglomerateFileKey) + else { + val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile + tryo { + val reader = HDF5FactoryProvider.get.openForReading(hdfFile) + reader.`object`().getNumberOfElements("/agglomerate_to_segments_offsets") - 1L + }.toFox } - } - def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long): Box[List[Long]] = { - val hdfFile = - dataBaseDir - .resolve(agglomerateFileKey.organizationId) - .resolve(agglomerateFileKey.datasetDirectoryName) - .resolve(agglomerateFileKey.layerName) - .resolve(agglomerateDir) - .resolve(s"${agglomerateFileKey.mappingName}.$agglomerateFileExtension") - .toFile + def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, + agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = + if (useZarr) + zarrAgglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId) + else { + val hdfFile = + dataBaseDir + .resolve(agglomerateFileKey.organizationId) + .resolve(agglomerateFileKey.datasetDirectoryName) + .resolve(agglomerateFileKey.layerName) + .resolve(agglomerateDir) + .resolve(s"${agglomerateFileKey.mappingName}.$agglomerateFileExtension") + .toFile + + tryo { + val reader = HDF5FactoryProvider.get.openForReading(hdfFile) + val positionsRange: Array[Long] = + 
reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) - tryo { - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) - val positionsRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) - - val segmentCount = positionsRange(1) - positionsRange(0) - val segmentIds: Array[Long] = - if (segmentCount == 0) Array.empty[Long] - else { - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", segmentCount.toInt, positionsRange(0)) - } - segmentIds.toList + val segmentCount = positionsRange(1) - positionsRange(0) + val segmentIds: Array[Long] = + if (segmentCount == 0) Array.empty[Long] + else { + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", segmentCount.toInt, positionsRange(0)) + } + segmentIds.toSeq + }.toFox } - } def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long]): Box[Seq[Long]] = { val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileKey)(initHDFReader) @@ -537,54 +596,60 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi else binarySearchForSegment(rangeStart, middle - 1L, segmentId, reader) } - def generateAgglomerateGraph(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long): Box[AgglomerateGraph] = - tryo { - val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile + def generateAgglomerateGraph(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[AgglomerateGraph] = + if (useZarr) + zarrAgglomerateService.generateAgglomerateGraph(agglomerateFileKey, agglomerateId) + else { + tryo { + val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) + val reader = HDF5FactoryProvider.get.openForReading(hdfFile) - val positionsRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) - val edgesRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) + val positionsRange: Array[Long] = + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) + val edgesRange: Array[Long] = + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) - val nodeCount = positionsRange(1) - positionsRange(0) - val edgeCount = edgesRange(1) - edgesRange(0) - val edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges - if (nodeCount > edgeLimit) { - throw new Exception(s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)") - } - if (edgeCount > edgeLimit) { - throw new Exception(s"Agglomerate has too many edges ($edgeCount > $edgeLimit)") - } - val segmentIds: Array[Long] = - if (nodeCount == 0L) Array[Long]() - else - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", nodeCount.toInt, positionsRange(0)) - val positions: Array[Array[Long]] = - if (nodeCount == 0L) Array[Array[Long]]() - else - reader - .uint64() - .readMatrixBlockWithOffset("/agglomerate_to_positions", nodeCount.toInt, 3, positionsRange(0), 0) - val edges: Array[Array[Long]] = - if (edgeCount == 0L) Array[Array[Long]]() - else - reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) - val affinities: Array[Float] = - if 
(edgeCount == 0L) Array[Float]() - else - reader.float32().readArrayBlockWithOffset("/agglomerate_to_affinities", edgeCount.toInt, edgesRange(0)) - - AgglomerateGraph( - // unsafeWrapArray is fine, because the underlying arrays are never mutated - segments = ArraySeq.unsafeWrapArray(segmentIds), - edges = ArraySeq.unsafeWrapArray( - edges.map(e => AgglomerateEdge(source = segmentIds(e(0).toInt), target = segmentIds(e(1).toInt)))), - positions = - ArraySeq.unsafeWrapArray(positions.map(pos => Vec3IntProto(pos(0).toInt, pos(1).toInt, pos(2).toInt))), - affinities = ArraySeq.unsafeWrapArray(affinities) - ) + val nodeCount = positionsRange(1) - positionsRange(0) + val edgeCount = edgesRange(1) - edgesRange(0) + val edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges + if (nodeCount > edgeLimit) { + throw new Exception(s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)") + } + if (edgeCount > edgeLimit) { + throw new Exception(s"Agglomerate has too many edges ($edgeCount > $edgeLimit)") + } + val segmentIds: Array[Long] = + if (nodeCount == 0L) Array[Long]() + else + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", nodeCount.toInt, positionsRange(0)) + val positions: Array[Array[Long]] = + if (nodeCount == 0L) Array[Array[Long]]() + else + reader + .uint64() + .readMatrixBlockWithOffset("/agglomerate_to_positions", nodeCount.toInt, 3, positionsRange(0), 0) + val edges: Array[Array[Long]] = + if (edgeCount == 0L) Array[Array[Long]]() + else + reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) + val affinities: Array[Float] = + if (edgeCount == 0L) Array[Float]() + else + reader.float32().readArrayBlockWithOffset("/agglomerate_to_affinities", edgeCount.toInt, edgesRange(0)) + + AgglomerateGraph( + // unsafeWrapArray is fine, because the underlying arrays are never mutated + segments = ArraySeq.unsafeWrapArray(segmentIds), + edges = ArraySeq.unsafeWrapArray( + edges.map(e => AgglomerateEdge(source = segmentIds(e(0).toInt), target = segmentIds(e(1).toInt)))), + positions = + ArraySeq.unsafeWrapArray(positions.map(pos => Vec3IntProto(pos(0).toInt, pos(1).toInt, pos(2).toInt))), + affinities = ArraySeq.unsafeWrapArray(affinities) + ) + }.toFox } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala index 541b5a040d5..108d6b4c9ce 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala @@ -185,11 +185,11 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, )) bucketData <- binaryDataServiceHolder.binaryDataService.handleMultipleBucketRequests(bucketRequests) } yield (bucketData, dataLayer.elementClass) - private def getBucketPositions( - organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - mappingName: Option[String])(segmentOrAgglomerateId: Long, mag: Vec3Int): Fox[Set[Vec3IntProto]] = + private def getBucketPositions(organizationId: String, + datasetDirectoryName: String, + dataLayerName: String, + mappingName: Option[String])(segmentOrAgglomerateId: Long, mag: Vec3Int)( + implicit tc: TokenContext): Fox[Set[Vec3IntProto]] = for { segmentIds <- getSegmentIdsForAgglomerateIdIfNeeded(organizationId, datasetDirectoryName, @@ -212,11 +212,12 
@@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, bucketPositions = bucketPositionsInFileMag.map(_ / (mag / fileMag)) } yield bucketPositions - private def getSegmentIdsForAgglomerateIdIfNeeded(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - segmentOrAgglomerateId: Long, - mappingNameOpt: Option[String]): Fox[List[Long]] = + private def getSegmentIdsForAgglomerateIdIfNeeded( + organizationId: String, + datasetDirectoryName: String, + dataLayerName: String, + segmentOrAgglomerateId: Long, + mappingNameOpt: Option[String])(implicit tc: TokenContext): Fox[Seq[Long]] = // Editable mappings cannot happen here since those requests go to the tracingstore mappingNameOpt match { case Some(mappingName) => @@ -228,14 +229,12 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, dataLayerName, mappingName ) - largestAgglomerateId <- agglomerateService.largestAgglomerateId(agglomerateFileKey).toFox + largestAgglomerateId <- agglomerateService.largestAgglomerateId(agglomerateFileKey) segmentIds <- if (segmentOrAgglomerateId <= largestAgglomerateId) { - agglomerateService - .segmentIdsForAgglomerateId( - agglomerateFileKey, - segmentOrAgglomerateId - ) - .toFox + agglomerateService.segmentIdsForAgglomerateId( + agglomerateFileKey, + segmentOrAgglomerateId + ) } else Fox.successful(List.empty) // agglomerate id is outside of file range, was likely created during brushing } yield segmentIds diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index ac577ef2dbf..92f04699283 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -163,12 +163,11 @@ class MeshFileService @Inject()(config: DataStoreConfig)(implicit ec: ExecutionC .toOption .getOrElse(0) - def listMeshChunksForSegmentsMerged( - organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - meshFileName: String, - segmentIds: List[Long])(implicit m: MessagesProvider): Fox[WebknossosSegmentInfo] = + def listMeshChunksForSegmentsMerged(organizationId: String, + datasetDirectoryName: String, + dataLayerName: String, + meshFileName: String, + segmentIds: Seq[Long])(implicit m: MessagesProvider): Fox[WebknossosSegmentInfo] = for { _ <- Fox.successful(()) meshFilePath: Path = dataBaseDir @@ -190,12 +189,12 @@ class MeshFileService @Inject()(config: DataStoreConfig)(implicit ec: ExecutionC } yield wkChunkInfos private def listMeshChunksForSegments(meshFilePath: Path, - segmentIds: List[Long], + segmentIds: Seq[Long], lodScaleMultiplier: Double, transform: Array[Array[Double]]): List[List[MeshLodInfo]] = meshFileCache .withCachedHdf5(meshFilePath) { cachedMeshFile: CachedHdf5File => - segmentIds.flatMap(segmentId => + segmentIds.toList.flatMap(segmentId => listMeshChunksForSegment(cachedMeshFile, segmentId, lodScaleMultiplier, transform)) } .toOption diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala index ecee0011d7a..96a688e980c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala @@ -27,7 +27,7 @@ trait MeshMappingHelper extends FoxImplicits { agglomerateId: Long, mappingNameForMeshFile: Option[String], omitMissing: Boolean // If true, failing lookups in the agglomerate file will just return empty list. - )(implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = + )(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = (targetMappingName, editableMappingTracingId) match { case (None, None) => // No mapping selected, assume id matches meshfile @@ -40,15 +40,17 @@ trait MeshMappingHelper extends FoxImplicits { // assume agglomerate id, fetch oversegmentation segment ids for it for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - segmentIdsBox = agglomerateService.segmentIdsForAgglomerateId( - AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ), - agglomerateId - ) + segmentIdsBox <- agglomerateService + .segmentIdsForAgglomerateId( + AgglomerateFileKey( + organizationId, + datasetDirectoryName, + dataLayerName, + mappingName + ), + agglomerateId + ) + .shiftBox segmentIds <- segmentIdsBox match { case Full(segmentIds) => Fox.successful(segmentIds) case _ => if (omitMissing) Fox.successful(List.empty) else segmentIdsBox.toFox @@ -67,17 +69,15 @@ trait MeshMappingHelper extends FoxImplicits { else // the agglomerate id is not present in the editable mapping. Fetch its info from the base mapping. for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - localSegmentIds <- agglomerateService - .segmentIdsForAgglomerateId( - AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ), - agglomerateId - ) - .toFox + localSegmentIds <- agglomerateService.segmentIdsForAgglomerateId( + AgglomerateFileKey( + organizationId, + datasetDirectoryName, + dataLayerName, + mappingName + ), + agglomerateId + ) } yield localSegmentIds } yield segmentIds case _ => Fox.failure("Cannot determine segment ids for editable mapping without base mapping") diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala index 91bfdc51bc1..0497dca3b87 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala @@ -125,7 +125,7 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, ) } - def listMeshChunksForMultipleSegments(meshFilePathOpt: Option[String], segmentId: List[Long])( + def listMeshChunksForMultipleSegments(meshFilePathOpt: Option[String], segmentId: Seq[Long])( implicit tc: TokenContext): Fox[WebknossosSegmentInfo] = for { meshFilePath <- meshFilePathOpt.toFox ?~> "No mesh file path provided" From 0439bcee0b2c871bacd02b720a6cc75bd187115f Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 27 May 2025 16:16:56 +0200 Subject: [PATCH 014/100] remove unused test --- .../datastore/controllers/Application.scala | 8 -------- .../datastore/services/AgglomerateService.scala | 15 ++++----------- .../com.scalableminds.webknossos.datastore.routes | 1 - 3 files changed, 4 
insertions(+), 20 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala index f22b572a55d..7a3802c5e64 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala @@ -40,14 +40,6 @@ class Application @Inject()(redisClient: DataStoreRedisStore, } } - def testAgglomerateZarr: Action[AnyContent] = Action.async { implicit request => - log() { - for { - data <- agglomerateService.readFromSegmentToAgglomerate - } yield Ok(s"got ${data.getSize} elements of type ${data.getDataType}: ${data.toString}") - } - } - // Test that the NativeBucketScanner works. // The result is stored in a val because we expect that this continues to work if it works on startup. private lazy val testNativeBucketScanner = tryo { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 88a76e099f0..37a7f20dfe6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -59,13 +59,6 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService protected lazy val bucketScanner = new NativeBucketScanner() - def readFromSegmentToAgglomerate(implicit ec: ExecutionContext, tc: TokenContext): Fox[ucar.ma2.Array] = - for { - zarrArray <- openZarrArrayCached("segment_to_agglomerate") - read <- zarrArray.readAsMultiArray(Array(10), Array(2)) - _ = logger.info(s"read ${read.getSize} elements from agglomerate file segmentToAgglomerate") - } yield read - private def mapSingleSegment(zarrArray: DatasetArray, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { @@ -153,10 +146,10 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService agglomerateToSegmentsOffsets <- openZarrArrayCached("agglomerate_to_segments_offsets") agglomerateToEdgesOffsets <- openZarrArrayCached("agglomerate_to_edges_offsets") - positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(shape = Array(2), - offset = Array(agglomerateId)) - edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(shape = Array(2), - offset = Array(agglomerateId)) + positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = Array(agglomerateId), + shape = Array(2)) + edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = Array(agglomerateId), + shape = Array(2)) nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges diff --git a/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes b/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes index bcaf26dc3f6..fef187d16e3 100644 --- a/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes +++ b/webknossos-datastore/conf/com.scalableminds.webknossos.datastore.routes @@ -3,7 +3,6 @@ # Health endpoint GET /health @com.scalableminds.webknossos.datastore.controllers.Application.health -GET 
/testAgglomerateZarr @com.scalableminds.webknossos.datastore.controllers.Application.testAgglomerateZarr # Read image data POST /datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.BinaryDataController.requestViaWebknossos(organizationId: String, datasetDirectoryName: String, dataLayerName: String) From 11027d717554896b920166c7c24f36057aa35cab Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 28 May 2025 10:02:59 +0200 Subject: [PATCH 015/100] implement positionForSegmentId; agglomerateIdsForSegmentIds --- .../datastore/DataStoreModule.scala | 1 + .../controllers/DataSourceController.scala | 27 ++--- .../services/AgglomerateService.scala | 114 +++++++++++++----- 3 files changed, 96 insertions(+), 46 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index 7a2bd9b28ba..4b1ee3c06a2 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -28,6 +28,7 @@ class DataStoreModule extends AbstractModule { bind(classOf[BinaryDataServiceHolder]).asEagerSingleton() bind(classOf[MappingService]).asEagerSingleton() bind(classOf[AgglomerateService]).asEagerSingleton() + bind(classOf[ZarrAgglomerateService]).asEagerSingleton() bind(classOf[AdHocMeshServiceHolder]).asEagerSingleton() bind(classOf[ApplicationHealthService]).asEagerSingleton() bind(classOf[DSDatasetErrorLoggingService]).asEagerSingleton() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 06508d7c469..a8ae4237f45 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -315,10 +315,9 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - position <- agglomerateService - .positionForSegmentId(AgglomerateFileKey(organizationId, datasetDirectoryName, dataLayerName, mappingName), - segmentId) - .toFox ?~> "getSegmentPositionFromAgglomerateFile.failed" + position <- agglomerateService.positionForSegmentId( + AgglomerateFileKey(organizationId, datasetDirectoryName, dataLayerName, mappingName), + segmentId) ?~> "getSegmentPositionFromAgglomerateFile.failed" } yield Ok(Json.toJson(position)) } } @@ -355,17 +354,15 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateIds: Seq[Long] <- agglomerateService - .agglomerateIdsForSegmentIds( - AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ), - request.body.items - ) - .toFox + agglomerateIds: Seq[Long] <- agglomerateService.agglomerateIdsForSegmentIds( + AgglomerateFileKey( + organizationId, + datasetDirectoryName, + dataLayerName, + mappingName + ), + request.body.items + ) } yield 
Ok(ListOfLong(agglomerateIds).toByteArray) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 37a7f20dfe6..2228c8b183f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -59,10 +59,10 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService protected lazy val bucketScanner = new NativeBucketScanner() - private def mapSingleSegment(zarrArray: DatasetArray, segmentId: Long)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Long] = + private def mapSingleSegment(segmentToAgglomerate: DatasetArray, segmentId: Long)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Long] = for { - asMultiArray <- zarrArray.readAsMultiArray(shape = Array(1), offset = Array(segmentId)) + asMultiArray <- segmentToAgglomerate.readAsMultiArray(shape = Array(1), offset = Array(segmentId)) } yield asMultiArray.getLong(0) private def openZarrArrayCached(zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext) = @@ -109,11 +109,11 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService bucketScanner.collectSegmentIds(data, bytesPerElement, isSigned = false, skipZeroes = false) for { - zarrArray <- openZarrArrayCached("segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached("segment_to_agglomerate") beforeBuildMap = Instant.now relevantAgglomerateMap: Map[Long, Long] <- Fox .serialCombined(distinctSegmentIds) { segmentId => - mapSingleSegment(zarrArray, segmentId).map((segmentId, _)) + mapSingleSegment(segmentToAgglomerate, segmentId).map((segmentId, _)) } .map(_.toMap) _ = Instant.logSince(beforeBuildMap, "build map") @@ -278,6 +278,51 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService shape = Array(segmentCount.toInt)) } yield segmentIds.getStorage.asInstanceOf[Array[Long]].toSeq + def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[Seq[Long]] = + for { + segmentToAgglomerate <- openZarrArrayCached("segment_to_agglomerate") + agglomerateIds <- Fox.serialCombined(segmentIds) { segmentId => + mapSingleSegment(segmentToAgglomerate, segmentId) + } + } yield agglomerateIds + + def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Vec3Int] = + for { + segmentToAgglomerate <- openZarrArrayCached("segment_to_agglomerate") + agglomerateId <- mapSingleSegment(segmentToAgglomerate, segmentId) + agglomerateToSegmentsOffsets <- openZarrArrayCached("agglomerate_to_segments_offsets") + segmentsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = Array(agglomerateId), + shape = Array(2)) + agglomerateToSegments <- openZarrArrayCached("agglomerate_to_segments") + segmentIndex <- binarySearchForSegment(segmentsRange.getLong(0), + segmentsRange.getLong(1), + segmentId, + agglomerateToSegments) + agglomerateToPositions <- openZarrArrayCached("agglomerate_to_positions") + position <- agglomerateToPositions.readAsMultiArray(offset = Array(segmentIndex, 0), shape = Array(3, 1)) + } yield Vec3Int(position.getInt(0), position.getInt(1), position.getInt(2)) + + private def 
binarySearchForSegment( + rangeStart: Long, + rangeEnd: Long, + segmentId: Long, + agglomerateToSegments: DatasetArray)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = + if (rangeStart > rangeEnd) Fox.failure("Could not find segmentId in agglomerate file") + else { + val middle = rangeStart + (rangeEnd - rangeStart) / 2 + for { + segmentIdAtMiddleMA <- agglomerateToSegments.readAsMultiArray(offset = Array(middle), shape = Array(1)) + segmentIdAtMiddle = segmentIdAtMiddleMA.getLong(0) + segmentIndex <- if (segmentIdAtMiddle == segmentId) + Fox.successful(middle) + else if (segmentIdAtMiddle < segmentId) { + binarySearchForSegment(middle + 1L, rangeEnd, segmentId, agglomerateToSegments) + } else binarySearchForSegment(rangeStart, middle - 1L, segmentId, agglomerateToSegments) + } yield segmentIndex + } } class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { @@ -546,34 +591,41 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi }.toFox } - def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long]): Box[Seq[Long]] = { - val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileKey)(initHDFReader) - - tryo { - val agglomerateIds = segmentIds.map { segmentId: Long => - cachedAgglomerateFile.agglomerateIdCache.withCache(segmentId, - cachedAgglomerateFile.reader, - cachedAgglomerateFile.dataset)(readHDF) - } - cachedAgglomerateFile.finishAccess() - agglomerateIds + def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[Seq[Long]] = + if (useZarr) { + zarrAgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileKey, segmentIds) + } else { + val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileKey)(initHDFReader) + tryo { + val agglomerateIds = segmentIds.map { segmentId: Long => + cachedAgglomerateFile.agglomerateIdCache.withCache(segmentId, + cachedAgglomerateFile.reader, + cachedAgglomerateFile.dataset)(readHDF) + } + cachedAgglomerateFile.finishAccess() + agglomerateIds + }.toFox } - } - - def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long): Box[Vec3Int] = { - val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile - val reader: IHDF5Reader = HDF5FactoryProvider.get.openForReading(hdfFile) - for { - agglomerateIdArr: Array[Long] <- tryo( - reader.uint64().readArrayBlockWithOffset("/segment_to_agglomerate", 1, segmentId)) - agglomerateId = agglomerateIdArr(0) - segmentsRange: Array[Long] <- tryo( - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId)) - segmentIndex <- binarySearchForSegment(segmentsRange(0), segmentsRange(1), segmentId, reader) - position <- tryo(reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_positions", 1, 3, segmentIndex, 0)(0)) - } yield Vec3Int(position(0).toInt, position(1).toInt, position(2).toInt) - } + def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Vec3Int] = + if (useZarr) zarrAgglomerateService.positionForSegmentId(agglomerateFileKey, segmentId) + else { + val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile + val reader: IHDF5Reader = HDF5FactoryProvider.get.openForReading(hdfFile) + (for { + agglomerateIdArr: Array[Long] <- tryo( + 
reader.uint64().readArrayBlockWithOffset("/segment_to_agglomerate", 1, segmentId)) + agglomerateId = agglomerateIdArr(0) + segmentsRange: Array[Long] <- tryo( + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId)) + segmentIndex <- binarySearchForSegment(segmentsRange(0), segmentsRange(1), segmentId, reader) + position <- tryo( + reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_positions", 1, 3, segmentIndex, 0)(0)) + } yield Vec3Int(position(0).toInt, position(1).toInt, position(2).toInt)).toFox + } @tailrec private def binarySearchForSegment(rangeStart: Long, From 0c5d6471e5426402c45864ea8548e4c63cc53c28 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 28 May 2025 10:15:14 +0200 Subject: [PATCH 016/100] select mapping by request --- .../services/AgglomerateService.scala | 84 ++++++++++++------- 1 file changed, 53 insertions(+), 31 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 2228c8b183f..94daad2d85a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -39,7 +39,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService private val agglomerateDir = "agglomerates" // TODO clear on dataset reload - private lazy val openArraysCache = AlfuCache[String, DatasetArray]() + private lazy val openArraysCache = AlfuCache[(AgglomerateFileKey, String), DatasetArray]() // TODO unify with existing chunkContentsCache from binaryDataService? 
private lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { @@ -65,15 +65,16 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService asMultiArray <- segmentToAgglomerate.readAsMultiArray(shape = Array(1), offset = Array(segmentId)) } yield asMultiArray.getLong(0) - private def openZarrArrayCached(zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext) = - openArraysCache.getOrLoad(zarrArrayName, zarrArrayName => openZarrArray(zarrArrayName)) + private def openZarrArrayCached(agglomerateFileKey: AgglomerateFileKey, + zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext) = + openArraysCache.getOrLoad((agglomerateFileKey, zarrArrayName), + _ => openZarrArray(agglomerateFileKey, zarrArrayName)) - private def openZarrArray(zarrArrayName: String)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[DatasetArray] = { - val zarrGroupPath = - dataBaseDir - .resolve("sample_organization/test-agglomerate-file-zarr/segmentation/agglomerates/agglomerate_view_55") - .toAbsolutePath + private def openZarrArray(agglomerateFileKey: AgglomerateFileKey, zarrArrayName: String)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[DatasetArray] = { + + val zarrGroupPath = agglomerateFileKey.zarrGroupPath(dataBaseDir, agglomerateDir).toAbsolutePath for { groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(s"file://$zarrGroupPath"), None)) segmentToAgglomeratePath = groupVaultPath / zarrArrayName @@ -91,7 +92,6 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService tc: TokenContext): Fox[Array[Byte]] = { val agglomerateFileKey = AgglomerateFileKey.fromDataRequest(request) - val zarrGroupPath = agglomerateFileKey.zarrGroupPath(dataBaseDir, agglomerateDir).toAbsolutePath def convertToAgglomerate(segmentIds: Array[Long], relevantAgglomerateMap: Map[Long, Long], @@ -109,7 +109,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService bucketScanner.collectSegmentIds(data, bytesPerElement, isSigned = false, skipZeroes = false) for { - segmentToAgglomerate <- openZarrArrayCached("segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") beforeBuildMap = Instant.now relevantAgglomerateMap: Map[Long, Long] <- Fox .serialCombined(distinctSegmentIds) { segmentId => @@ -143,8 +143,9 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = for { before <- Instant.nowFox - agglomerateToSegmentsOffsets <- openZarrArrayCached("agglomerate_to_segments_offsets") - agglomerateToEdgesOffsets <- openZarrArrayCached("agglomerate_to_edges_offsets") + agglomerateFileKey = AgglomerateFileKey(organizationId, datasetDirectoryName, dataLayerName, mappingName) + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") + agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = Array(agglomerateId), shape = Array(2)) @@ -155,10 +156,10 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" _ <- Fox.fromBool(edgeCount <= edgeLimit) 
?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" - agglomerateToPositions <- openZarrArrayCached("agglomerate_to_positions") + agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") positions <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), shape = Array(nodeCount.toInt, 3)) - agglomerateToEdges <- openZarrArrayCached("agglomerate_to_edges") + agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), shape = Array(edgeCount.toInt, 2)) nodeIdStartAtOneOffset = 1 @@ -210,7 +211,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { - array <- openZarrArrayCached("agglomerate_to_segments_offsets") + array <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") shape <- array.datasetShape.toFox ?~> "Could not determine array shape" shapeFirstElement <- tryo(shape(0)).toFox } yield shapeFirstElement @@ -219,8 +220,8 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService implicit ec: ExecutionContext, tc: TokenContext): Fox[AgglomerateGraph] = for { - agglomerateToSegmentsOffsets <- openZarrArrayCached("agglomerate_to_segments_offsets") - agglomerateToEdgesOffsets <- openZarrArrayCached("agglomerate_to_edges_offsets") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") + agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(shape = Array(2), offset = Array(agglomerateId)) @@ -231,16 +232,16 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" - agglomerateToPositions <- openZarrArrayCached("agglomerate_to_positions") + agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") positions: MultiArray <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), shape = Array(nodeCount.toInt, 3)) - agglomerateToSegments <- openZarrArrayCached("agglomerate_to_segments") + agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") segmentIds: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = Array(positionsRange.getInt(0)), shape = Array(nodeCount.toInt)) - agglomerateToEdges <- openZarrArrayCached("agglomerate_to_edges") + agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), shape = Array(edgeCount.toInt, 2)) - agglomerateToAffinities <- openZarrArray("agglomerate_to_affinities") + agglomerateToAffinities <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_affinities") affinities: MultiArray <- agglomerateToAffinities.readAsMultiArray(offset = Array(edgesRange.getLong(0)), shape = Array(edgeCount.toInt)) @@ -267,8 +268,8 @@ class ZarrAgglomerateService 
@Inject()(config: DataStoreConfig, dataVaultService def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = for { - agglomerateToSegmentsOffsets <- openZarrArrayCached("agglomerate_to_segments_offsets") - agglomerateToSegments <- openZarrArrayCached("agglomerate_to_segments") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") + agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") segmentRange <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = Array(agglomerateId), shape = Array(2)) segmentCount = segmentRange.getLong(1) - segmentRange.getLong(0) segmentIds <- if (segmentCount == 0) @@ -282,7 +283,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = for { - segmentToAgglomerate <- openZarrArrayCached("segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") agglomerateIds <- Fox.serialCombined(segmentIds) { segmentId => mapSingleSegment(segmentToAgglomerate, segmentId) } @@ -291,17 +292,17 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Vec3Int] = for { - segmentToAgglomerate <- openZarrArrayCached("segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") agglomerateId <- mapSingleSegment(segmentToAgglomerate, segmentId) - agglomerateToSegmentsOffsets <- openZarrArrayCached("agglomerate_to_segments_offsets") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") segmentsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = Array(agglomerateId), shape = Array(2)) - agglomerateToSegments <- openZarrArrayCached("agglomerate_to_segments") + agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") segmentIndex <- binarySearchForSegment(segmentsRange.getLong(0), segmentsRange.getLong(1), segmentId, agglomerateToSegments) - agglomerateToPositions <- openZarrArrayCached("agglomerate_to_positions") + agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") position <- agglomerateToPositions.readAsMultiArray(offset = Array(segmentIndex, 0), shape = Array(3, 1)) } yield Vec3Int(position.getInt(0), position.getInt(1), position.getInt(2)) @@ -354,8 +355,29 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi paths.map(path => FilenameUtils.removeExtension(path.getFileName.toString)) } .toOption - .getOrElse(Nil) - .toSet ++ Set("agglomerate_view_5") // TODO + .getOrElse(Nil) // TODO explore zarr agglomerates? 
+ .toSet ++ Set( + "agglomerate_view_5", + "agglomerate_view_10", + "agglomerate_view_15", + "agglomerate_view_20", + "agglomerate_view_25", + "agglomerate_view_30", + "agglomerate_view_35", + "agglomerate_view_40", + "agglomerate_view_45", + "agglomerate_view_50", + "agglomerate_view_55", + "agglomerate_view_60", + "agglomerate_view_65", + "agglomerate_view_70", + "agglomerate_view_75", + "agglomerate_view_80", + "agglomerate_view_85", + "agglomerate_view_90", + "agglomerate_view_95", + "agglomerate_view_100" + ) } def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])(implicit ec: ExecutionContext, From 90d97cd2556158c0fb68f3c271fd243a01a65c92 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 28 May 2025 10:41:39 +0200 Subject: [PATCH 017/100] shortcut for single-dimension shape+offset --- .../datastore/controllers/Application.scala | 9 ++--- .../DatasetArrayBucketProvider.scala | 4 +-- .../datastore/datareaders/DatasetArray.scala | 36 ++++++++++--------- .../datareaders/MultiArrayUtils.scala | 3 +- .../services/AgglomerateService.scala | 32 +++++++---------- 5 files changed, 37 insertions(+), 47 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala index 7a3802c5e64..90e222df1be 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala @@ -3,13 +3,8 @@ package com.scalableminds.webknossos.datastore.controllers import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.helpers.NativeBucketScanner -import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass} -import com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest -import com.scalableminds.webknossos.datastore.services.{ - AgglomerateService, - ApplicationHealthService, - ZarrAgglomerateService -} +import com.scalableminds.webknossos.datastore.models.datasource.ElementClass +import com.scalableminds.webknossos.datastore.services.{ApplicationHealthService, ZarrAgglomerateService} import com.scalableminds.webknossos.datastore.storage.DataStoreRedisStore import net.liftweb.common.Box.tryo diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/DatasetArrayBucketProvider.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/DatasetArrayBucketProvider.scala index 0c78fa32c7c..bd46b455e98 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/DatasetArrayBucketProvider.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/DatasetArrayBucketProvider.scala @@ -40,8 +40,8 @@ class DatasetArrayBucketProvider(dataLayer: DataLayer, bucket = readInstruction.bucket shape = Vec3Int.full(bucket.bucketLength) offset = Vec3Int(bucket.topLeft.voxelXInMag, bucket.topLeft.voxelYInMag, bucket.topLeft.voxelZInMag) - bucketData <- datasetArray.readBytesWithAdditionalCoordinates(shape, - offset, + bucketData <- datasetArray.readBytesWithAdditionalCoordinates(offset, + shape, bucket.additionalCoordinates, dataLayer.elementClass == ElementClass.uint24) } yield bucketData diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index dfa848800f4..21c77270b7f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -3,7 +3,6 @@ package com.scalableminds.webknossos.datastore.datareaders import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int -import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId @@ -69,33 +68,33 @@ class DatasetArray(vaultPath: VaultPath, } def readBytesWithAdditionalCoordinates( - shapeXYZ: Vec3Int, offsetXYZ: Vec3Int, + shapeXYZ: Vec3Int, additionalCoordinatesOpt: Option[Seq[AdditionalCoordinate]], shouldReadUint24: Boolean)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { - (shapeArray, offsetArray) <- tryo(constructShapeAndOffsetArrays( - shapeXYZ, + (offsetArray, shapeArray) <- tryo(constructShapeAndOffsetArrays( offsetXYZ, + shapeXYZ, additionalCoordinatesOpt, shouldReadUint24)).toFox ?~> "failed to construct shape and offset array for requested coordinates" - bytes <- readBytes(shapeArray, offsetArray) + bytes <- readBytes(offsetArray, shapeArray) } yield bytes - private def constructShapeAndOffsetArrays(shapeXYZ: Vec3Int, - offsetXYZ: Vec3Int, + private def constructShapeAndOffsetArrays(offsetXYZ: Vec3Int, + shapeXYZ: Vec3Int, additionalCoordinatesOpt: Option[Seq[AdditionalCoordinate]], shouldReadUint24: Boolean): (Array[Int], Array[Int]) = { - val shapeArray: Array[Int] = Array.fill(rank)(1) - shapeArray(rank - 3) = shapeXYZ.x - shapeArray(rank - 2) = shapeXYZ.y - shapeArray(rank - 1) = shapeXYZ.z - val offsetArray: Array[Int] = Array.fill(rank)(0) offsetArray(rank - 3) = offsetXYZ.x offsetArray(rank - 2) = offsetXYZ.y offsetArray(rank - 1) = offsetXYZ.z + val shapeArray: Array[Int] = Array.fill(rank)(1) + shapeArray(rank - 3) = shapeXYZ.x + shapeArray(rank - 2) = shapeXYZ.y + shapeArray(rank - 1) = shapeXYZ.z + axisOrder.c.foreach { channelAxisInner => val channelAxisOuter = fullAxisOrder.arrayToWkPermutation(channelAxisInner) // If a channelIndex is requested, and a channel axis is known, add an offset to the channel axis @@ -115,14 +114,14 @@ class DatasetArray(vaultPath: VaultPath, // shapeArray at positions of additional coordinates is always 1 } } - (shapeArray, offsetArray) + (offsetArray, shapeArray) } // returns byte array in fortran-order with little-endian values - private def readBytes(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext, + private def readBytes(offset: Array[Int], shape: Array[Int])(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { - typedMultiArray <- readAsFortranOrder(shape, offset) + typedMultiArray <- readAsFortranOrder(offset, shape) asBytes <- BytesConverter.toByteArray(typedMultiArray, header.resolvedDataType, ByteOrder.LITTLE_ENDIAN).toFox } yield asBytes @@ -153,7 +152,7 @@ class DatasetArray(vaultPath: VaultPath, // The local variables like chunkIndices are also in this order unless explicitly named. // Loading data adapts to the array's axis order so that …CXYZ data in fortran-order is // returned, regardless of the array’s internal storage. 
- private def readAsFortranOrder(shape: Array[Int], offset: Array[Int])(implicit ec: ExecutionContext, + private def readAsFortranOrder(offset: Array[Int], shape: Array[Int])(implicit ec: ExecutionContext, tc: TokenContext): Fox[MultiArray] = { val totalOffset: Array[Int] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }.padTo(offset.length, 0) val chunkIndices = ChunkUtils.computeChunkIndices( @@ -189,7 +188,10 @@ class DatasetArray(vaultPath: VaultPath, } } - def readAsMultiArray(shape: Array[Int], offset: Array[Long])(implicit ec: ExecutionContext, + def readAsMultiArray(offset: Long, shape: Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[MultiArray] = + readAsMultiArray(Array(offset), Array(shape)) + + def readAsMultiArray(offset: Array[Long], shape: Array[Int])(implicit ec: ExecutionContext, tc: TokenContext): Fox[MultiArray] = if (shape.product == 0) { Fox.successful(MultiArrayUtils.createEmpty(rank)) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala index a9a2160b7a6..69a3990e68b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala @@ -45,9 +45,8 @@ object MultiArrayUtils extends LazyLogging { } } - def createEmpty(rank: Int): MultiArray = { + def createEmpty(rank: Int): MultiArray = MultiArray.factory(MADataType.FLOAT, Array.fill(rank)(0)) - } /** * Offset describes the displacement between source and target array.
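For orientation, a minimal sketch of the calling convention this patch settles on (offset before shape, plus the new single-dimension shortcut). It is illustrative only: the names `offsetsArray`, `positionsArray`, `agglomerateId` and `nodeCount` are assumed, as are the surrounding imports and the implicit ExecutionContext/TokenContext of the project.

    // sketch, not part of the patch: assumes two already-opened DatasetArray instances
    def readExample(offsetsArray: DatasetArray, positionsArray: DatasetArray, agglomerateId: Long, nodeCount: Int)(
        implicit ec: ExecutionContext,
        tc: TokenContext): Fox[MultiArray] =
      for {
        // 1D read via the new shortcut: two values starting at index agglomerateId
        range <- offsetsArray.readAsMultiArray(offset = agglomerateId, shape = 2)
        // multi-dimensional reads keep the array form; offset comes first, then shape
        positions <- positionsArray.readAsMultiArray(offset = Array(range.getLong(0), 0L),
                                                     shape = Array(nodeCount, 3))
      } yield positions

This mirrors the call sites updated later in the series (agglomerate_to_segments_offsets and agglomerate_to_positions reads).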
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 94daad2d85a..44eebd6b09e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -62,7 +62,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService private def mapSingleSegment(segmentToAgglomerate: DatasetArray, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { - asMultiArray <- segmentToAgglomerate.readAsMultiArray(shape = Array(1), offset = Array(segmentId)) + asMultiArray <- segmentToAgglomerate.readAsMultiArray(offset = segmentId, shape = 1) } yield asMultiArray.getLong(0) private def openZarrArrayCached(agglomerateFileKey: AgglomerateFileKey, @@ -147,10 +147,8 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") - positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = Array(agglomerateId), - shape = Array(2)) - edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = Array(agglomerateId), - shape = Array(2)) + positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) + edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges @@ -223,10 +221,8 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") - positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(shape = Array(2), - offset = Array(agglomerateId)) - edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(shape = Array(2), - offset = Array(agglomerateId)) + positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) + edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges @@ -236,14 +232,14 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService positions: MultiArray <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), shape = Array(nodeCount.toInt, 3)) agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") - segmentIds: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = Array(positionsRange.getInt(0)), - shape = Array(nodeCount.toInt)) + segmentIds: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = positionsRange.getInt(0), + shape = nodeCount.toInt) agglomerateToEdges 
<- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), shape = Array(edgeCount.toInt, 2)) agglomerateToAffinities <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_affinities") - affinities: MultiArray <- agglomerateToAffinities.readAsMultiArray(offset = Array(edgesRange.getLong(0)), - shape = Array(edgeCount.toInt)) + affinities: MultiArray <- agglomerateToAffinities.readAsMultiArray(offset = edgesRange.getLong(0), + shape = edgeCount.toInt) agglomerateGraph = AgglomerateGraph( // unsafeWrapArray is fine, because the underlying arrays are never mutated @@ -270,13 +266,12 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService for { agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") - segmentRange <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = Array(agglomerateId), shape = Array(2)) + segmentRange <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) segmentCount = segmentRange.getLong(1) - segmentRange.getLong(0) segmentIds <- if (segmentCount == 0) Fox.successful(MultiArray.factory(DataType.LONG, Array(0, 0))) else - agglomerateToSegments.readAsMultiArray(offset = Array(segmentRange.getLong(0)), - shape = Array(segmentCount.toInt)) + agglomerateToSegments.readAsMultiArray(offset = segmentRange.getLong(0), shape = segmentCount.toInt) } yield segmentIds.getStorage.asInstanceOf[Array[Long]].toSeq def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long])( @@ -295,8 +290,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") agglomerateId <- mapSingleSegment(segmentToAgglomerate, segmentId) agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") - segmentsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = Array(agglomerateId), - shape = Array(2)) + segmentsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") segmentIndex <- binarySearchForSegment(segmentsRange.getLong(0), segmentsRange.getLong(1), @@ -315,7 +309,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService else { val middle = rangeStart + (rangeEnd - rangeStart) / 2 for { - segmentIdAtMiddleMA <- agglomerateToSegments.readAsMultiArray(offset = Array(middle), shape = Array(1)) + segmentIdAtMiddleMA <- agglomerateToSegments.readAsMultiArray(offset = middle, shape = 1) segmentIdAtMiddle = segmentIdAtMiddleMA.getLong(0) segmentIndex <- if (segmentIdAtMiddle == segmentId) Fox.successful(middle) From 8551f99f82480d5df908e3cccb56a5901fec5805 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 28 May 2025 11:04:52 +0200 Subject: [PATCH 018/100] handle uint32 agglomerate_to_segments arrays --- .../DatasetArrayBucketProvider.scala | 2 +- .../datareaders/MultiArrayUtils.scala | 16 +++++++++++- .../services/AgglomerateService.scala | 26 +++++++++++-------- .../services/mesh/AdHocMeshService.scala | 2 +- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/DatasetArrayBucketProvider.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/DatasetArrayBucketProvider.scala index bd46b455e98..1e9d572e4ed 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/DatasetArrayBucketProvider.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/dataformats/DatasetArrayBucketProvider.scala @@ -38,8 +38,8 @@ class DatasetArrayBucketProvider(dataLayer: DataLayer, datasetArray <- datasetArrayCache.getOrLoad(readInstruction.bucket.mag, _ => openDatasetArrayWithTimeLogging(readInstruction)) bucket = readInstruction.bucket - shape = Vec3Int.full(bucket.bucketLength) offset = Vec3Int(bucket.topLeft.voxelXInMag, bucket.topLeft.voxelYInMag, bucket.topLeft.voxelZInMag) + shape = Vec3Int.full(bucket.bucketLength) bucketData <- datasetArray.readBytesWithAdditionalCoordinates(offset, shape, bucket.additionalCoordinates, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala index 69a3990e68b..86e19397b85 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala @@ -2,7 +2,7 @@ package com.scalableminds.webknossos.datastore.datareaders import ArrayDataType.ArrayDataType import com.typesafe.scalalogging.LazyLogging -import net.liftweb.common.Box +import net.liftweb.common.{Box, Failure, Full} import net.liftweb.common.Box.tryo import ucar.ma2.{IndexIterator, InvalidRangeException, Range, Array => MultiArray, DataType => MADataType} @@ -48,6 +48,20 @@ object MultiArrayUtils extends LazyLogging { def createEmpty(rank: Int): MultiArray = MultiArray.factory(MADataType.FLOAT, Array.fill(rank)(0)) + def toLongArray(multiArray: MultiArray): Box[Array[Long]] = + multiArray.getDataType match { + case MADataType.LONG | MADataType.ULONG => + Full(multiArray.getStorage.asInstanceOf[Array[Long]]) + case MADataType.INT => + Full(multiArray.getStorage.asInstanceOf[Array[Int]].map(_.toLong)) + case MADataType.UINT => + Full(multiArray.getStorage.asInstanceOf[Array[Int]].map { signed => + if (signed >= 0) signed.toLong else signed.toLong + Int.MaxValue.toLong + Int.MaxValue.toLong + 2L + }) + case _ => + Failure("Cannot convert MultiArray to LongArray: unsupported data type.") + } + /** * Offset describes the displacement between source and target array.
*
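A brief note on the UINT branch above: Int.MaxValue + Int.MaxValue + 2 equals 1L << 32, so the addition is the standard unsigned widening of a 32-bit value stored in a signed Int. A minimal equivalent sketch (the helper name is illustrative, not part of the patch):

    // widen a signed Int that actually holds an unsigned 32-bit value
    def widenUnsignedInt(signed: Int): Long = signed.toLong & 0xFFFFFFFFL
    // widenUnsignedInt(-1) == 4294967295L, widenUnsignedInt(7) == 7L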
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 44eebd6b09e..a754e42c0cb 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -10,7 +10,7 @@ import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} -import com.scalableminds.webknossos.datastore.datareaders.DatasetArray +import com.scalableminds.webknossos.datastore.datareaders.{DatasetArray, MultiArrayUtils} import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NativeBucketScanner, NodeDefaults, SkeletonTracingDefaults} @@ -21,7 +21,7 @@ import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.{Box, Failure, Full} import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils -import ucar.ma2.{DataType, Array => MultiArray} +import ucar.ma2.{Array => MultiArray} import java.net.URI import java.nio._ @@ -232,8 +232,9 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService positions: MultiArray <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), shape = Array(nodeCount.toInt, 3)) agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") - segmentIds: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = positionsRange.getInt(0), - shape = nodeCount.toInt) + segmentIdsMA: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = positionsRange.getInt(0), + shape = nodeCount.toInt) + segmentIds: Array[Long] <- MultiArrayUtils.toLongArray(segmentIdsMA).toFox agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), shape = Array(edgeCount.toInt, 2)) @@ -243,11 +244,11 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService agglomerateGraph = AgglomerateGraph( // unsafeWrapArray is fine, because the underlying arrays are never mutated - segments = ArraySeq.unsafeWrapArray(segmentIds.getStorage.asInstanceOf[Array[Long]]), + segments = ArraySeq.unsafeWrapArray(segmentIds), edges = (0 until edges.getShape()(0)).map { edgeIdx: Int => AgglomerateEdge( - source = segmentIds.getLong(edges.getInt(edges.getIndex.set(Array(edgeIdx, 0)))), - target = segmentIds.getLong(edges.getInt(edges.getIndex.set(Array(edgeIdx, 1)))) + source = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 0)))), + target = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 1)))) ) }, positions = (0 until nodeCount.toInt).map { nodeIdx: Int => @@ -269,10 +270,12 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService segmentRange <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) segmentCount = segmentRange.getLong(1) - segmentRange.getLong(0) segmentIds <- if (segmentCount == 0) - 
Fox.successful(MultiArray.factory(DataType.LONG, Array(0, 0))) + Fox.successful(Array.empty[Long]) else - agglomerateToSegments.readAsMultiArray(offset = segmentRange.getLong(0), shape = segmentCount.toInt) - } yield segmentIds.getStorage.asInstanceOf[Array[Long]].toSeq + agglomerateToSegments + .readAsMultiArray(offset = segmentRange.getLong(0), shape = segmentCount.toInt) + .flatMap(MultiArrayUtils.toLongArray(_).toFox) + } yield segmentIds.toSeq def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, @@ -310,7 +313,8 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService val middle = rangeStart + (rangeEnd - rangeStart) / 2 for { segmentIdAtMiddleMA <- agglomerateToSegments.readAsMultiArray(offset = middle, shape = 1) - segmentIdAtMiddle = segmentIdAtMiddleMA.getLong(0) + segmentIdAdMiddleArray: Array[Long] <- MultiArrayUtils.toLongArray(segmentIdAtMiddleMA).toFox + segmentIdAtMiddle = segmentIdAdMiddleArray(0) segmentIndex <- if (segmentIdAtMiddle == segmentId) Fox.successful(middle) else if (segmentIdAtMiddle < segmentId) { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala index cf8f5c2a135..6e711449df3 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala @@ -14,7 +14,7 @@ import com.scalableminds.webknossos.datastore.models.requests.{ import com.scalableminds.webknossos.datastore.services.mcubes.MarchingCubes import com.scalableminds.webknossos.datastore.services.{BinaryDataService, MappingService} import com.typesafe.scalalogging.LazyLogging -import net.liftweb.common.{Box, Failure, Full} +import net.liftweb.common.{Box, Failure} import org.apache.pekko.actor.{Actor, ActorRef, ActorSystem, Props} import org.apache.pekko.pattern.ask import org.apache.pekko.routing.RoundRobinPool From d183c99d2e486b834581f99723cc424dbb7e08f7 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 28 May 2025 11:14:56 +0200 Subject: [PATCH 019/100] useZarr=false to test ci --- .../webknossos/datastore/services/AgglomerateService.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index a754e42c0cb..32d7dc954d2 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -339,8 +339,9 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi private val cumsumFileName = "cumsum.json" // TODO remove - private val useZarr = true + private val useZarr = false + // TODO clear on reload lazy val agglomerateFileCache = new AgglomerateFileCache(config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) def exploreAgglomerates(organizationId: String, datasetDirectoryName: String, dataLayerName: String): Set[String] = { From cd04466377039e9b838477d7938ef4d832c42194 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 28 May 2025 13:40:09 +0200 Subject: [PATCH 020/100] change chunkIndices back 
to list --- .../webknossos/datastore/datareaders/ChunkUtils.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala index 8959b15e2bc..b483ce0a637 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala @@ -4,7 +4,7 @@ object ChunkUtils { def computeChunkIndices(arrayShapeOpt: Option[Array[Long]], arrayChunkShape: Array[Int], selectedShape: Array[Int], - selectedOffset: Array[Long]): Seq[Array[Int]] = { + selectedOffset: Array[Long]): List[Array[Int]] = { val nDims = arrayChunkShape.length val start = new Array[Int](nDims) val end = new Array[Int](nDims) @@ -38,6 +38,6 @@ object ChunkUtils { dimIndex = -1 } } - chunkIndices.toSeq + chunkIndices.toList } } From 9eac53d43bd7f7ebb7bdd628ed45e905ef7285e4 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 28 May 2025 13:43:45 +0200 Subject: [PATCH 021/100] use headOption instead of list deconstruction --- .../webknossos/datastore/datareaders/ChunkUtils.scala | 4 ++-- .../webknossos/datastore/datareaders/DatasetArray.scala | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala index b483ce0a637..8959b15e2bc 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkUtils.scala @@ -4,7 +4,7 @@ object ChunkUtils { def computeChunkIndices(arrayShapeOpt: Option[Array[Long]], arrayChunkShape: Array[Int], selectedShape: Array[Int], - selectedOffset: Array[Long]): List[Array[Int]] = { + selectedOffset: Array[Long]): Seq[Array[Int]] = { val nDims = arrayChunkShape.length val start = new Array[Int](nDims) val end = new Array[Int](nDims) @@ -38,6 +38,6 @@ object ChunkUtils { dimIndex = -1 } } - chunkIndices.toList + chunkIndices.toSeq } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index 21c77270b7f..683071dbf0f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -282,8 +282,8 @@ class DatasetArray(vaultPath: VaultPath, private def partialCopyingIsNotNeededForWkOrder(bufferShape: Array[Int], globalOffset: Array[Int], chunkIndices: Seq[Array[Int]]): Boolean = - chunkIndices match { - case chunkIndex :: Nil => + chunkIndices.headOption match { + case Some(chunkIndex) => val offsetInChunk = computeOffsetInChunk(chunkIndex, globalOffset) header.order == ArrayOrder.F && isZeroOffset(offsetInChunk) && From fd7a281154c23b2b2235d25481017e020d7c949f Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 3 Jun 2025 12:52:25 +0200 Subject: [PATCH 022/100] WIP distinguish btw hdf5 and zarr according to registered layer attachments --- .../controllers/DataSourceController.scala | 44 +- .../datasource/DatasetLayerAttachments.scala | 1 + .../services/AgglomerateService.scala 
| 723 +++--------------- .../services/Hdf5AgglomerateService.scala | 302 ++++++++ .../services/SegmentIndexFileService.scala | 19 +- .../services/ZarrAgglomerateService.scala | 292 +++++++ .../storage/AgglomerateFileCache.scala | 44 +- 7 files changed, 719 insertions(+), 706 deletions(-) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index c95f7b4f299..e1f37242eb3 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -278,11 +278,12 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - skeleton <- agglomerateService.generateSkeleton(organizationId, - datasetDirectoryName, - dataLayerName, - mappingName, - agglomerateId) ?~> "agglomerateSkeleton.failed" + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + skeleton <- agglomerateService + .generateSkeleton(agglomerateFileAttachment, agglomerateId) ?~> "agglomerateSkeleton.failed" } yield Ok(skeleton.toByteArray).as(protobufMimeType) } } @@ -316,9 +317,12 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - position <- agglomerateService.positionForSegmentId( - AgglomerateFileKey(organizationId, datasetDirectoryName, dataLayerName, mappingName), - segmentId) ?~> "getSegmentPositionFromAgglomerateFile.failed" + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + position <- agglomerateService + .positionForSegmentId(agglomerateFileAttachment, segmentId) ?~> "getSegmentPositionFromAgglomerateFile.failed" } yield Ok(Json.toJson(position)) } } @@ -333,14 +337,11 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - largestAgglomerateId: Long <- agglomerateService.largestAgglomerateId( - AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ) - ) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + largestAgglomerateId: Long <- agglomerateService.largestAgglomerateId(agglomerateFileAttachment) } yield Ok(Json.toJson(largestAgglomerateId)) } } @@ 
-355,13 +356,12 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) agglomerateIds: Seq[Long] <- agglomerateService.agglomerateIdsForSegmentIds( - AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ), + agglomerateFileAttachment, request.body.items ) } yield Ok(ListOfLong(agglomerateIds).toByteArray) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala index f3102b4c923..53e7097e5fd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala @@ -54,6 +54,7 @@ object LayerAttachment { Seq.empty } } + } object MeshFileInfo { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 32d7dc954d2..e625078bf1d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -1,336 +1,33 @@ package com.scalableminds.webknossos.datastore.services -import ch.systemsx.cisd.hdf5._ import com.scalableminds.util.accesscontext.TokenContext -import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.io.PathUtils import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Fox, FoxImplicits} -import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} +import com.scalableminds.webknossos.datastore.AgglomerateGraph.AgglomerateGraph import com.scalableminds.webknossos.datastore.DataStoreConfig -import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} -import com.scalableminds.webknossos.datastore.datareaders.{DatasetArray, MultiArrayUtils} -import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array -import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto -import com.scalableminds.webknossos.datastore.helpers.{NativeBucketScanner, NodeDefaults, SkeletonTracingDefaults} -import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass} +import com.scalableminds.webknossos.datastore.SkeletonTracing.SkeletonTracing +import com.scalableminds.webknossos.datastore.models.datasource.{ + DataLayer, + DataSourceId, + LayerAttachment, + LayerAttachmentDataformat, +} import com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest -import com.scalableminds.webknossos.datastore.storage._ import com.typesafe.scalalogging.LazyLogging -import net.liftweb.common.{Box, Failure, Full} -import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils 
-import ucar.ma2.{Array => MultiArray} import java.net.URI -import java.nio._ -import java.nio.file.{Files, Paths} +import java.nio.file.Paths import javax.inject.Inject -import scala.annotation.tailrec -import scala.collection.compat.immutable.ArraySeq import scala.concurrent.ExecutionContext import scala.concurrent.duration.DurationInt -class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService: DataVaultService) - extends DataConverter - with LazyLogging { - private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) - private val agglomerateDir = "agglomerates" - - // TODO clear on dataset reload - private lazy val openArraysCache = AlfuCache[(AgglomerateFileKey, String), DatasetArray]() - - // TODO unify with existing chunkContentsCache from binaryDataService? - private lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { - // Used by DatasetArray-based datasets. Measure item weight in kilobytes because the weigher can only return int, not long - - val maxSizeKiloBytes = Math.floor(config.Datastore.Cache.ImageArrayChunks.maxSizeBytes.toDouble / 1000.0).toInt - - def cacheWeight(key: String, arrayBox: Box[MultiArray]): Int = - arrayBox match { - case Full(array) => - (array.getSizeBytes / 1000L).toInt - case _ => 0 - } - - AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) - } - - protected lazy val bucketScanner = new NativeBucketScanner() - - private def mapSingleSegment(segmentToAgglomerate: DatasetArray, segmentId: Long)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Long] = - for { - asMultiArray <- segmentToAgglomerate.readAsMultiArray(offset = segmentId, shape = 1) - } yield asMultiArray.getLong(0) - - private def openZarrArrayCached(agglomerateFileKey: AgglomerateFileKey, - zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext) = - openArraysCache.getOrLoad((agglomerateFileKey, zarrArrayName), - _ => openZarrArray(agglomerateFileKey, zarrArrayName)) - - private def openZarrArray(agglomerateFileKey: AgglomerateFileKey, zarrArrayName: String)( - implicit ec: ExecutionContext, - tc: TokenContext): Fox[DatasetArray] = { - - val zarrGroupPath = agglomerateFileKey.zarrGroupPath(dataBaseDir, agglomerateDir).toAbsolutePath - for { - groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(s"file://$zarrGroupPath"), None)) - segmentToAgglomeratePath = groupVaultPath / zarrArrayName - zarrArray <- Zarr3Array.open(segmentToAgglomeratePath, - DataSourceId("zarr", "test"), - "layer", - None, - None, - None, - sharedChunkContentsCache) - } yield zarrArray - } - - def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Array[Byte]] = { - - val agglomerateFileKey = AgglomerateFileKey.fromDataRequest(request) - - def convertToAgglomerate(segmentIds: Array[Long], - relevantAgglomerateMap: Map[Long, Long], - bytesPerElement: Int, - putToBufferFunction: (ByteBuffer, Long) => ByteBuffer): Array[Byte] = { - val agglomerateIds = segmentIds.map(relevantAgglomerateMap) - agglomerateIds - .foldLeft(ByteBuffer.allocate(bytesPerElement * segmentIds.length).order(ByteOrder.LITTLE_ENDIAN))( - putToBufferFunction) - .array - } - - val bytesPerElement = ElementClass.bytesPerElement(request.dataLayer.elementClass) - val distinctSegmentIds = - bucketScanner.collectSegmentIds(data, bytesPerElement, isSigned = false, skipZeroes = false) - - for { - segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") - 
beforeBuildMap = Instant.now - relevantAgglomerateMap: Map[Long, Long] <- Fox - .serialCombined(distinctSegmentIds) { segmentId => - mapSingleSegment(segmentToAgglomerate, segmentId).map((segmentId, _)) - } - .map(_.toMap) - _ = Instant.logSince(beforeBuildMap, "build map") - mappedBytes: Array[Byte] = convertData(data, request.dataLayer.elementClass) match { - case data: Array[Byte] => - val longBuffer = LongBuffer.allocate(data.length) - data.foreach(e => longBuffer.put(uByteToLong(e))) - convertToAgglomerate(longBuffer.array, relevantAgglomerateMap, bytesPerElement, putByte) - case data: Array[Short] => - val longBuffer = LongBuffer.allocate(data.length) - data.foreach(e => longBuffer.put(uShortToLong(e))) - convertToAgglomerate(longBuffer.array, relevantAgglomerateMap, bytesPerElement, putShort) - case data: Array[Int] => - val longBuffer = LongBuffer.allocate(data.length) - data.foreach(e => longBuffer.put(uIntToLong(e))) - convertToAgglomerate(longBuffer.array, relevantAgglomerateMap, bytesPerElement, putInt) - case data: Array[Long] => convertToAgglomerate(data, relevantAgglomerateMap, bytesPerElement, putLong) - case _ => data - } - } yield mappedBytes - } - - def generateSkeleton(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - mappingName: String, - agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = - for { - before <- Instant.nowFox - agglomerateFileKey = AgglomerateFileKey(organizationId, datasetDirectoryName, dataLayerName, mappingName) - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") - agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") - - positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) - edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) - edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges - _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" - _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" - agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") - positions <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), - shape = Array(nodeCount.toInt, 3)) - agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") - edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), - shape = Array(edgeCount.toInt, 2)) - nodeIdStartAtOneOffset = 1 - - // TODO use multiarray index iterators? 
- nodes = (0 until nodeCount.toInt).map { nodeIdx => - NodeDefaults.createInstance.copy( - id = nodeIdx + nodeIdStartAtOneOffset, - position = Vec3IntProto( - positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), - positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), - positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) - ) - ) - } - - skeletonEdges = (0 until edges.getShape()(0)).map { edgeIdx => - Edge( - source = edges.getInt(edges.getIndex.set(Array(edgeIdx, 0))) + nodeIdStartAtOneOffset, - target = edges.getInt(edges.getIndex.set(Array(edgeIdx, 1))) + nodeIdStartAtOneOffset - ) - } - - trees = Seq( - Tree( - treeId = math.abs(agglomerateId.toInt), // used only to deterministically select tree color - createdTimestamp = System.currentTimeMillis(), - // unsafeWrapArray is fine, because the underlying arrays are never mutated - nodes = nodes, - edges = skeletonEdges, - name = s"agglomerate $agglomerateId ($mappingName)", - `type` = Some(TreeTypeProto.AGGLOMERATE) - )) - - skeleton = SkeletonTracingDefaults.createInstance.copy( - datasetName = datasetDirectoryName, - trees = trees - ) - - _ = if (Instant.since(before) > (100 milliseconds)) { - Instant.logSince( - before, - s"Generating skeleton from agglomerate file with ${skeletonEdges.length} edges, ${nodes.length} nodes", - logger) - } - - } yield skeleton - - def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Long] = - for { - array <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") - shape <- array.datasetShape.toFox ?~> "Could not determine array shape" - shapeFirstElement <- tryo(shape(0)).toFox - } yield shapeFirstElement - - def generateAgglomerateGraph(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)( - implicit ec: ExecutionContext, - tc: TokenContext): Fox[AgglomerateGraph] = - for { - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") - agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") - - positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) - edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) - edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges - _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" - _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" - agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") - positions: MultiArray <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), - shape = Array(nodeCount.toInt, 3)) - agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") - segmentIdsMA: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = positionsRange.getInt(0), - shape = nodeCount.toInt) - segmentIds: Array[Long] <- MultiArrayUtils.toLongArray(segmentIdsMA).toFox - agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") - edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), - shape = Array(edgeCount.toInt, 2)) - agglomerateToAffinities <- 
openZarrArrayCached(agglomerateFileKey, "agglomerate_to_affinities") - affinities: MultiArray <- agglomerateToAffinities.readAsMultiArray(offset = edgesRange.getLong(0), - shape = edgeCount.toInt) - - agglomerateGraph = AgglomerateGraph( - // unsafeWrapArray is fine, because the underlying arrays are never mutated - segments = ArraySeq.unsafeWrapArray(segmentIds), - edges = (0 until edges.getShape()(0)).map { edgeIdx: Int => - AgglomerateEdge( - source = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 0)))), - target = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 1)))) - ) - }, - positions = (0 until nodeCount.toInt).map { nodeIdx: Int => - Vec3IntProto( - positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), - positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), - positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) - ) - }, - affinities = ArraySeq.unsafeWrapArray(affinities.getStorage.asInstanceOf[Array[Float]]) - ) - } yield agglomerateGraph - - def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, - agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = - for { - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") - agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") - segmentRange <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - segmentCount = segmentRange.getLong(1) - segmentRange.getLong(0) - segmentIds <- if (segmentCount == 0) - Fox.successful(Array.empty[Long]) - else - agglomerateToSegments - .readAsMultiArray(offset = segmentRange.getLong(0), shape = segmentCount.toInt) - .flatMap(MultiArrayUtils.toLongArray(_).toFox) - } yield segmentIds.toSeq - - def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long])( - implicit ec: ExecutionContext, - tc: TokenContext): Fox[Seq[Long]] = - for { - segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") - agglomerateIds <- Fox.serialCombined(segmentIds) { segmentId => - mapSingleSegment(segmentToAgglomerate, segmentId) - } - } yield agglomerateIds - - def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Vec3Int] = - for { - segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") - agglomerateId <- mapSingleSegment(segmentToAgglomerate, segmentId) - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") - segmentsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") - segmentIndex <- binarySearchForSegment(segmentsRange.getLong(0), - segmentsRange.getLong(1), - segmentId, - agglomerateToSegments) - agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") - position <- agglomerateToPositions.readAsMultiArray(offset = Array(segmentIndex, 0), shape = Array(3, 1)) - } yield Vec3Int(position.getInt(0), position.getInt(1), position.getInt(2)) - - private def binarySearchForSegment( - rangeStart: Long, - rangeEnd: Long, - segmentId: Long, - agglomerateToSegments: DatasetArray)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = - if (rangeStart > rangeEnd) Fox.failure("Could not find segmentId in 
agglomerate file") - else { - val middle = rangeStart + (rangeEnd - rangeStart) / 2 - for { - segmentIdAtMiddleMA <- agglomerateToSegments.readAsMultiArray(offset = middle, shape = 1) - segmentIdAdMiddleArray: Array[Long] <- MultiArrayUtils.toLongArray(segmentIdAtMiddleMA).toFox - segmentIdAtMiddle = segmentIdAdMiddleArray(0) - segmentIndex <- if (segmentIdAtMiddle == segmentId) - Fox.successful(middle) - else if (segmentIdAtMiddle < segmentId) { - binarySearchForSegment(middle + 1L, rangeEnd, segmentId, agglomerateToSegments) - } else binarySearchForSegment(rangeStart, middle - 1L, segmentId, agglomerateToSegments) - } yield segmentIndex - } -} - -class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with LazyLogging { - // TODO -} - -class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateService: ZarrAgglomerateService) - extends DataConverter - with LazyLogging +class AgglomerateService @Inject()(config: DataStoreConfig, + zarrAgglomerateService: ZarrAgglomerateService, + hdf5AgglomerateService: Hdf5AgglomerateService) + extends LazyLogging with FoxImplicits { private val agglomerateDir = "agglomerates" private val agglomerateFileExtension = "hdf5" @@ -341,9 +38,6 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi // TODO remove private val useZarr = false - // TODO clear on reload - lazy val agglomerateFileCache = new AgglomerateFileCache(config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) - def exploreAgglomerates(organizationId: String, datasetDirectoryName: String, dataLayerName: String): Set[String] = { val layerDir = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayerName) PathUtils @@ -379,343 +73,102 @@ class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateServi ) } - def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Array[Byte]] = - if (useZarr) { - zarrAgglomerateService.applyAgglomerate(request)(data) - } else applyAgglomerateHdf5(request)(data).toFox - - private def applyAgglomerateHdf5(request: DataServiceDataRequest)(data: Array[Byte]): Box[Array[Byte]] = tryo { - - val agglomerateFileKey = AgglomerateFileKey.fromDataRequest(request) - - def convertToAgglomerate(input: Array[Long], - bytesPerElement: Int, - bufferFunc: (ByteBuffer, Long) => ByteBuffer): Array[Byte] = { - - val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileKey)(initHDFReader) - - val agglomerateIds = cachedAgglomerateFile.cache match { - case Left(agglomerateIdCache) => - input.map(el => - agglomerateIdCache.withCache(el, cachedAgglomerateFile.reader, cachedAgglomerateFile.dataset)(readHDF)) - case Right(boundingBoxCache) => - boundingBoxCache.withCache(request, input, cachedAgglomerateFile.reader)(readHDF) - } - cachedAgglomerateFile.finishAccess() - - agglomerateIds - .foldLeft(ByteBuffer.allocate(bytesPerElement * input.length).order(ByteOrder.LITTLE_ENDIAN))(bufferFunc) - .array - } - - val bytesPerElement = ElementClass.bytesPerElement(request.dataLayer.elementClass) - /* Every value of the segmentation data needs to be converted to Long to then look up the - agglomerate id in the segment-to-agglomerate array. - The value is first converted to the primitive signed number types, and then converted - to Long via uByteToLong, uShortToLong etc, which perform bitwise and to take care of - the unsigned semantics. 
Using functions avoids allocating intermediate SegmentInteger objects. - Allocating a fixed-length LongBuffer first is a further performance optimization. - */ - convertData(data, request.dataLayer.elementClass) match { - case data: Array[Byte] => - val longBuffer = LongBuffer.allocate(data.length) - data.foreach(e => longBuffer.put(uByteToLong(e))) - convertToAgglomerate(longBuffer.array, bytesPerElement, putByte) - case data: Array[Short] => - val longBuffer = LongBuffer.allocate(data.length) - data.foreach(e => longBuffer.put(uShortToLong(e))) - convertToAgglomerate(longBuffer.array, bytesPerElement, putShort) - case data: Array[Int] => - val longBuffer = LongBuffer.allocate(data.length) - data.foreach(e => longBuffer.put(uIntToLong(e))) - convertToAgglomerate(longBuffer.array, bytesPerElement, putInt) - case data: Array[Long] => convertToAgglomerate(data, bytesPerElement, putLong) - case _ => data + // TODO cache? + def lookUpAgglomerateFile(dataSourceId: DataSourceId, dataLayer: DataLayer, mappingName: String): LayerAttachment = { + val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { + case Some(attachments) => attachments.agglomerates.find(_.name == mappingName) + case None => None } + registeredAttachment.getOrElse( + LayerAttachment( + mappingName, + new URI( + dataBaseDir + .resolve(dataSourceId.organizationId) + .resolve(dataSourceId.directoryName) + .resolve(dataLayer.name) + .resolve(agglomerateDir) + .toString), + LayerAttachmentDataformat.hdf5 + ) + ) } - // This uses a HDF5DataSet, which improves performance per call but doesn't permit parallel calls with the same dataset. - private def readHDF(reader: IHDF5Reader, hdf5Dataset: HDF5DataSet, segmentId: Long, blockSize: Long): Array[Long] = - // We don't need to differentiate between the data types because the underlying library does the conversion for us - reader.uint64().readArrayBlockWithOffset(hdf5Dataset, blockSize.toInt, segmentId) - - // This uses the datasetName, which allows us to call it on the same hdf file in parallel. - private def readHDF(reader: IHDF5Reader, segmentId: Long, blockSize: Long) = - reader.uint64().readArrayBlockWithOffset(datasetName, blockSize.toInt, segmentId) - - // An agglomerate file holds information about a specific mapping. wK translates the segment ids to agglomerate ids by looking at the HDF5 dataset "/segment_to_agglomerate". - // In this array, the agglomerate id is found by using the segment id as index. - // There are two ways of how we prevent a file lookup for every input element. When present, we use the cumsum.json to initialize a BoundingBoxCache (see comment there). - // Otherwise, we read configurable sized blocks from the agglomerate file and save them in a LRU cache. 
- private def initHDFReader(agglomerateFileKey: AgglomerateFileKey) = { - val hdfFile = - agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile - - val cumsumPath = - dataBaseDir - .resolve(agglomerateFileKey.organizationId) - .resolve(agglomerateFileKey.datasetDirectoryName) - .resolve(agglomerateFileKey.layerName) - .resolve(agglomerateDir) - .resolve(cumsumFileName) - - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) - - val agglomerateIdCache = new AgglomerateIdCache(config.Datastore.Cache.AgglomerateFile.maxSegmentIdEntries, - config.Datastore.Cache.AgglomerateFile.blockSize) - - val defaultCache: Either[AgglomerateIdCache, BoundingBoxCache] = - if (Files.exists(cumsumPath)) { - Right(CumsumParser.parse(cumsumPath.toFile, config.Datastore.Cache.AgglomerateFile.cumsumMaxReaderRange)) - } else { - Left(agglomerateIdCache) + def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Array[Byte]] = + for { + mappingName <- request.settings.appliedAgglomerate.toFox + elementClass = request.dataLayer.elementClass + agglomerateFileAttachment = lookUpAgglomerateFile(request.dataSourceIdOrVolumeDummy, + request.dataLayer, + mappingName) + data <- agglomerateFileAttachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrAgglomerateService.applyAgglomerate(agglomerateFileAttachment, elementClass)(data) + case _ => hdf5AgglomerateService.applyAgglomerate(agglomerateFileAttachment, request)(data).toFox } + } yield data - CachedAgglomerateFile(reader, reader.`object`().openDataSet(datasetName), agglomerateIdCache, defaultCache) - } - - def generateSkeleton(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - mappingName: String, + def generateSkeleton(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = - if (useZarr) { - zarrAgglomerateService.generateSkeleton(organizationId, - datasetDirectoryName, - dataLayerName, - mappingName, - agglomerateId) - } else { - (try { - val before = Instant.now - val hdfFile = - dataBaseDir - .resolve(organizationId) - .resolve(datasetDirectoryName) - .resolve(dataLayerName) - .resolve(agglomerateDir) - .resolve(s"$mappingName.$agglomerateFileExtension") - .toFile - - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) - val positionsRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) - val edgesRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) - - val nodeCount = positionsRange(1) - positionsRange(0) - val edgeCount = edgesRange(1) - edgesRange(0) - val edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges - if (nodeCount > edgeLimit) { - throw new Exception(s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)") - } - if (edgeCount > edgeLimit) { - throw new Exception(s"Agglomerate has too many edges ($edgeCount > $edgeLimit)") - } - val positions: Array[Array[Long]] = - if (nodeCount == 0L) { - Array.empty[Array[Long]] - } else { - reader - .uint64() - .readMatrixBlockWithOffset("/agglomerate_to_positions", nodeCount.toInt, 3, positionsRange(0), 0) - } - val edges: Array[Array[Long]] = { - if (edgeCount == 0L) { - Array.empty[Array[Long]] - } else { - reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) - } - } - - val 
nodeIdStartAtOneOffset = 1 - - val nodes = positions.zipWithIndex.map { - case (pos, idx) => - NodeDefaults.createInstance.copy( - id = idx + nodeIdStartAtOneOffset, - position = Vec3IntProto(pos(0).toInt, pos(1).toInt, pos(2).toInt) - ) - } - - val skeletonEdges = edges.map { e => - Edge(source = e(0).toInt + nodeIdStartAtOneOffset, target = e(1).toInt + nodeIdStartAtOneOffset) - } - - val trees = Seq( - Tree( - treeId = math.abs(agglomerateId.toInt), // used only to deterministically select tree color - createdTimestamp = System.currentTimeMillis(), - // unsafeWrapArray is fine, because the underlying arrays are never mutated - nodes = ArraySeq.unsafeWrapArray(nodes), - edges = ArraySeq.unsafeWrapArray(skeletonEdges), - name = s"agglomerate $agglomerateId ($mappingName)", - `type` = Some(TreeTypeProto.AGGLOMERATE) - )) - - val skeleton = SkeletonTracingDefaults.createInstance.copy( - datasetName = datasetDirectoryName, - trees = trees + for { + before <- Instant.nowFox + skeleton <- agglomerateFileAttachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrAgglomerateService.generateSkeleton(agglomerateFileAttachment, agglomerateId) + case _ => hdf5AgglomerateService.generateSkeleton(agglomerateFileAttachment, agglomerateId).toFox + } + _ = if (Instant.since(before) > (100 milliseconds)) { + Instant.logSince( + before, + s"Generating skeleton from agglomerate file with ${skeleton.trees.headOption + .map(_.edges.length) + .getOrElse(0)} edges, ${skeleton.trees.headOption.map(_.nodes.length).getOrElse(0)} nodes", + logger ) + } + } yield skeleton - if (Instant.since(before) > (100 milliseconds)) { - Instant.logSince( - before, - s"Generating skeleton from agglomerate file with ${skeletonEdges.length} edges, ${nodes.length} nodes", - logger) - } - - Full(skeleton) - } catch { - case e: Exception => Failure(e.getMessage) - }).toFox - } - - def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Long] = - if (useZarr) zarrAgglomerateService.largestAgglomerateId(agglomerateFileKey) - else { - val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile - tryo { - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) - reader.`object`().getNumberOfElements("/agglomerate_to_segments_offsets") - 1L - }.toFox + def largestAgglomerateId(agglomerateFileAttachment: LayerAttachment)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Long] = + agglomerateFileAttachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.largestAgglomerateId(agglomerateFileAttachment) + case _ => hdf5AgglomerateService.largestAgglomerateId(agglomerateFileAttachment).toFox } - def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, + def segmentIdsForAgglomerateId(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = - if (useZarr) - zarrAgglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId) - else { - val hdfFile = - dataBaseDir - .resolve(agglomerateFileKey.organizationId) - .resolve(agglomerateFileKey.datasetDirectoryName) - .resolve(agglomerateFileKey.layerName) - .resolve(agglomerateDir) - .resolve(s"${agglomerateFileKey.mappingName}.$agglomerateFileExtension") - .toFile - - tryo { - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) - val positionsRange: Array[Long] = - 
reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) - - val segmentCount = positionsRange(1) - positionsRange(0) - val segmentIds: Array[Long] = - if (segmentCount == 0) Array.empty[Long] - else { - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", segmentCount.toInt, positionsRange(0)) - } - segmentIds.toSeq - }.toFox + agglomerateFileAttachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrAgglomerateService.segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId) + case _ => hdf5AgglomerateService.segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId).toFox } - def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long])( + def agglomerateIdsForSegmentIds(agglomerateFileAttachment: LayerAttachment, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = - if (useZarr) { - zarrAgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileKey, segmentIds) - } else { - val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileKey)(initHDFReader) - tryo { - val agglomerateIds = segmentIds.map { segmentId: Long => - cachedAgglomerateFile.agglomerateIdCache.withCache(segmentId, - cachedAgglomerateFile.reader, - cachedAgglomerateFile.dataset)(readHDF) - } - cachedAgglomerateFile.finishAccess() - agglomerateIds - }.toFox - } - - def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Vec3Int] = - if (useZarr) zarrAgglomerateService.positionForSegmentId(agglomerateFileKey, segmentId) - else { - val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile - val reader: IHDF5Reader = HDF5FactoryProvider.get.openForReading(hdfFile) - (for { - agglomerateIdArr: Array[Long] <- tryo( - reader.uint64().readArrayBlockWithOffset("/segment_to_agglomerate", 1, segmentId)) - agglomerateId = agglomerateIdArr(0) - segmentsRange: Array[Long] <- tryo( - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId)) - segmentIndex <- binarySearchForSegment(segmentsRange(0), segmentsRange(1), segmentId, reader) - position <- tryo( - reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_positions", 1, 3, segmentIndex, 0)(0)) - } yield Vec3Int(position(0).toInt, position(1).toInt, position(2).toInt)).toFox + agglomerateFileAttachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrAgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileAttachment, segmentIds) + case _ => hdf5AgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileAttachment, segmentIds).toFox } - @tailrec - private def binarySearchForSegment(rangeStart: Long, - rangeEnd: Long, - segmentId: Long, - reader: IHDF5Reader): Box[Long] = - if (rangeStart > rangeEnd) Failure("Could not find segmentId in agglomerate file") - else { - val middle = rangeStart + (rangeEnd - rangeStart) / 2 - val segmentIdAtMiddle: Long = reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", 1, middle)(0) - if (segmentIdAtMiddle == segmentId) Full(middle) - else if (segmentIdAtMiddle < segmentId) binarySearchForSegment(middle + 1L, rangeEnd, segmentId, reader) - else binarySearchForSegment(rangeStart, middle - 1L, segmentId, reader) + def positionForSegmentId(agglomerateFileAttachment: LayerAttachment, + segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Vec3Int] = + 
agglomerateFileAttachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrAgglomerateService.positionForSegmentId(agglomerateFileAttachment, segmentId) + case _ => hdf5AgglomerateService.positionForSegmentId(agglomerateFileAttachment, segmentId).toFox } - def generateAgglomerateGraph(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)( + def generateAgglomerateGraph(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long)( implicit ec: ExecutionContext, tc: TokenContext): Fox[AgglomerateGraph] = - if (useZarr) - zarrAgglomerateService.generateAgglomerateGraph(agglomerateFileKey, agglomerateId) - else { - tryo { - val hdfFile = agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile - - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) - - val positionsRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) - val edgesRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) - - val nodeCount = positionsRange(1) - positionsRange(0) - val edgeCount = edgesRange(1) - edgesRange(0) - val edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges - if (nodeCount > edgeLimit) { - throw new Exception(s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)") - } - if (edgeCount > edgeLimit) { - throw new Exception(s"Agglomerate has too many edges ($edgeCount > $edgeLimit)") - } - val segmentIds: Array[Long] = - if (nodeCount == 0L) Array[Long]() - else - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", nodeCount.toInt, positionsRange(0)) - val positions: Array[Array[Long]] = - if (nodeCount == 0L) Array[Array[Long]]() - else - reader - .uint64() - .readMatrixBlockWithOffset("/agglomerate_to_positions", nodeCount.toInt, 3, positionsRange(0), 0) - val edges: Array[Array[Long]] = - if (edgeCount == 0L) Array[Array[Long]]() - else - reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) - val affinities: Array[Float] = - if (edgeCount == 0L) Array[Float]() - else - reader.float32().readArrayBlockWithOffset("/agglomerate_to_affinities", edgeCount.toInt, edgesRange(0)) - - AgglomerateGraph( - // unsafeWrapArray is fine, because the underlying arrays are never mutated - segments = ArraySeq.unsafeWrapArray(segmentIds), - edges = ArraySeq.unsafeWrapArray( - edges.map(e => AgglomerateEdge(source = segmentIds(e(0).toInt), target = segmentIds(e(1).toInt)))), - positions = - ArraySeq.unsafeWrapArray(positions.map(pos => Vec3IntProto(pos(0).toInt, pos(1).toInt, pos(2).toInt))), - affinities = ArraySeq.unsafeWrapArray(affinities) - ) - }.toFox + agglomerateFileAttachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrAgglomerateService.generateAgglomerateGraph(agglomerateFileAttachment, agglomerateId) + case _ => + hdf5AgglomerateService.generateAgglomerateGraph(agglomerateFileAttachment, agglomerateId).toFox } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala new file mode 100644 index 00000000000..e9790815b21 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala @@ -0,0 +1,302 @@ +package com.scalableminds.webknossos.datastore.services + +import ch.systemsx.cisd.hdf5.{HDF5DataSet, 
HDF5FactoryProvider, IHDF5Reader} +import com.scalableminds.util.geometry.Vec3Int +import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} +import com.scalableminds.webknossos.datastore.DataStoreConfig +import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} +import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto +import com.scalableminds.webknossos.datastore.helpers.{NodeDefaults, SkeletonTracingDefaults} +import com.scalableminds.webknossos.datastore.models.datasource.{ElementClass, LayerAttachment} +import com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest +import com.scalableminds.webknossos.datastore.storage.{ + AgglomerateFileCache, + AgglomerateIdCache, + BoundingBoxCache, + CachedAgglomerateFile, + CumsumParser +} +import net.liftweb.common.{Box, Failure, Full} +import net.liftweb.common.Box.tryo + +import java.nio.{ByteBuffer, ByteOrder, LongBuffer} +import java.nio.file.{Files, Path} +import javax.inject.Inject +import scala.annotation.tailrec +import scala.collection.compat.immutable.ArraySeq + +class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter { + + // TODO clear on reload + lazy val agglomerateFileCache = new AgglomerateFileCache(config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) + + private def openHdf5(agglomerateFileAttachment: LayerAttachment): IHDF5Reader = { + if (agglomerateFileAttachment.path.getScheme.nonEmpty && agglomerateFileAttachment.path.getScheme != "file") { + throw new Exception( + "Trying to open non-local hdf5 agglomerate file. Hdf5 agglomerate files are only supported on the datastore-local file system") + } + HDF5FactoryProvider.get.openForReading(Path.of(agglomerateFileAttachment.path).toFile) + } + + def largestAgglomerateId(agglomerateFileAttachment: LayerAttachment): Box[Long] = + tryo { + val reader = openHdf5(agglomerateFileAttachment) + reader.`object`().getNumberOfElements("/agglomerate_to_segments_offsets") - 1L + } + + def applyAgglomerate(agglomerateFileAttachment: LayerAttachment, request: DataServiceDataRequest)( + data: Array[Byte]): Box[Array[Byte]] = tryo { + + def convertToAgglomerate(input: Array[Long], + bytesPerElement: Int, + bufferFunc: (ByteBuffer, Long) => ByteBuffer): Array[Byte] = { + + val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileAttachment)(openAsCachedAgglomerateFile) + + val agglomerateIds = cachedAgglomerateFile.cache match { + case Left(agglomerateIdCache) => + input.map(el => + agglomerateIdCache.withCache(el, cachedAgglomerateFile.reader, cachedAgglomerateFile.dataset)(readHDF)) + case Right(boundingBoxCache) => + boundingBoxCache.withCache(request, input, cachedAgglomerateFile.reader)(readHDF) + } + cachedAgglomerateFile.finishAccess() + + agglomerateIds + .foldLeft(ByteBuffer.allocate(bytesPerElement * input.length).order(ByteOrder.LITTLE_ENDIAN))(bufferFunc) + .array + } + + val bytesPerElement = ElementClass.bytesPerElement(request.dataLayer.elementClass) + /* Every value of the segmentation data needs to be converted to Long to then look up the + agglomerate id in the segment-to-agglomerate array. + The value is first converted to the primitive signed number types, and then converted + to Long via uByteToLong, uShortToLong etc, which perform bitwise and to take care of + the unsigned semantics. Using functions avoids allocating intermediate SegmentInteger objects. 
+ Allocating a fixed-length LongBuffer first is a further performance optimization. + */ + convertData(data, request.dataLayer.elementClass) match { + case data: Array[Byte] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uByteToLong(e))) + convertToAgglomerate(longBuffer.array, bytesPerElement, putByte) + case data: Array[Short] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uShortToLong(e))) + convertToAgglomerate(longBuffer.array, bytesPerElement, putShort) + case data: Array[Int] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uIntToLong(e))) + convertToAgglomerate(longBuffer.array, bytesPerElement, putInt) + case data: Array[Long] => convertToAgglomerate(data, bytesPerElement, putLong) + case _ => data + } + } + + def agglomerateIdsForSegmentIds(agglomerateFileAttachment: LayerAttachment, segmentIds: Seq[Long]): Box[Seq[Long]] = { + val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileAttachment)(openAsCachedAgglomerateFile) + tryo { + val agglomerateIds = segmentIds.map { segmentId: Long => + cachedAgglomerateFile.agglomerateIdCache.withCache(segmentId, + cachedAgglomerateFile.reader, + cachedAgglomerateFile.dataset)(readHDF) + } + cachedAgglomerateFile.finishAccess() + agglomerateIds + } + } + + def generateSkeleton(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long): Box[SkeletonTracing] = + try { + val reader = openHdf5(agglomerateFileAttachment) + val positionsRange: Array[Long] = + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) + val edgesRange: Array[Long] = + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) + + val nodeCount = positionsRange(1) - positionsRange(0) + val edgeCount = edgesRange(1) - edgesRange(0) + val edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges + if (nodeCount > edgeLimit) { + throw new Exception(s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)") + } + if (edgeCount > edgeLimit) { + throw new Exception(s"Agglomerate has too many edges ($edgeCount > $edgeLimit)") + } + val positions: Array[Array[Long]] = + if (nodeCount == 0L) { + Array.empty[Array[Long]] + } else { + reader + .uint64() + .readMatrixBlockWithOffset("/agglomerate_to_positions", nodeCount.toInt, 3, positionsRange(0), 0) + } + val edges: Array[Array[Long]] = { + if (edgeCount == 0L) { + Array.empty[Array[Long]] + } else { + reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) + } + } + + val nodeIdStartAtOneOffset = 1 + + val nodes = positions.zipWithIndex.map { + case (pos, idx) => + NodeDefaults.createInstance.copy( + id = idx + nodeIdStartAtOneOffset, + position = Vec3IntProto(pos(0).toInt, pos(1).toInt, pos(2).toInt) + ) + } + + val skeletonEdges = edges.map { e => + Edge(source = e(0).toInt + nodeIdStartAtOneOffset, target = e(1).toInt + nodeIdStartAtOneOffset) + } + + val trees = Seq( + Tree( + treeId = math.abs(agglomerateId.toInt), // used only to deterministically select tree color + createdTimestamp = System.currentTimeMillis(), + // unsafeWrapArray is fine, because the underlying arrays are never mutated + nodes = ArraySeq.unsafeWrapArray(nodes), + edges = ArraySeq.unsafeWrapArray(skeletonEdges), + name = s"agglomerate $agglomerateId (${agglomerateFileAttachment.name})", + `type` = Some(TreeTypeProto.AGGLOMERATE) + )) + + val skeleton = 
SkeletonTracingDefaults.createInstance.copy(trees = trees) + Full(skeleton) + } catch { + case e: Exception => Failure(e.getMessage) + } + + def generateAgglomerateGraph(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long): Box[AgglomerateGraph] = + tryo { + val reader = openHdf5(agglomerateFileAttachment) + + val positionsRange: Array[Long] = + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) + val edgesRange: Array[Long] = + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) + + val nodeCount = positionsRange(1) - positionsRange(0) + val edgeCount = edgesRange(1) - edgesRange(0) + val edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges + if (nodeCount > edgeLimit) { + throw new Exception(s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)") + } + if (edgeCount > edgeLimit) { + throw new Exception(s"Agglomerate has too many edges ($edgeCount > $edgeLimit)") + } + val segmentIds: Array[Long] = + if (nodeCount == 0L) Array[Long]() + else + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", nodeCount.toInt, positionsRange(0)) + val positions: Array[Array[Long]] = + if (nodeCount == 0L) Array[Array[Long]]() + else + reader + .uint64() + .readMatrixBlockWithOffset("/agglomerate_to_positions", nodeCount.toInt, 3, positionsRange(0), 0) + val edges: Array[Array[Long]] = + if (edgeCount == 0L) Array[Array[Long]]() + else + reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) + val affinities: Array[Float] = + if (edgeCount == 0L) Array[Float]() + else + reader.float32().readArrayBlockWithOffset("/agglomerate_to_affinities", edgeCount.toInt, edgesRange(0)) + + AgglomerateGraph( + // unsafeWrapArray is fine, because the underlying arrays are never mutated + segments = ArraySeq.unsafeWrapArray(segmentIds), + edges = ArraySeq.unsafeWrapArray( + edges.map(e => AgglomerateEdge(source = segmentIds(e(0).toInt), target = segmentIds(e(1).toInt)))), + positions = + ArraySeq.unsafeWrapArray(positions.map(pos => Vec3IntProto(pos(0).toInt, pos(1).toInt, pos(2).toInt))), + affinities = ArraySeq.unsafeWrapArray(affinities) + ) + } + + def segmentIdsForAgglomerateId(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long): Box[Seq[Long]] = + tryo { + val reader = openHdf5(agglomerateFileAttachment) + val positionsRange: Array[Long] = + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) + + val segmentCount = positionsRange(1) - positionsRange(0) + val segmentIds: Array[Long] = + if (segmentCount == 0) Array.empty[Long] + else { + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", segmentCount.toInt, positionsRange(0)) + } + segmentIds.toSeq + } + + def positionForSegmentId(agglomerateFileAttachment: LayerAttachment, segmentId: Long): Box[Vec3Int] = { + val reader: IHDF5Reader = openHdf5(agglomerateFileAttachment) + for { + agglomerateIdArr: Array[Long] <- tryo( + reader.uint64().readArrayBlockWithOffset("/segment_to_agglomerate", 1, segmentId)) + agglomerateId = agglomerateIdArr(0) + segmentsRange: Array[Long] <- tryo( + reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId)) + segmentIndex <- binarySearchForSegment(segmentsRange(0), segmentsRange(1), segmentId, reader) + position <- tryo(reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_positions", 1, 3, segmentIndex, 0)(0)) + } yield Vec3Int(position(0).toInt, 
position(1).toInt, position(2).toInt) + } + + @tailrec + private def binarySearchForSegment(rangeStart: Long, + rangeEnd: Long, + segmentId: Long, + reader: IHDF5Reader): Box[Long] = + if (rangeStart > rangeEnd) Failure("Could not find segmentId in agglomerate file") + else { + val middle = rangeStart + (rangeEnd - rangeStart) / 2 + val segmentIdAtMiddle: Long = reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", 1, middle)(0) + if (segmentIdAtMiddle == segmentId) Full(middle) + else if (segmentIdAtMiddle < segmentId) binarySearchForSegment(middle + 1L, rangeEnd, segmentId, reader) + else binarySearchForSegment(rangeStart, middle - 1L, segmentId, reader) + } + + // This uses a HDF5DataSet, which improves performance per call but doesn't permit parallel calls with the same dataset. + private def readHDF(reader: IHDF5Reader, hdf5Dataset: HDF5DataSet, segmentId: Long, blockSize: Long): Array[Long] = + // We don't need to differentiate between the data types because the underlying library does the conversion for us + reader.uint64().readArrayBlockWithOffset(hdf5Dataset, blockSize.toInt, segmentId) + + // This uses the datasetName, which allows us to call it on the same hdf file in parallel. + private def readHDF(reader: IHDF5Reader, datasetName: String, segmentId: Long, blockSize: Long) = + reader.uint64().readArrayBlockWithOffset(datasetName, blockSize.toInt, segmentId) + + // An agglomerate file holds information about a specific mapping. wK translates the segment ids to agglomerate ids by looking at the HDF5 dataset "/segment_to_agglomerate". + // In this array, the agglomerate id is found by using the segment id as index. + // There are two ways of how we prevent a file lookup for every input element. When present, we use the cumsum.json to initialize a BoundingBoxCache (see comment there). + // Otherwise, we read configurable sized blocks from the agglomerate file and save them in a LRU cache. 
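// --- Illustrative aside, not part of the patch: a rough sketch of the block-wise caching idea
// --- the comment above describes, with a plain in-memory Array[Long] standing in for the HDF5
// --- dataset. A lookup loads a whole fixed-size block once and answers later lookups for
// --- neighbouring segment ids from memory. Eviction is omitted for brevity; the real
// --- AgglomerateIdCache is bounded. `ToyBlockCache` is a hypothetical name, not the real class.
class ToyBlockCache(segmentToAgglomerate: Array[Long], blockSize: Int) {
  private val blockCache = scala.collection.mutable.Map.empty[Long, Array[Long]]

  private def loadBlock(blockIndex: Long): Array[Long] = {
    val from = (blockIndex * blockSize).toInt
    val until = math.min(from + blockSize, segmentToAgglomerate.length)
    segmentToAgglomerate.slice(from, until) // stands in for one readArrayBlockWithOffset call
  }

  def agglomerateIdFor(segmentId: Long): Long = {
    val blockIndex = segmentId / blockSize
    val block = blockCache.getOrElseUpdate(blockIndex, loadBlock(blockIndex))
    block((segmentId % blockSize).toInt)
  }
}

object ToyBlockCacheExample {
  def main(args: Array[String]): Unit = {
    val cache = new ToyBlockCache(Array(0L, 7L, 7L, 9L, 9L, 11L), blockSize = 4)
    // segment ids 1 and 2 fall into the same block, so only two block loads happen in total
    println(Seq(1L, 2L, 5L).map(cache.agglomerateIdFor))
  }
}
// --- end of illustrative aside ---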
+ private def openAsCachedAgglomerateFile(agglomerateFileAttachment: LayerAttachment) = { + val hdfFile = + agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile + + val cumsumPath = + dataBaseDir + .resolve(agglomerateFileKey.organizationId) + .resolve(agglomerateFileKey.datasetDirectoryName) + .resolve(agglomerateFileKey.layerName) + .resolve(agglomerateDir) + .resolve(cumsumFileName) + + val reader = HDF5FactoryProvider.get.openForReading(hdfFile) + + val agglomerateIdCache = new AgglomerateIdCache(config.Datastore.Cache.AgglomerateFile.maxSegmentIdEntries, + config.Datastore.Cache.AgglomerateFile.blockSize) + + val defaultCache: Either[AgglomerateIdCache, BoundingBoxCache] = + if (Files.exists(cumsumPath)) { + Right(CumsumParser.parse(cumsumPath.toFile, config.Datastore.Cache.AgglomerateFile.cumsumMaxReaderRange)) + } else { + Left(agglomerateIdCache) + } + + CachedAgglomerateFile(reader, reader.`object`().openDataSet(datasetName), agglomerateIdCache, defaultCache) + } +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala index 108d6b4c9ce..f75b574fcc8 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala @@ -189,7 +189,8 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, datasetDirectoryName: String, dataLayerName: String, mappingName: Option[String])(segmentOrAgglomerateId: Long, mag: Vec3Int)( - implicit tc: TokenContext): Fox[Set[Vec3IntProto]] = + implicit tc: TokenContext, + m: MessagesProvider): Fox[Set[Vec3IntProto]] = for { segmentIds <- getSegmentIdsForAgglomerateIdIfNeeded(organizationId, datasetDirectoryName, @@ -217,22 +218,20 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, datasetDirectoryName: String, dataLayerName: String, segmentOrAgglomerateId: Long, - mappingNameOpt: Option[String])(implicit tc: TokenContext): Fox[Seq[Long]] = + mappingNameOpt: Option[String])(implicit tc: TokenContext, m: MessagesProvider): Fox[Seq[Long]] = // Editable mappings cannot happen here since those requests go to the tracingstore mappingNameOpt match { case Some(mappingName) => for { + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileKey = AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ) - largestAgglomerateId <- agglomerateService.largestAgglomerateId(agglomerateFileKey) + agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + largestAgglomerateId <- agglomerateService.largestAgglomerateId(agglomerateFileAttachment) segmentIds <- if (segmentOrAgglomerateId <= largestAgglomerateId) { agglomerateService.segmentIdsForAgglomerateId( - agglomerateFileKey, + agglomerateFileAttachment, segmentOrAgglomerateId ) } else diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala new file mode 100644 index 00000000000..87b5011a9c8 --- /dev/null +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -0,0 +1,292 @@ +package com.scalableminds.webknossos.datastore.services + +import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.cache.AlfuCache +import com.scalableminds.util.geometry.Vec3Int +import com.scalableminds.util.time.Instant +import com.scalableminds.util.tools.Fox +import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} +import com.scalableminds.webknossos.datastore.DataStoreConfig +import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} +import com.scalableminds.webknossos.datastore.datareaders.{DatasetArray, MultiArrayUtils} +import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array +import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto +import com.scalableminds.webknossos.datastore.helpers.{NativeBucketScanner, NodeDefaults, SkeletonTracingDefaults} +import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass, LayerAttachment} +import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptor} +import com.typesafe.scalalogging.LazyLogging +import net.liftweb.common.Box.tryo +import net.liftweb.common.{Box, Full} +import ucar.ma2.{Array => MultiArray} + +import java.nio.{ByteBuffer, ByteOrder, LongBuffer} +import javax.inject.Inject +import scala.collection.compat.immutable.ArraySeq +import scala.concurrent.ExecutionContext + +class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService: DataVaultService) + extends DataConverter + with LazyLogging { + + private lazy val openArraysCache = AlfuCache[(LayerAttachment, String), DatasetArray]() + + // TODO unify with existing chunkContentsCache from binaryDataService? + private lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { + // Used by DatasetArray-based datasets. 
Measure item weight in kilobytes because the weigher can only return int, not long + + val maxSizeKiloBytes = Math.floor(config.Datastore.Cache.ImageArrayChunks.maxSizeBytes.toDouble / 1000.0).toInt + + def cacheWeight(key: String, arrayBox: Box[MultiArray]): Int = + arrayBox match { + case Full(array) => + (array.getSizeBytes / 1000L).toInt + case _ => 0 + } + + AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) + } + + protected lazy val bucketScanner = new NativeBucketScanner() + + private def mapSingleSegment(segmentToAgglomerate: DatasetArray, segmentId: Long)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Long] = + for { + asMultiArray <- segmentToAgglomerate.readAsMultiArray(offset = segmentId, shape = 1) + } yield asMultiArray.getLong(0) + + private def openZarrArrayCached(agglomerateFileAttachment: LayerAttachment, + zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext) = + openArraysCache.getOrLoad((agglomerateFileAttachment, zarrArrayName), + _ => openZarrArray(agglomerateFileAttachment, zarrArrayName)) + + private def openZarrArray(agglomerateFileAttachment: LayerAttachment, + zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = + for { + groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(agglomerateFileAttachment.path, None)) + segmentToAgglomeratePath = groupVaultPath / zarrArrayName + zarrArray <- Zarr3Array.open(segmentToAgglomeratePath, + DataSourceId("zarr", "test"), + "layer", + None, + None, + None, + sharedChunkContentsCache) + } yield zarrArray + + def applyAgglomerate(agglomerateFileAttachment: LayerAttachment, elementClass: ElementClass.Value)( + data: Array[Byte])(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = { + + def convertToAgglomerate(segmentIds: Array[Long], + relevantAgglomerateMap: Map[Long, Long], + bytesPerElement: Int, + putToBufferFunction: (ByteBuffer, Long) => ByteBuffer): Array[Byte] = { + val agglomerateIds = segmentIds.map(relevantAgglomerateMap) + agglomerateIds + .foldLeft(ByteBuffer.allocate(bytesPerElement * segmentIds.length).order(ByteOrder.LITTLE_ENDIAN))( + putToBufferFunction) + .array + } + + val bytesPerElement = ElementClass.bytesPerElement(elementClass) + val distinctSegmentIds = + bucketScanner.collectSegmentIds(data, bytesPerElement, isSigned = false, skipZeroes = false) + + for { + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileAttachment, "segment_to_agglomerate") + beforeBuildMap = Instant.now + relevantAgglomerateMap: Map[Long, Long] <- Fox + .serialCombined(distinctSegmentIds) { segmentId => + mapSingleSegment(segmentToAgglomerate, segmentId).map((segmentId, _)) + } + .map(_.toMap) + _ = Instant.logSince(beforeBuildMap, "build map") + mappedBytes: Array[Byte] = convertData(data, elementClass) match { + case data: Array[Byte] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uByteToLong(e))) + convertToAgglomerate(longBuffer.array, relevantAgglomerateMap, bytesPerElement, putByte) + case data: Array[Short] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uShortToLong(e))) + convertToAgglomerate(longBuffer.array, relevantAgglomerateMap, bytesPerElement, putShort) + case data: Array[Int] => + val longBuffer = LongBuffer.allocate(data.length) + data.foreach(e => longBuffer.put(uIntToLong(e))) + convertToAgglomerate(longBuffer.array, relevantAgglomerateMap, bytesPerElement, putInt) + case data: Array[Long] => 
convertToAgglomerate(data, relevantAgglomerateMap, bytesPerElement, putLong) + case _ => data + } + } yield mappedBytes + } + + def generateSkeleton(agglomerateFileAttachment: LayerAttachment, + agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = + for { + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") + agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_edges_offsets") + + positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) + edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) + nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) + edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) + edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges + _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" + _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" + agglomerateToPositions <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_positions") + positions <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), + shape = Array(nodeCount.toInt, 3)) + agglomerateToEdges <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_edges") + edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), + shape = Array(edgeCount.toInt, 2)) + nodeIdStartAtOneOffset = 1 + + // TODO use multiarray index iterators? + nodes = (0 until nodeCount.toInt).map { nodeIdx => + NodeDefaults.createInstance.copy( + id = nodeIdx + nodeIdStartAtOneOffset, + position = Vec3IntProto( + positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) + ) + ) + } + + skeletonEdges = (0 until edges.getShape()(0)).map { edgeIdx => + Edge( + source = edges.getInt(edges.getIndex.set(Array(edgeIdx, 0))) + nodeIdStartAtOneOffset, + target = edges.getInt(edges.getIndex.set(Array(edgeIdx, 1))) + nodeIdStartAtOneOffset + ) + } + + trees = Seq( + Tree( + treeId = math.abs(agglomerateId.toInt), // used only to deterministically select tree color + createdTimestamp = System.currentTimeMillis(), + // unsafeWrapArray is fine, because the underlying arrays are never mutated + nodes = nodes, + edges = skeletonEdges, + name = s"agglomerate $agglomerateId (${agglomerateFileAttachment.name})", + `type` = Some(TreeTypeProto.AGGLOMERATE) + )) + + skeleton = SkeletonTracingDefaults.createInstance.copy(trees = trees) + } yield skeleton + + def largestAgglomerateId(agglomerateFileAttachment: LayerAttachment)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Long] = + for { + array <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") + shape <- array.datasetShape.toFox ?~> "Could not determine array shape" + shapeFirstElement <- tryo(shape(0)).toFox + } yield shapeFirstElement + + def generateAgglomerateGraph(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[AgglomerateGraph] = + for { + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") + agglomerateToEdgesOffsets <- 
openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_edges_offsets") + + positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) + edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) + nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) + edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) + edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges + _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" + _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" + agglomerateToPositions <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_positions") + positions: MultiArray <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), + shape = Array(nodeCount.toInt, 3)) + agglomerateToSegments <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments") + segmentIdsMA: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = positionsRange.getInt(0), + shape = nodeCount.toInt) + segmentIds: Array[Long] <- MultiArrayUtils.toLongArray(segmentIdsMA).toFox + agglomerateToEdges <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_edges") + edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), + shape = Array(edgeCount.toInt, 2)) + agglomerateToAffinities <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_affinities") + affinities: MultiArray <- agglomerateToAffinities.readAsMultiArray(offset = edgesRange.getLong(0), + shape = edgeCount.toInt) + + agglomerateGraph = AgglomerateGraph( + // unsafeWrapArray is fine, because the underlying arrays are never mutated + segments = ArraySeq.unsafeWrapArray(segmentIds), + edges = (0 until edges.getShape()(0)).map { edgeIdx: Int => + AgglomerateEdge( + source = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 0)))), + target = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 1)))) + ) + }, + positions = (0 until nodeCount.toInt).map { nodeIdx: Int => + Vec3IntProto( + positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) + ) + }, + affinities = ArraySeq.unsafeWrapArray(affinities.getStorage.asInstanceOf[Array[Float]]) + ) + } yield agglomerateGraph + + def segmentIdsForAgglomerateId(agglomerateFileAttachment: LayerAttachment, + agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = + for { + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") + agglomerateToSegments <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments") + segmentRange <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) + segmentCount = segmentRange.getLong(1) - segmentRange.getLong(0) + segmentIds <- if (segmentCount == 0) + Fox.successful(Array.empty[Long]) + else + agglomerateToSegments + .readAsMultiArray(offset = segmentRange.getLong(0), shape = segmentCount.toInt) + .flatMap(MultiArrayUtils.toLongArray(_).toFox) + } yield segmentIds.toSeq + + def agglomerateIdsForSegmentIds(agglomerateFileAttachment: LayerAttachment, segmentIds: Seq[Long])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[Seq[Long]] = + for { + 
segmentToAgglomerate <- openZarrArrayCached(agglomerateFileAttachment, "segment_to_agglomerate") + agglomerateIds <- Fox.serialCombined(segmentIds) { segmentId => + mapSingleSegment(segmentToAgglomerate, segmentId) + } + } yield agglomerateIds + + def positionForSegmentId(agglomerateFileAttachment: LayerAttachment, + segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Vec3Int] = + for { + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileAttachment, "segment_to_agglomerate") + agglomerateId <- mapSingleSegment(segmentToAgglomerate, segmentId) + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") + segmentsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) + agglomerateToSegments <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments") + segmentIndex <- binarySearchForSegment(segmentsRange.getLong(0), + segmentsRange.getLong(1), + segmentId, + agglomerateToSegments) + agglomerateToPositions <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_positions") + position <- agglomerateToPositions.readAsMultiArray(offset = Array(segmentIndex, 0), shape = Array(3, 1)) + } yield Vec3Int(position.getInt(0), position.getInt(1), position.getInt(2)) + + private def binarySearchForSegment( + rangeStart: Long, + rangeEnd: Long, + segmentId: Long, + agglomerateToSegments: DatasetArray)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = + if (rangeStart > rangeEnd) Fox.failure("Could not find segmentId in agglomerate file") + else { + val middle = rangeStart + (rangeEnd - rangeStart) / 2 + for { + segmentIdAtMiddleMA <- agglomerateToSegments.readAsMultiArray(offset = middle, shape = 1) + segmentIdAdMiddleArray: Array[Long] <- MultiArrayUtils.toLongArray(segmentIdAtMiddleMA).toFox + segmentIdAtMiddle = segmentIdAdMiddleArray(0) + segmentIndex <- if (segmentIdAtMiddle == segmentId) + Fox.successful(middle) + else if (segmentIdAtMiddle < segmentId) { + binarySearchForSegment(middle + 1L, rangeEnd, segmentId, agglomerateToSegments) + } else binarySearchForSegment(rangeStart, middle - 1L, segmentId, agglomerateToSegments) + } yield segmentIndex + } +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala index 9a9e2106ba4..98817d4da7e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala @@ -1,11 +1,10 @@ package com.scalableminds.webknossos.datastore.storage -import java.nio.file.Path import java.util - import ch.systemsx.cisd.hdf5.{HDF5DataSet, IHDF5Reader} import com.scalableminds.util.cache.LRUConcurrentCache import com.scalableminds.webknossos.datastore.dataformats.SafeCachable +import com.scalableminds.webknossos.datastore.models.datasource.LayerAttachment import com.scalableminds.webknossos.datastore.models.requests.{Cuboid, DataServiceDataRequest} import com.typesafe.scalalogging.LazyLogging @@ -19,45 +18,12 @@ case class CachedAgglomerateFile(reader: IHDF5Reader, override protected def onFinalize(): Unit = { dataset.close(); reader.close() } } -case class AgglomerateFileKey( - organizationId: String, - datasetDirectoryName: String, - layerName: String, - mappingName: String -) { - def path(dataBaseDir: 
Path, agglomerateDir: String, agglomerateFileExtension: String): Path = - dataBaseDir - .resolve(organizationId) - .resolve(datasetDirectoryName) - .resolve(layerName) - .resolve(agglomerateDir) - .resolve(s"$mappingName.$agglomerateFileExtension") - - def zarrGroupPath(dataBaseDir: Path, agglomerateDir: String): Path = - dataBaseDir - .resolve(organizationId) - .resolve(datasetDirectoryName) - .resolve(layerName) - .resolve(agglomerateDir) - .resolve(mappingName) -} - -object AgglomerateFileKey { - def fromDataRequest(dataRequest: DataServiceDataRequest): AgglomerateFileKey = - AgglomerateFileKey( - dataRequest.dataSourceIdOrVolumeDummy.organizationId, - dataRequest.dataSourceIdOrVolumeDummy.directoryName, - dataRequest.dataLayer.name, - dataRequest.settings.appliedAgglomerate.get - ) -} - -class AgglomerateFileCache(val maxEntries: Int) extends LRUConcurrentCache[AgglomerateFileKey, CachedAgglomerateFile] { - override def onElementRemoval(key: AgglomerateFileKey, value: CachedAgglomerateFile): Unit = +class AgglomerateFileCache(val maxEntries: Int) extends LRUConcurrentCache[LayerAttachment, CachedAgglomerateFile] { + override def onElementRemoval(key: LayerAttachment, value: CachedAgglomerateFile): Unit = value.scheduleForRemoval() - def withCache(agglomerateFileKey: AgglomerateFileKey)( - loadFn: AgglomerateFileKey => CachedAgglomerateFile): CachedAgglomerateFile = { + def withCache(agglomerateFileKey: LayerAttachment)( + loadFn: LayerAttachment => CachedAgglomerateFile): CachedAgglomerateFile = { def handleUncachedAgglomerateFile() = { val agglomerateFile = loadFn(agglomerateFileKey) From 19641af7243452b5a010b054b0b07a12edfab254 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 3 Jun 2025 13:46:42 +0200 Subject: [PATCH 023/100] pass datasource id + layer --- .../controllers/DSMeshController.scala | 9 ++++-- .../controllers/DataSourceController.scala | 27 ++++++++++------- .../services/AgglomerateService.scala | 9 +++--- .../services/BinaryDataService.scala | 11 +++---- .../services/Hdf5AgglomerateService.scala | 27 +++++++++-------- .../services/SegmentIndexFileService.scala | 2 +- .../services/mesh/DSFullMeshService.scala | 8 +++-- .../services/mesh/MeshMappingHelper.scala | 29 +++++-------------- .../storage/AgglomerateFileCache.scala | 8 ++--- 9 files changed, 63 insertions(+), 67 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index 9d838421d4e..e526c626d12 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -23,6 +23,7 @@ class DSMeshController @Inject()( meshFileService: MeshFileService, neuroglancerPrecomputedMeshService: NeuroglancerPrecomputedMeshFileService, fullMeshService: DSFullMeshService, + dataSourceRepository: DataSourceRepository, val dsRemoteWebknossosClient: DSRemoteWebknossosClient, val dsRemoteTracingstoreClient: DSRemoteTracingstoreClient, val binaryDataServiceHolder: BinaryDataServiceHolder @@ -66,10 +67,12 @@ class DSMeshController @Inject()( datasetDirectoryName, dataLayerName, request.body.meshFile.name) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) segmentIds: Seq[Long] <- segmentIdsForAgglomerateIdIfNeeded( - organizationId, - 
datasetDirectoryName, - dataLayerName, + dataSource.id, + dataLayer, targetMappingName, editableMappingTracingId, request.body.segmentId, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index e1f37242eb3..e64dfc6c0aa 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -21,7 +21,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, Data import com.scalableminds.webknossos.datastore.services._ import com.scalableminds.webknossos.datastore.services.mesh.{MeshFileService, MeshMappingHelper} import com.scalableminds.webknossos.datastore.services.uploading._ -import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, DataVaultService} +import com.scalableminds.webknossos.datastore.storage.DataVaultService import net.liftweb.common.Box.tryo import net.liftweb.common.{Box, Empty, Failure, Full} import play.api.data.Form @@ -299,9 +299,12 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateGraph <- agglomerateService.generateAgglomerateGraph( - AgglomerateFileKey(organizationId, datasetDirectoryName, dataLayerName, mappingName), - agglomerateId) ?~> "agglomerateGraph.failed" + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateGraph <- agglomerateService + .generateAgglomerateGraph(agglomerateFileAttachment, agglomerateId) ?~> "agglomerateGraph.failed" } yield Ok(agglomerateGraph.toByteArray).as(protobufMimeType) } } @@ -608,10 +611,12 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) segmentIds <- segmentIdsForAgglomerateIdIfNeeded( - organizationId, - datasetDirectoryName, - dataLayerName, + dataSource.id, + dataLayer, request.body.mappingName, request.body.editableMappingTracingId, segmentId.toLong, @@ -645,12 +650,14 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) segmentIdsAndBucketPositions <- Fox.serialCombined(request.body.segmentIds) { segmentOrAgglomerateId => for { segmentIds <- segmentIdsForAgglomerateIdIfNeeded( - organizationId, - datasetDirectoryName, - dataLayerName, + dataSource.id, + dataLayer, request.body.mappingName, request.body.editableMappingTracingId, segmentOrAgglomerateId, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index e625078bf1d..141d0c0ddcf 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -31,12 +31,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, with FoxImplicits { private val agglomerateDir = "agglomerates" private val agglomerateFileExtension = "hdf5" - private val datasetName = "/segment_to_agglomerate" private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) - private val cumsumFileName = "cumsum.json" - - // TODO remove - private val useZarr = false def exploreAgglomerates(organizationId: String, datasetDirectoryName: String, dataLayerName: String): Set[String] = { val layerDir = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayerName) @@ -73,6 +68,10 @@ class AgglomerateService @Inject()(config: DataStoreConfig, ) } + def clearCaches(hdf5Predicate: LayerAttachment => Boolean): Int = + // TODO also clear zarr caches + hdf5AgglomerateService.agglomerateFileCache.clear(hdf5Predicate) + // TODO cache? def lookUpAgglomerateFile(dataSourceId: DataSourceId, dataLayer: DataLayer, mappingName: String): LayerAttachment = { val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala index 58d4dd006e1..00b9db56e1a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala @@ -7,7 +7,7 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.tools.ExtendedTypes.ExtendedArraySeq import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.models.BucketPosition -import com.scalableminds.webknossos.datastore.models.datasource.{Category, DataLayer, DataSourceId} +import com.scalableminds.webknossos.datastore.models.datasource.{Category, DataLayer, DataSourceId, LayerAttachment} import com.scalableminds.webknossos.datastore.models.requests.{DataReadInstruction, DataServiceDataRequest} import com.scalableminds.webknossos.datastore.storage._ import com.typesafe.scalalogging.LazyLogging @@ -258,16 +258,17 @@ class BinaryDataService(val dataBaseDir: Path, def clearCache(organizationId: String, datasetDirectoryName: String, layerName: Option[String]): (Int, Int, Int) = { val dataSourceId = DataSourceId(datasetDirectoryName, organizationId) + val localDatasourcePath = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName) + val localAgglomeratePathPrefix = layerName.map(localDatasourcePath.resolve).getOrElse(localDatasourcePath) - def agglomerateFileMatchPredicate(agglomerateKey: AgglomerateFileKey) = - agglomerateKey.datasetDirectoryName == datasetDirectoryName && agglomerateKey.organizationId == organizationId && layerName - .forall(_ == agglomerateKey.layerName) + def agglomerateFileMatchPredicate(agglomerateFileAttachment: LayerAttachment) = + agglomerateFileAttachment.path.toString.startsWith(localAgglomeratePathPrefix.toString) def bucketProviderPredicate(key: (DataSourceId, String)): Boolean = key._1 == 
DataSourceId(datasetDirectoryName, organizationId) && layerName.forall(_ == key._2) val closedAgglomerateFileHandleCount = - agglomerateServiceOpt.map(_.agglomerateFileCache.clear(agglomerateFileMatchPredicate)).getOrElse(0) + agglomerateServiceOpt.map(_.clearCaches(agglomerateFileMatchPredicate)).getOrElse(0) val clearedBucketProviderCount = bucketProviderCache.clear(bucketProviderPredicate) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala index e9790815b21..b2b2704104a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala @@ -27,6 +27,10 @@ import scala.collection.compat.immutable.ArraySeq class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter { + private val cumsumFileName = "cumsum.json" + // TODO other keys, also in zarr case + private val keySegmentToAgglomerate = "/segment_to_agglomerate" + // TODO clear on reload lazy val agglomerateFileCache = new AgglomerateFileCache(config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) @@ -237,7 +241,7 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv val reader: IHDF5Reader = openHdf5(agglomerateFileAttachment) for { agglomerateIdArr: Array[Long] <- tryo( - reader.uint64().readArrayBlockWithOffset("/segment_to_agglomerate", 1, segmentId)) + reader.uint64().readArrayBlockWithOffset(keySegmentToAgglomerate, 1, segmentId)) agglomerateId = agglomerateIdArr(0) segmentsRange: Array[Long] <- tryo( reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId)) @@ -266,26 +270,18 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv reader.uint64().readArrayBlockWithOffset(hdf5Dataset, blockSize.toInt, segmentId) // This uses the datasetName, which allows us to call it on the same hdf file in parallel. - private def readHDF(reader: IHDF5Reader, datasetName: String, segmentId: Long, blockSize: Long) = - reader.uint64().readArrayBlockWithOffset(datasetName, blockSize.toInt, segmentId) + private def readHDF(reader: IHDF5Reader, segmentId: Long, blockSize: Long) = + reader.uint64().readArrayBlockWithOffset(keySegmentToAgglomerate, blockSize.toInt, segmentId) // An agglomerate file holds information about a specific mapping. wK translates the segment ids to agglomerate ids by looking at the HDF5 dataset "/segment_to_agglomerate". // In this array, the agglomerate id is found by using the segment id as index. // There are two ways of how we prevent a file lookup for every input element. When present, we use the cumsum.json to initialize a BoundingBoxCache (see comment there). // Otherwise, we read configurable sized blocks from the agglomerate file and save them in a LRU cache. 
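// A minimal sketch of the block-caching idea described in the comment above, assuming a readBlock helper
// that fetches blockSize consecutive entries of the segment-to-agglomerate array. The real code below uses
// the AgglomerateIdCache / BoundingBoxCache pair; the class and parameter names here are illustrative only.
class SegmentBlockCacheSketch(blockSize: Long, readBlock: (Long, Long) => Array[Long]) {
  // Whole blocks are cached by their start offset, so a lookup touches the file at most once per block.
  private val blocks = scala.collection.concurrent.TrieMap.empty[Long, Array[Long]]

  def agglomerateIdForSegment(segmentId: Long): Long = {
    val blockStart = (segmentId / blockSize) * blockSize
    val block = blocks.getOrElseUpdate(blockStart, readBlock(blockStart, blockSize))
    block((segmentId - blockStart).toInt)
  }
}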
private def openAsCachedAgglomerateFile(agglomerateFileAttachment: LayerAttachment) = { - val hdfFile = - agglomerateFileKey.path(dataBaseDir, agglomerateDir, agglomerateFileExtension).toFile - val cumsumPath = - dataBaseDir - .resolve(agglomerateFileKey.organizationId) - .resolve(agglomerateFileKey.datasetDirectoryName) - .resolve(agglomerateFileKey.layerName) - .resolve(agglomerateDir) - .resolve(cumsumFileName) + Path.of(agglomerateFileAttachment.path).getParent.resolve(cumsumFileName) - val reader = HDF5FactoryProvider.get.openForReading(hdfFile) + val reader = openHdf5(agglomerateFileAttachment) val agglomerateIdCache = new AgglomerateIdCache(config.Datastore.Cache.AgglomerateFile.maxSegmentIdEntries, config.Datastore.Cache.AgglomerateFile.blockSize) @@ -297,6 +293,9 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv Left(agglomerateIdCache) } - CachedAgglomerateFile(reader, reader.`object`().openDataSet(datasetName), agglomerateIdCache, defaultCache) + CachedAgglomerateFile(reader, + reader.`object`().openDataSet(keySegmentToAgglomerate), + agglomerateIdCache, + defaultCache) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala index f75b574fcc8..fc528af2ba9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala @@ -14,7 +14,7 @@ import com.scalableminds.webknossos.datastore.models.requests.{ DataServiceRequestSettings } import com.scalableminds.webknossos.datastore.models.{AdditionalCoordinate, VoxelPosition} -import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, CachedHdf5File, Hdf5FileCache} +import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} import net.liftweb.common.Box.tryo import net.liftweb.common.{Box, Full} import play.api.i18n.MessagesProvider diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index a588fa0aa5c..84435fb62bd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -137,10 +137,12 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, datasetDirectoryName, layerName, meshFileName) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + layerName) segmentIds <- segmentIdsForAgglomerateIdIfNeeded( - organizationId, - datasetDirectoryName, - layerName, + dataSource.id, + dataLayer, fullMeshRequest.mappingName, fullMeshRequest.editableMappingTracingId, fullMeshRequest.segmentId, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala index 96a688e980c..2eb0ee4726b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala @@ -7,7 +7,7 @@ import com.scalableminds.webknossos.datastore.services.{ DSRemoteWebknossosClient } import com.scalableminds.util.tools.{Fox, FoxImplicits} -import com.scalableminds.webknossos.datastore.storage.AgglomerateFileKey +import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSourceId} import net.liftweb.common.Full import scala.concurrent.ExecutionContext @@ -19,9 +19,8 @@ trait MeshMappingHelper extends FoxImplicits { protected val binaryDataServiceHolder: BinaryDataServiceHolder protected def segmentIdsForAgglomerateIdIfNeeded( - organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, + dataSourceId: DataSourceId, + dataLayer: DataLayer, targetMappingName: Option[String], editableMappingTracingId: Option[String], agglomerateId: Long, @@ -40,16 +39,9 @@ trait MeshMappingHelper extends FoxImplicits { // assume agglomerate id, fetch oversegmentation segment ids for it for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox + agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) segmentIdsBox <- agglomerateService - .segmentIdsForAgglomerateId( - AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ), - agglomerateId - ) + .segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId) .shiftBox segmentIds <- segmentIdsBox match { case Full(segmentIds) => Fox.successful(segmentIds) @@ -69,15 +61,8 @@ trait MeshMappingHelper extends FoxImplicits { else // the agglomerate id is not present in the editable mapping. Fetch its info from the base mapping. 
for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - localSegmentIds <- agglomerateService.segmentIdsForAgglomerateId( - AgglomerateFileKey( - organizationId, - datasetDirectoryName, - dataLayerName, - mappingName - ), - agglomerateId - ) + agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) + localSegmentIds <- agglomerateService.segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId) } yield localSegmentIds } yield segmentIds case _ => Fox.failure("Cannot determine segment ids for editable mapping without base mapping") diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala index 98817d4da7e..a25d601bc8d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala @@ -22,19 +22,19 @@ class AgglomerateFileCache(val maxEntries: Int) extends LRUConcurrentCache[Layer override def onElementRemoval(key: LayerAttachment, value: CachedAgglomerateFile): Unit = value.scheduleForRemoval() - def withCache(agglomerateFileKey: LayerAttachment)( + def withCache(agglomerateFileAttachment: LayerAttachment)( loadFn: LayerAttachment => CachedAgglomerateFile): CachedAgglomerateFile = { def handleUncachedAgglomerateFile() = { - val agglomerateFile = loadFn(agglomerateFileKey) + val agglomerateFile = loadFn(agglomerateFileAttachment) // We don't need to check the return value of the `tryAccess` call as we just created the agglomerate file and use it only to increase the access counter. 
agglomerateFile.tryAccess() - put(agglomerateFileKey, agglomerateFile) + put(agglomerateFileAttachment, agglomerateFile) agglomerateFile } this.synchronized { - get(agglomerateFileKey) match { + get(agglomerateFileAttachment) match { case Some(agglomerateFile) => if (agglomerateFile.tryAccess()) agglomerateFile else handleUncachedAgglomerateFile() case _ => handleUncachedAgglomerateFile() From 716794def544d0c809b59b42ba65965a7111a8eb Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 3 Jun 2025 14:35:16 +0200 Subject: [PATCH 024/100] list attached agglomerate files --- .../controllers/DataSourceController.scala | 25 ++++-- .../datasource/DatasetLayerAttachments.scala | 13 ++-- .../services/AgglomerateService.scala | 77 ++++++++----------- .../services/SegmentIndexFileService.scala | 4 +- .../services/ZarrAgglomerateService.scala | 3 - .../services/mesh/MeshMappingHelper.scala | 8 +- .../RemoteSourceDescriptorService.scala | 52 +++++++------ 7 files changed, 97 insertions(+), 85 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index e64dfc6c0aa..40d4fe873de 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -262,7 +262,10 @@ class DataSourceController @Inject()( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateList = agglomerateService.exploreAgglomerates(organizationId, datasetDirectoryName, dataLayerName) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + agglomerateList = agglomerateService.exploreAgglomerates(organizationId, datasetDirectoryName, dataLayer) } yield Ok(Json.toJson(agglomerateList)) } } @@ -281,7 +284,9 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileAttachment <- agglomerateService + .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + .toFox skeleton <- agglomerateService .generateSkeleton(agglomerateFileAttachment, agglomerateId) ?~> "agglomerateSkeleton.failed" } yield Ok(skeleton.toByteArray).as(protobufMimeType) @@ -302,7 +307,9 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileAttachment <- agglomerateService + .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + .toFox agglomerateGraph <- agglomerateService .generateAgglomerateGraph(agglomerateFileAttachment, agglomerateId) ?~> "agglomerateGraph.failed" } yield Ok(agglomerateGraph.toByteArray).as(protobufMimeType) @@ -323,7 +330,9 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment 
= agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileAttachment <- agglomerateService + .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + .toFox position <- agglomerateService .positionForSegmentId(agglomerateFileAttachment, segmentId) ?~> "getSegmentPositionFromAgglomerateFile.failed" } yield Ok(Json.toJson(position)) @@ -343,7 +352,9 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileAttachment <- agglomerateService + .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + .toFox largestAgglomerateId: Long <- agglomerateService.largestAgglomerateId(agglomerateFileAttachment) } yield Ok(Json.toJson(largestAgglomerateId)) } @@ -362,7 +373,9 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileAttachment <- agglomerateService + .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + .toFox agglomerateIds: Seq[Long] <- agglomerateService.agglomerateIdsForSegmentIds( agglomerateFileAttachment, request.body.items diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala index 53e7097e5fd..ec5d58b3a88 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala @@ -10,15 +10,16 @@ import java.net.URI import java.nio.file.{Files, Path} case class DatasetLayerAttachments( - meshes: Seq[LayerAttachment], - agglomerates: Seq[LayerAttachment], - segmentIndex: Option[LayerAttachment], - connectomes: Seq[LayerAttachment], - cumsum: Option[LayerAttachment] + meshes: Seq[LayerAttachment] = Seq.empty, + agglomerates: Seq[LayerAttachment] = Seq.empty, + segmentIndex: Option[LayerAttachment] = None, + connectomes: Seq[LayerAttachment] = Seq.empty, + cumsum: Option[LayerAttachment] = None ) object DatasetLayerAttachments { - implicit val jsonFormat: Format[DatasetLayerAttachments] = Json.format[DatasetLayerAttachments] + implicit val jsonFormat: Format[DatasetLayerAttachments] = + Json.using[Json.WithDefaultValues].format[DatasetLayerAttachments] } object LayerAttachmentDataformat extends ExtendedEnumeration { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 141d0c0ddcf..28a84d966c8 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -12,10 +12,13 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ DataLayer, DataSourceId, LayerAttachment, - LayerAttachmentDataformat, + LayerAttachmentDataformat } import 
com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest +import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import com.typesafe.scalalogging.LazyLogging +import net.liftweb.common.Box +import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils import java.net.URI @@ -26,16 +29,19 @@ import scala.concurrent.duration.DurationInt class AgglomerateService @Inject()(config: DataStoreConfig, zarrAgglomerateService: ZarrAgglomerateService, - hdf5AgglomerateService: Hdf5AgglomerateService) + hdf5AgglomerateService: Hdf5AgglomerateService, + remoteSourceDescriptorService: RemoteSourceDescriptorService) extends LazyLogging with FoxImplicits { private val agglomerateDir = "agglomerates" private val agglomerateFileExtension = "hdf5" private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) - def exploreAgglomerates(organizationId: String, datasetDirectoryName: String, dataLayerName: String): Set[String] = { - val layerDir = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayerName) - PathUtils + def exploreAgglomerates(organizationId: String, datasetDirectoryName: String, dataLayer: DataLayer): Set[String] = { + val attachedAgglomerates = dataLayer.attachments.map(_.agglomerates).getOrElse(Seq.empty).map(_.name).toSet + + val layerDir = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayer.name) + val exploredAgglomerates = PathUtils .listFiles(layerDir.resolve(agglomerateDir), silent = true, PathUtils.fileExtensionFilter(agglomerateFileExtension)) @@ -43,29 +49,10 @@ class AgglomerateService @Inject()(config: DataStoreConfig, paths.map(path => FilenameUtils.removeExtension(path.getFileName.toString)) } .toOption - .getOrElse(Nil) // TODO explore zarr agglomerates? - .toSet ++ Set( - "agglomerate_view_5", - "agglomerate_view_10", - "agglomerate_view_15", - "agglomerate_view_20", - "agglomerate_view_25", - "agglomerate_view_30", - "agglomerate_view_35", - "agglomerate_view_40", - "agglomerate_view_45", - "agglomerate_view_50", - "agglomerate_view_55", - "agglomerate_view_60", - "agglomerate_view_65", - "agglomerate_view_70", - "agglomerate_view_75", - "agglomerate_view_80", - "agglomerate_view_85", - "agglomerate_view_90", - "agglomerate_view_95", - "agglomerate_view_100" - ) + .getOrElse(Nil) + .toSet + + attachedAgglomerates ++ exploredAgglomerates } def clearCaches(hdf5Predicate: LayerAttachment => Boolean): Int = @@ -73,24 +60,28 @@ class AgglomerateService @Inject()(config: DataStoreConfig, hdf5AgglomerateService.agglomerateFileCache.clear(hdf5Predicate) // TODO cache? 
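// One way the "TODO cache?" above could be addressed, reusing the AlfuCache getOrLoad pattern that appears
// elsewhere in this series. This is a sketch only: the cache name and key shape are assumptions, a Fox-based
// call site is presumed acceptable, and a real version would also have to be evicted in clearCaches when a
// dataset is reloaded.
private lazy val agglomerateFileLookUpCache = AlfuCache[(DataSourceId, String, String), LayerAttachment]()

def lookUpAgglomerateFileCached(dataSourceId: DataSourceId, dataLayer: DataLayer, mappingName: String)(
    implicit ec: ExecutionContext): Fox[LayerAttachment] =
  agglomerateFileLookUpCache.getOrLoad((dataSourceId, dataLayer.name, mappingName),
                                       _ => lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName).toFox)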
- def lookUpAgglomerateFile(dataSourceId: DataSourceId, dataLayer: DataLayer, mappingName: String): LayerAttachment = { + def lookUpAgglomerateFile(dataSourceId: DataSourceId, + dataLayer: DataLayer, + mappingName: String): Box[LayerAttachment] = { val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { case Some(attachments) => attachments.agglomerates.find(_.name == mappingName) case None => None } - registeredAttachment.getOrElse( - LayerAttachment( - mappingName, - new URI( - dataBaseDir - .resolve(dataSourceId.organizationId) - .resolve(dataSourceId.directoryName) - .resolve(dataLayer.name) - .resolve(agglomerateDir) - .toString), - LayerAttachmentDataformat.hdf5 + val localDatsetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + for { + registeredAttachmentNormalized <- tryo(registeredAttachment.map { attachment => + attachment.copy( + path = + remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatsetDir, dataLayer.name)) + }) + } yield + registeredAttachmentNormalized.getOrElse( + LayerAttachment( + mappingName, + new URI(dataBaseDir.resolve(dataLayer.name).resolve(agglomerateDir).toString), + LayerAttachmentDataformat.hdf5 + ) ) - ) } def applyAgglomerate(request: DataServiceDataRequest)(data: Array[Byte])(implicit ec: ExecutionContext, @@ -98,9 +89,9 @@ class AgglomerateService @Inject()(config: DataStoreConfig, for { mappingName <- request.settings.appliedAgglomerate.toFox elementClass = request.dataLayer.elementClass - agglomerateFileAttachment = lookUpAgglomerateFile(request.dataSourceIdOrVolumeDummy, + agglomerateFileAttachment <- lookUpAgglomerateFile(request.dataSourceIdOrVolumeDummy, request.dataLayer, - mappingName) + mappingName).toFox data <- agglomerateFileAttachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.applyAgglomerate(agglomerateFileAttachment, elementClass)(data) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala index fc528af2ba9..a47829884be 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala @@ -227,7 +227,9 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, datasetDirectoryName, dataLayerName) agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileAttachment <- agglomerateService + .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + .toFox largestAgglomerateId <- agglomerateService.largestAgglomerateId(agglomerateFileAttachment) segmentIds <- if (segmentOrAgglomerateId <= largestAgglomerateId) { agglomerateService.segmentIdsForAgglomerateId( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala index 87b5011a9c8..fefdda7010c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -3,7 +3,6 @@ package com.scalableminds.webknossos.datastore.services import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int -import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} import com.scalableminds.webknossos.datastore.DataStoreConfig @@ -93,13 +92,11 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService for { segmentToAgglomerate <- openZarrArrayCached(agglomerateFileAttachment, "segment_to_agglomerate") - beforeBuildMap = Instant.now relevantAgglomerateMap: Map[Long, Long] <- Fox .serialCombined(distinctSegmentIds) { segmentId => mapSingleSegment(segmentToAgglomerate, segmentId).map((segmentId, _)) } .map(_.toMap) - _ = Instant.logSince(beforeBuildMap, "build map") mappedBytes: Array[Byte] = convertData(data, elementClass) match { case data: Array[Byte] => val longBuffer = LongBuffer.allocate(data.length) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala index 2eb0ee4726b..9c54ce0a22b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala @@ -39,7 +39,9 @@ trait MeshMappingHelper extends FoxImplicits { // assume agglomerate id, fetch oversegmentation segment ids for it for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) + agglomerateFileAttachment <- agglomerateService + .lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) + .toFox segmentIdsBox <- agglomerateService .segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId) .shiftBox @@ -61,7 +63,9 @@ trait MeshMappingHelper extends FoxImplicits { else // the agglomerate id is not present in the editable mapping. Fetch its info from the base mapping. 
for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileAttachment = agglomerateService.lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) + agglomerateFileAttachment <- agglomerateService + .lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) + .toFox localSegmentIds <- agglomerateService.segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId) } yield localSegmentIds } yield segmentIds diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala index 4b904c5f512..7d65109e6ad 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala @@ -48,33 +48,37 @@ class RemoteSourceDescriptorService @Inject()(dSRemoteWebknossosClient: DSRemote remoteSource = RemoteSourceDescriptor(uri, credentialBox.toOption) } yield remoteSource - def resolveMagPath(datasetDir: Path, layerDir: Path, layerName: String, magLocator: MagLocator): URI = - magLocator.path match { - case Some(magLocatorPath) => - val uri = new URI(magLocatorPath) - if (DataVaultService.isRemoteScheme(uri.getScheme)) { + def uriFromPathLiteral(pathLiteral: String, localDatasetDir: Path, layerName: String): URI = { + val uri = new URI(pathLiteral) + if (DataVaultService.isRemoteScheme(uri.getScheme)) { + uri + } else if (uri.getScheme == null || uri.getScheme == DataVaultService.schemeFile) { + val localPath = Paths.get(uri.getPath) + if (localPath.isAbsolute) { + if (localPath.toString.startsWith(localDatasetDir.toAbsolutePath.toString) || dataStoreConfig.Datastore.localDirectoryWhitelist + .exists(whitelistEntry => localPath.toString.startsWith(whitelistEntry))) uri - } else if (uri.getScheme == null || uri.getScheme == DataVaultService.schemeFile) { - val localPath = Paths.get(uri.getPath) - if (localPath.isAbsolute) { - if (dataStoreConfig.Datastore.localDirectoryWhitelist.exists(whitelistEntry => - localPath.toString.startsWith(whitelistEntry))) - uri - else - throw new Exception( - s"Absolute path $localPath in local file system is not in path whitelist. Consider adding it to datastore.localDirectoryWhitelist") - } else { // relative local path, resolve in dataset dir - val magPathRelativeToDataset = datasetDir.resolve(localPath) - val magPathRelativeToLayer = datasetDir.resolve(layerName).resolve(localPath) - if (magPathRelativeToDataset.toFile.exists) { - magPathRelativeToDataset.toUri - } else { - magPathRelativeToLayer.toUri - } - } + else + throw new Exception( + s"Absolute path $localPath in local file system is not in path whitelist. 
Consider adding it to datastore.localDirectoryWhitelist") + } else { // relative local path, resolve in dataset dir + val magPathRelativeToDataset = localDatasetDir.resolve(localPath) + val magPathRelativeToLayer = localDatasetDir.resolve(layerName).resolve(localPath) + if (magPathRelativeToDataset.toFile.exists) { + magPathRelativeToDataset.toUri } else { - throw new Exception(s"Unsupported mag path: $magLocatorPath") + magPathRelativeToLayer.toUri } + } + } else { + throw new Exception(s"Unsupported path: $localDatasetDir") + } + } + + def resolveMagPath(datasetDir: Path, layerDir: Path, layerName: String, magLocator: MagLocator): URI = + magLocator.path match { + case Some(magLocatorPath) => + uriFromPathLiteral(magLocatorPath, datasetDir, layerName) case _ => val localDirWithScalarMag = layerDir.resolve(magLocator.mag.toMagLiteral(allowScalar = true)) val localDirWithVec3Mag = layerDir.resolve(magLocator.mag.toMagLiteral()) From feb8cb4686b696ff477cf78dca45a8533f00dcdc Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 3 Jun 2025 14:41:00 +0200 Subject: [PATCH 025/100] format --- .../webknossos/datastore/services/AgglomerateService.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 28a84d966c8..b1502121b1c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -90,8 +90,8 @@ class AgglomerateService @Inject()(config: DataStoreConfig, mappingName <- request.settings.appliedAgglomerate.toFox elementClass = request.dataLayer.elementClass agglomerateFileAttachment <- lookUpAgglomerateFile(request.dataSourceIdOrVolumeDummy, - request.dataLayer, - mappingName).toFox + request.dataLayer, + mappingName).toFox data <- agglomerateFileAttachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.applyAgglomerate(agglomerateFileAttachment, elementClass)(data) From f040483c12b8678abc1191e4dd98fcc1861519d9 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 4 Jun 2025 11:11:45 +0200 Subject: [PATCH 026/100] use agglomeratefilekey as cache key for proper cache clear support --- .../services/AgglomerateService.scala | 82 ++++----- .../services/BinaryDataService.scala | 8 +- .../services/Hdf5AgglomerateService.scala | 45 ++--- .../services/ZarrAgglomerateService.scala | 162 +++++++++--------- .../storage/AgglomerateFileCache.scala | 18 +- 5 files changed, 162 insertions(+), 153 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index b1502121b1c..555ae651bdd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -15,7 +15,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ LayerAttachmentDataformat } import com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest -import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService +import 
com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box import net.liftweb.common.Box.tryo @@ -55,14 +55,14 @@ class AgglomerateService @Inject()(config: DataStoreConfig, attachedAgglomerates ++ exploredAgglomerates } - def clearCaches(hdf5Predicate: LayerAttachment => Boolean): Int = + def clearCaches(hdf5Predicate: AgglomerateFileKey => Boolean): Int = // TODO also clear zarr caches hdf5AgglomerateService.agglomerateFileCache.clear(hdf5Predicate) // TODO cache? def lookUpAgglomerateFile(dataSourceId: DataSourceId, dataLayer: DataLayer, - mappingName: String): Box[LayerAttachment] = { + mappingName: String): Box[AgglomerateFileKey] = { val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { case Some(attachments) => attachments.agglomerates.find(_.name == mappingName) case None => None @@ -75,11 +75,15 @@ class AgglomerateService @Inject()(config: DataStoreConfig, remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatsetDir, dataLayer.name)) }) } yield - registeredAttachmentNormalized.getOrElse( - LayerAttachment( - mappingName, - new URI(dataBaseDir.resolve(dataLayer.name).resolve(agglomerateDir).toString), - LayerAttachmentDataformat.hdf5 + AgglomerateFileKey( + dataSourceId, + dataLayer.name, + registeredAttachmentNormalized.getOrElse( + LayerAttachment( + mappingName, + new URI(dataBaseDir.resolve(dataLayer.name).resolve(agglomerateDir).toString), + LayerAttachmentDataformat.hdf5 + ) ) ) } @@ -89,24 +93,22 @@ class AgglomerateService @Inject()(config: DataStoreConfig, for { mappingName <- request.settings.appliedAgglomerate.toFox elementClass = request.dataLayer.elementClass - agglomerateFileAttachment <- lookUpAgglomerateFile(request.dataSourceIdOrVolumeDummy, - request.dataLayer, - mappingName).toFox - data <- agglomerateFileAttachment.dataFormat match { + agglomerateFileKey <- lookUpAgglomerateFile(request.dataSourceIdOrVolumeDummy, request.dataLayer, mappingName).toFox + data <- agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => - zarrAgglomerateService.applyAgglomerate(agglomerateFileAttachment, elementClass)(data) - case _ => hdf5AgglomerateService.applyAgglomerate(agglomerateFileAttachment, request)(data).toFox + zarrAgglomerateService.applyAgglomerate(agglomerateFileKey, elementClass)(data) + case _ => hdf5AgglomerateService.applyAgglomerate(agglomerateFileKey, request)(data).toFox } } yield data - def generateSkeleton(agglomerateFileAttachment: LayerAttachment, + def generateSkeleton(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = for { before <- Instant.nowFox - skeleton <- agglomerateFileAttachment.dataFormat match { + skeleton <- agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => - zarrAgglomerateService.generateSkeleton(agglomerateFileAttachment, agglomerateId) - case _ => hdf5AgglomerateService.generateSkeleton(agglomerateFileAttachment, agglomerateId).toFox + zarrAgglomerateService.generateSkeleton(agglomerateFileKey, agglomerateId) + case _ => hdf5AgglomerateService.generateSkeleton(agglomerateFileKey, agglomerateId).toFox } _ = if (Instant.since(before) > (100 milliseconds)) { Instant.logSince( @@ -119,46 +121,46 @@ class AgglomerateService @Inject()(config: DataStoreConfig, } } yield skeleton - def 
largestAgglomerateId(agglomerateFileAttachment: LayerAttachment)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Long] = - agglomerateFileAttachment.dataFormat match { - case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.largestAgglomerateId(agglomerateFileAttachment) - case _ => hdf5AgglomerateService.largestAgglomerateId(agglomerateFileAttachment).toFox + def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Long] = + agglomerateFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.largestAgglomerateId(agglomerateFileKey) + case _ => hdf5AgglomerateService.largestAgglomerateId(agglomerateFileKey).toFox } - def segmentIdsForAgglomerateId(agglomerateFileAttachment: LayerAttachment, + def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = - agglomerateFileAttachment.dataFormat match { + agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => - zarrAgglomerateService.segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId) - case _ => hdf5AgglomerateService.segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId).toFox + zarrAgglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId) + case _ => hdf5AgglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId).toFox } - def agglomerateIdsForSegmentIds(agglomerateFileAttachment: LayerAttachment, segmentIds: Seq[Long])( + def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = - agglomerateFileAttachment.dataFormat match { + agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => - zarrAgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileAttachment, segmentIds) - case _ => hdf5AgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileAttachment, segmentIds).toFox + zarrAgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileKey, segmentIds) + case _ => hdf5AgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileKey, segmentIds).toFox } - def positionForSegmentId(agglomerateFileAttachment: LayerAttachment, - segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Vec3Int] = - agglomerateFileAttachment.dataFormat match { + def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Vec3Int] = + agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => - zarrAgglomerateService.positionForSegmentId(agglomerateFileAttachment, segmentId) - case _ => hdf5AgglomerateService.positionForSegmentId(agglomerateFileAttachment, segmentId).toFox + zarrAgglomerateService.positionForSegmentId(agglomerateFileKey, segmentId) + case _ => hdf5AgglomerateService.positionForSegmentId(agglomerateFileKey, segmentId).toFox } - def generateAgglomerateGraph(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long)( + def generateAgglomerateGraph(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, tc: TokenContext): Fox[AgglomerateGraph] = - agglomerateFileAttachment.dataFormat match { + agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => - 
zarrAgglomerateService.generateAgglomerateGraph(agglomerateFileAttachment, agglomerateId) + zarrAgglomerateService.generateAgglomerateGraph(agglomerateFileKey, agglomerateId) case _ => - hdf5AgglomerateService.generateAgglomerateGraph(agglomerateFileAttachment, agglomerateId).toFox + hdf5AgglomerateService.generateAgglomerateGraph(agglomerateFileKey, agglomerateId).toFox } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala index 00b9db56e1a..7c90c52ac65 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala @@ -7,7 +7,7 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.tools.ExtendedTypes.ExtendedArraySeq import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.models.BucketPosition -import com.scalableminds.webknossos.datastore.models.datasource.{Category, DataLayer, DataSourceId, LayerAttachment} +import com.scalableminds.webknossos.datastore.models.datasource.{Category, DataLayer, DataSourceId} import com.scalableminds.webknossos.datastore.models.requests.{DataReadInstruction, DataServiceDataRequest} import com.scalableminds.webknossos.datastore.storage._ import com.typesafe.scalalogging.LazyLogging @@ -258,11 +258,9 @@ class BinaryDataService(val dataBaseDir: Path, def clearCache(organizationId: String, datasetDirectoryName: String, layerName: Option[String]): (Int, Int, Int) = { val dataSourceId = DataSourceId(datasetDirectoryName, organizationId) - val localDatasourcePath = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName) - val localAgglomeratePathPrefix = layerName.map(localDatasourcePath.resolve).getOrElse(localDatasourcePath) - def agglomerateFileMatchPredicate(agglomerateFileAttachment: LayerAttachment) = - agglomerateFileAttachment.path.toString.startsWith(localAgglomeratePathPrefix.toString) + def agglomerateFileMatchPredicate(agglomerateFileKey: AgglomerateFileKey) = + agglomerateFileKey.dataSourceId == dataSourceId && layerName.forall(agglomerateFileKey.layerName == _) def bucketProviderPredicate(key: (DataSourceId, String)): Boolean = key._1 == DataSourceId(datasetDirectoryName, organizationId) && layerName.forall(_ == key._2) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala index b2b2704104a..943626a8e5b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala @@ -7,10 +7,11 @@ import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NodeDefaults, SkeletonTracingDefaults} -import com.scalableminds.webknossos.datastore.models.datasource.{ElementClass, LayerAttachment} +import com.scalableminds.webknossos.datastore.models.datasource.ElementClass import 
com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest import com.scalableminds.webknossos.datastore.storage.{ AgglomerateFileCache, + AgglomerateFileKey, AgglomerateIdCache, BoundingBoxCache, CachedAgglomerateFile, @@ -34,28 +35,28 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv // TODO clear on reload lazy val agglomerateFileCache = new AgglomerateFileCache(config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) - private def openHdf5(agglomerateFileAttachment: LayerAttachment): IHDF5Reader = { - if (agglomerateFileAttachment.path.getScheme.nonEmpty && agglomerateFileAttachment.path.getScheme != "file") { + private def openHdf5(agglomerateFileKey: AgglomerateFileKey): IHDF5Reader = { + if (agglomerateFileKey.attachment.path.getScheme.nonEmpty && agglomerateFileKey.attachment.path.getScheme != "file") { throw new Exception( "Trying to open non-local hdf5 agglomerate file. Hdf5 agglomerate files are only supported on the datastore-local file system") } - HDF5FactoryProvider.get.openForReading(Path.of(agglomerateFileAttachment.path).toFile) + HDF5FactoryProvider.get.openForReading(Path.of(agglomerateFileKey.attachment.path).toFile) } - def largestAgglomerateId(agglomerateFileAttachment: LayerAttachment): Box[Long] = + def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey): Box[Long] = tryo { - val reader = openHdf5(agglomerateFileAttachment) + val reader = openHdf5(agglomerateFileKey) reader.`object`().getNumberOfElements("/agglomerate_to_segments_offsets") - 1L } - def applyAgglomerate(agglomerateFileAttachment: LayerAttachment, request: DataServiceDataRequest)( + def applyAgglomerate(agglomerateFileKey: AgglomerateFileKey, request: DataServiceDataRequest)( data: Array[Byte]): Box[Array[Byte]] = tryo { def convertToAgglomerate(input: Array[Long], bytesPerElement: Int, bufferFunc: (ByteBuffer, Long) => ByteBuffer): Array[Byte] = { - val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileAttachment)(openAsCachedAgglomerateFile) + val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileKey)(openAsCachedAgglomerateFile) val agglomerateIds = cachedAgglomerateFile.cache match { case Left(agglomerateIdCache) => @@ -97,8 +98,8 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv } } - def agglomerateIdsForSegmentIds(agglomerateFileAttachment: LayerAttachment, segmentIds: Seq[Long]): Box[Seq[Long]] = { - val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileAttachment)(openAsCachedAgglomerateFile) + def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long]): Box[Seq[Long]] = { + val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileKey)(openAsCachedAgglomerateFile) tryo { val agglomerateIds = segmentIds.map { segmentId: Long => cachedAgglomerateFile.agglomerateIdCache.withCache(segmentId, @@ -110,9 +111,9 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv } } - def generateSkeleton(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long): Box[SkeletonTracing] = + def generateSkeleton(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long): Box[SkeletonTracing] = try { - val reader = openHdf5(agglomerateFileAttachment) + val reader = openHdf5(agglomerateFileKey) val positionsRange: Array[Long] = reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) val edgesRange: Array[Long] = @@ -164,7 +165,7 @@ 
class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv // unsafeWrapArray is fine, because the underlying arrays are never mutated nodes = ArraySeq.unsafeWrapArray(nodes), edges = ArraySeq.unsafeWrapArray(skeletonEdges), - name = s"agglomerate $agglomerateId (${agglomerateFileAttachment.name})", + name = s"agglomerate $agglomerateId (${agglomerateFileKey.attachment.name})", `type` = Some(TreeTypeProto.AGGLOMERATE) )) @@ -174,9 +175,9 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv case e: Exception => Failure(e.getMessage) } - def generateAgglomerateGraph(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long): Box[AgglomerateGraph] = + def generateAgglomerateGraph(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long): Box[AgglomerateGraph] = tryo { - val reader = openHdf5(agglomerateFileAttachment) + val reader = openHdf5(agglomerateFileKey) val positionsRange: Array[Long] = reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) @@ -222,9 +223,9 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv ) } - def segmentIdsForAgglomerateId(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long): Box[Seq[Long]] = + def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long): Box[Seq[Long]] = tryo { - val reader = openHdf5(agglomerateFileAttachment) + val reader = openHdf5(agglomerateFileKey) val positionsRange: Array[Long] = reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) @@ -237,8 +238,8 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv segmentIds.toSeq } - def positionForSegmentId(agglomerateFileAttachment: LayerAttachment, segmentId: Long): Box[Vec3Int] = { - val reader: IHDF5Reader = openHdf5(agglomerateFileAttachment) + def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long): Box[Vec3Int] = { + val reader: IHDF5Reader = openHdf5(agglomerateFileKey) for { agglomerateIdArr: Array[Long] <- tryo( reader.uint64().readArrayBlockWithOffset(keySegmentToAgglomerate, 1, segmentId)) @@ -277,11 +278,11 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv // In this array, the agglomerate id is found by using the segment id as index. // There are two ways of how we prevent a file lookup for every input element. When present, we use the cumsum.json to initialize a BoundingBoxCache (see comment there). // Otherwise, we read configurable sized blocks from the agglomerate file and save them in a LRU cache. 
- private def openAsCachedAgglomerateFile(agglomerateFileAttachment: LayerAttachment) = { + private def openAsCachedAgglomerateFile(agglomerateFileKey: AgglomerateFileKey) = { val cumsumPath = - Path.of(agglomerateFileAttachment.path).getParent.resolve(cumsumFileName) + Path.of(agglomerateFileKey.attachment.path).getParent.resolve(cumsumFileName) - val reader = openHdf5(agglomerateFileAttachment) + val reader = openHdf5(agglomerateFileKey) val agglomerateIdCache = new AgglomerateIdCache(config.Datastore.Cache.AgglomerateFile.maxSegmentIdEntries, config.Datastore.Cache.AgglomerateFile.blockSize) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala index fefdda7010c..58b37beabee 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -11,8 +11,8 @@ import com.scalableminds.webknossos.datastore.datareaders.{DatasetArray, MultiAr import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NativeBucketScanner, NodeDefaults, SkeletonTracingDefaults} -import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass, LayerAttachment} -import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptor} +import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass} +import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, DataVaultService, RemoteSourceDescriptor} import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo import net.liftweb.common.{Box, Full} @@ -27,7 +27,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService extends DataConverter with LazyLogging { - private lazy val openArraysCache = AlfuCache[(LayerAttachment, String), DatasetArray]() + private lazy val openArraysCache = AlfuCache[(AgglomerateFileKey, String), DatasetArray]() // TODO unify with existing chunkContentsCache from binaryDataService? 
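// Context for the key change above: with AgglomerateFileKey (which carries the data source id and layer
// name) as cache key, the zarr arrays of a reloaded dataset can be evicted by predicate, mirroring what
// clearCaches already does for the hdf5 file handle cache. A possible shape, assuming AlfuCache supports
// a predicate-based clear (an assumption about its API, not taken from this patch):
def clearZarrArrayCache(predicate: AgglomerateFileKey => Boolean): Int =
  openArraysCache.clear { case (agglomerateFileKey, _) => predicate(agglomerateFileKey) }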
private lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { @@ -53,15 +53,15 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService asMultiArray <- segmentToAgglomerate.readAsMultiArray(offset = segmentId, shape = 1) } yield asMultiArray.getLong(0) - private def openZarrArrayCached(agglomerateFileAttachment: LayerAttachment, + private def openZarrArrayCached(agglomerateFileKey: AgglomerateFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext) = - openArraysCache.getOrLoad((agglomerateFileAttachment, zarrArrayName), - _ => openZarrArray(agglomerateFileAttachment, zarrArrayName)) + openArraysCache.getOrLoad((agglomerateFileKey, zarrArrayName), + _ => openZarrArray(agglomerateFileKey, zarrArrayName)) - private def openZarrArray(agglomerateFileAttachment: LayerAttachment, + private def openZarrArray(agglomerateFileKey: AgglomerateFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = for { - groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(agglomerateFileAttachment.path, None)) + groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(agglomerateFileKey.attachment.path, None)) segmentToAgglomeratePath = groupVaultPath / zarrArrayName zarrArray <- Zarr3Array.open(segmentToAgglomeratePath, DataSourceId("zarr", "test"), @@ -72,7 +72,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService sharedChunkContentsCache) } yield zarrArray - def applyAgglomerate(agglomerateFileAttachment: LayerAttachment, elementClass: ElementClass.Value)( + def applyAgglomerate(agglomerateFileKey: AgglomerateFileKey, elementClass: ElementClass.Value)( data: Array[Byte])(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = { def convertToAgglomerate(segmentIds: Array[Long], @@ -91,7 +91,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService bucketScanner.collectSegmentIds(data, bytesPerElement, isSigned = false, skipZeroes = false) for { - segmentToAgglomerate <- openZarrArrayCached(agglomerateFileAttachment, "segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") relevantAgglomerateMap: Map[Long, Long] <- Fox .serialCombined(distinctSegmentIds) { segmentId => mapSingleSegment(segmentToAgglomerate, segmentId).map((segmentId, _)) @@ -116,45 +116,48 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService } yield mappedBytes } - def generateSkeleton(agglomerateFileAttachment: LayerAttachment, + def generateSkeleton(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = for { - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") - agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_edges_offsets") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") + agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) - edgeCount = edgesRange.getLong(1) - 
edgesRange.getLong(0) + nodeCount <- tryo(positionsRange.getLong(1) - positionsRange.getLong(0)).toFox + edgeCount <- tryo(edgesRange.getLong(1) - edgesRange.getLong(0)).toFox edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" - agglomerateToPositions <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_positions") + agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") positions <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), shape = Array(nodeCount.toInt, 3)) - agglomerateToEdges <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_edges") + agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), shape = Array(edgeCount.toInt, 2)) nodeIdStartAtOneOffset = 1 - // TODO use multiarray index iterators? - nodes = (0 until nodeCount.toInt).map { nodeIdx => - NodeDefaults.createInstance.copy( - id = nodeIdx + nodeIdStartAtOneOffset, - position = Vec3IntProto( - positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), - positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), - positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) + nodes <- tryo { + (0 until nodeCount.toInt).map { nodeIdx => + NodeDefaults.createInstance.copy( + id = nodeIdx + nodeIdStartAtOneOffset, + position = Vec3IntProto( + positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) + ) ) - ) - } + } + }.toFox - skeletonEdges = (0 until edges.getShape()(0)).map { edgeIdx => - Edge( - source = edges.getInt(edges.getIndex.set(Array(edgeIdx, 0))) + nodeIdStartAtOneOffset, - target = edges.getInt(edges.getIndex.set(Array(edgeIdx, 1))) + nodeIdStartAtOneOffset - ) - } + skeletonEdges <- tryo { + (0 until edges.getShape()(0)).map { edgeIdx => + Edge( + source = edges.getInt(edges.getIndex.set(Array(edgeIdx, 0))) + nodeIdStartAtOneOffset, + target = edges.getInt(edges.getIndex.set(Array(edgeIdx, 1))) + nodeIdStartAtOneOffset + ) + } + }.toFox trees = Seq( Tree( @@ -163,107 +166,110 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService // unsafeWrapArray is fine, because the underlying arrays are never mutated nodes = nodes, edges = skeletonEdges, - name = s"agglomerate $agglomerateId (${agglomerateFileAttachment.name})", + name = s"agglomerate $agglomerateId (${agglomerateFileKey.attachment.name})", `type` = Some(TreeTypeProto.AGGLOMERATE) )) skeleton = SkeletonTracingDefaults.createInstance.copy(trees = trees) } yield skeleton - def largestAgglomerateId(agglomerateFileAttachment: LayerAttachment)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Long] = + def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Long] = for { - array <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") + array <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") shape <- array.datasetShape.toFox ?~> "Could not determine array shape" shapeFirstElement <- tryo(shape(0)).toFox } yield shapeFirstElement - def 
generateAgglomerateGraph(agglomerateFileAttachment: LayerAttachment, agglomerateId: Long)( + def generateAgglomerateGraph(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, tc: TokenContext): Fox[AgglomerateGraph] = for { - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") - agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_edges_offsets") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") + agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - nodeCount = positionsRange.getLong(1) - positionsRange.getLong(0) - edgeCount = edgesRange.getLong(1) - edgesRange.getLong(0) + nodeCount <- tryo(positionsRange.getLong(1) - positionsRange.getLong(0)).toFox + edgeCount <- tryo(edgesRange.getLong(1) - edgesRange.getLong(0)).toFox edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" - agglomerateToPositions <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_positions") + agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") positions: MultiArray <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), shape = Array(nodeCount.toInt, 3)) - agglomerateToSegments <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments") + agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") segmentIdsMA: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = positionsRange.getInt(0), shape = nodeCount.toInt) segmentIds: Array[Long] <- MultiArrayUtils.toLongArray(segmentIdsMA).toFox - agglomerateToEdges <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_edges") + agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), shape = Array(edgeCount.toInt, 2)) - agglomerateToAffinities <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_affinities") + agglomerateToAffinities <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_affinities") affinities: MultiArray <- agglomerateToAffinities.readAsMultiArray(offset = edgesRange.getLong(0), shape = edgeCount.toInt) - agglomerateGraph = AgglomerateGraph( - // unsafeWrapArray is fine, because the underlying arrays are never mutated - segments = ArraySeq.unsafeWrapArray(segmentIds), - edges = (0 until edges.getShape()(0)).map { edgeIdx: Int => - AgglomerateEdge( - source = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 0)))), - target = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 1)))) - ) - }, - positions = (0 until nodeCount.toInt).map { nodeIdx: Int => - Vec3IntProto( - positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), - positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), - positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) - ) - }, - affinities = 
ArraySeq.unsafeWrapArray(affinities.getStorage.asInstanceOf[Array[Float]]) - ) + agglomerateGraph <- tryo { + AgglomerateGraph( + // unsafeWrapArray is fine, because the underlying arrays are never mutated + segments = ArraySeq.unsafeWrapArray(segmentIds), + edges = (0 until edges.getShape()(0)).map { edgeIdx: Int => + AgglomerateEdge( + source = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 0)))), + target = segmentIds(edges.getInt(edges.getIndex.set(Array(edgeIdx, 1)))) + ) + }, + positions = (0 until nodeCount.toInt).map { nodeIdx: Int => + Vec3IntProto( + positions.getInt(positions.getIndex.set(Array(nodeIdx, 0))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 1))), + positions.getInt(positions.getIndex.set(Array(nodeIdx, 2))) + ) + }, + affinities = ArraySeq.unsafeWrapArray(affinities.getStorage.asInstanceOf[Array[Float]]) + ) + }.toFox } yield agglomerateGraph - def segmentIdsForAgglomerateId(agglomerateFileAttachment: LayerAttachment, + def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = for { - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") - agglomerateToSegments <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") + agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") segmentRange <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - segmentCount = segmentRange.getLong(1) - segmentRange.getLong(0) + segmentOffset <- tryo(segmentRange.getLong(0)).toFox + segmentCount <- tryo(segmentRange.getLong(1) - segmentOffset).toFox segmentIds <- if (segmentCount == 0) Fox.successful(Array.empty[Long]) else agglomerateToSegments - .readAsMultiArray(offset = segmentRange.getLong(0), shape = segmentCount.toInt) + .readAsMultiArray(offset = segmentOffset, shape = segmentCount.toInt) .flatMap(MultiArrayUtils.toLongArray(_).toFox) } yield segmentIds.toSeq - def agglomerateIdsForSegmentIds(agglomerateFileAttachment: LayerAttachment, segmentIds: Seq[Long])( + def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = for { - segmentToAgglomerate <- openZarrArrayCached(agglomerateFileAttachment, "segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") agglomerateIds <- Fox.serialCombined(segmentIds) { segmentId => mapSingleSegment(segmentToAgglomerate, segmentId) } } yield agglomerateIds - def positionForSegmentId(agglomerateFileAttachment: LayerAttachment, - segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Vec3Int] = + def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Vec3Int] = for { - segmentToAgglomerate <- openZarrArrayCached(agglomerateFileAttachment, "segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") agglomerateId <- mapSingleSegment(segmentToAgglomerate, segmentId) - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments_offsets") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, 
"agglomerate_to_segments_offsets") segmentsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - agglomerateToSegments <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_segments") + agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") segmentIndex <- binarySearchForSegment(segmentsRange.getLong(0), segmentsRange.getLong(1), segmentId, agglomerateToSegments) - agglomerateToPositions <- openZarrArrayCached(agglomerateFileAttachment, "agglomerate_to_positions") + agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") position <- agglomerateToPositions.readAsMultiArray(offset = Array(segmentIndex, 0), shape = Array(3, 1)) } yield Vec3Int(position.getInt(0), position.getInt(1), position.getInt(2)) @@ -278,7 +284,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService for { segmentIdAtMiddleMA <- agglomerateToSegments.readAsMultiArray(offset = middle, shape = 1) segmentIdAdMiddleArray: Array[Long] <- MultiArrayUtils.toLongArray(segmentIdAtMiddleMA).toFox - segmentIdAtMiddle = segmentIdAdMiddleArray(0) + segmentIdAtMiddle <- tryo(segmentIdAdMiddleArray(0)).toFox segmentIndex <- if (segmentIdAtMiddle == segmentId) Fox.successful(middle) else if (segmentIdAtMiddle < segmentId) { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala index a25d601bc8d..38de42a97bc 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/AgglomerateFileCache.scala @@ -4,12 +4,14 @@ import java.util import ch.systemsx.cisd.hdf5.{HDF5DataSet, IHDF5Reader} import com.scalableminds.util.cache.LRUConcurrentCache import com.scalableminds.webknossos.datastore.dataformats.SafeCachable -import com.scalableminds.webknossos.datastore.models.datasource.LayerAttachment +import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, LayerAttachment} import com.scalableminds.webknossos.datastore.models.requests.{Cuboid, DataServiceDataRequest} import com.typesafe.scalalogging.LazyLogging import scala.collection.mutable +case class AgglomerateFileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) + case class CachedAgglomerateFile(reader: IHDF5Reader, dataset: HDF5DataSet, agglomerateIdCache: AgglomerateIdCache, @@ -18,23 +20,23 @@ case class CachedAgglomerateFile(reader: IHDF5Reader, override protected def onFinalize(): Unit = { dataset.close(); reader.close() } } -class AgglomerateFileCache(val maxEntries: Int) extends LRUConcurrentCache[LayerAttachment, CachedAgglomerateFile] { - override def onElementRemoval(key: LayerAttachment, value: CachedAgglomerateFile): Unit = +class AgglomerateFileCache(val maxEntries: Int) extends LRUConcurrentCache[AgglomerateFileKey, CachedAgglomerateFile] { + override def onElementRemoval(key: AgglomerateFileKey, value: CachedAgglomerateFile): Unit = value.scheduleForRemoval() - def withCache(agglomerateFileAttachment: LayerAttachment)( - loadFn: LayerAttachment => CachedAgglomerateFile): CachedAgglomerateFile = { + def withCache(agglomerateFileKey: AgglomerateFileKey)( + loadFn: AgglomerateFileKey => CachedAgglomerateFile): CachedAgglomerateFile = { def handleUncachedAgglomerateFile() = { - val 
agglomerateFile = loadFn(agglomerateFileAttachment) + val agglomerateFile = loadFn(agglomerateFileKey) // We don't need to check the return value of the `tryAccess` call as we just created the agglomerate file and use it only to increase the access counter. agglomerateFile.tryAccess() - put(agglomerateFileAttachment, agglomerateFile) + put(agglomerateFileKey, agglomerateFile) agglomerateFile } this.synchronized { - get(agglomerateFileAttachment) match { + get(agglomerateFileKey) match { case Some(agglomerateFile) => if (agglomerateFile.tryAccess()) agglomerateFile else handleUncachedAgglomerateFile() case _ => handleUncachedAgglomerateFile() From e781bafa5fdc73365dbdf353221d8a712d519534 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 4 Jun 2025 11:57:49 +0200 Subject: [PATCH 027/100] clear agglomerate caches on layer/ds reload --- .../controllers/DataSourceController.scala | 30 ++++-------- .../services/AgglomerateService.scala | 46 +++++++++++++------ .../services/BinaryDataService.scala | 6 +-- .../services/BinaryDataServiceHolder.scala | 23 +++++++--- .../services/Hdf5AgglomerateService.scala | 6 ++- .../services/SegmentIndexFileService.scala | 8 ++-- .../services/ZarrAgglomerateService.scala | 23 ++-------- .../services/mesh/MeshMappingHelper.scala | 14 ++---- 8 files changed, 77 insertions(+), 79 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 40d4fe873de..342b53e5836 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -284,11 +284,9 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment <- agglomerateService - .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) - .toFox + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) skeleton <- agglomerateService - .generateSkeleton(agglomerateFileAttachment, agglomerateId) ?~> "agglomerateSkeleton.failed" + .generateSkeleton(agglomerateFileKey, agglomerateId) ?~> "agglomerateSkeleton.failed" } yield Ok(skeleton.toByteArray).as(protobufMimeType) } } @@ -307,11 +305,9 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment <- agglomerateService - .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) - .toFox + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) agglomerateGraph <- agglomerateService - .generateAgglomerateGraph(agglomerateFileAttachment, agglomerateId) ?~> "agglomerateGraph.failed" + .generateAgglomerateGraph(agglomerateFileKey, agglomerateId) ?~> "agglomerateGraph.failed" } yield Ok(agglomerateGraph.toByteArray).as(protobufMimeType) } } @@ -330,11 +326,9 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment <- agglomerateService - .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) - .toFox + agglomerateFileKey <- 
agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) position <- agglomerateService - .positionForSegmentId(agglomerateFileAttachment, segmentId) ?~> "getSegmentPositionFromAgglomerateFile.failed" + .positionForSegmentId(agglomerateFileKey, segmentId) ?~> "getSegmentPositionFromAgglomerateFile.failed" } yield Ok(Json.toJson(position)) } } @@ -352,10 +346,8 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment <- agglomerateService - .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) - .toFox - largestAgglomerateId: Long <- agglomerateService.largestAgglomerateId(agglomerateFileAttachment) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + largestAgglomerateId: Long <- agglomerateService.largestAgglomerateId(agglomerateFileKey) } yield Ok(Json.toJson(largestAgglomerateId)) } } @@ -373,11 +365,9 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileAttachment <- agglomerateService - .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) - .toFox + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) agglomerateIds: Seq[Long] <- agglomerateService.agglomerateIdsForSegmentIds( - agglomerateFileAttachment, + agglomerateFileKey, request.body.items ) } yield Ok(ListOfLong(agglomerateIds).toByteArray) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 555ae651bdd..6fc7a73a7c7 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -1,6 +1,7 @@ package com.scalableminds.webknossos.datastore.services import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.io.PathUtils import com.scalableminds.util.time.Instant @@ -23,20 +24,22 @@ import org.apache.commons.io.FilenameUtils import java.net.URI import java.nio.file.Paths -import javax.inject.Inject import scala.concurrent.ExecutionContext import scala.concurrent.duration.DurationInt -class AgglomerateService @Inject()(config: DataStoreConfig, - zarrAgglomerateService: ZarrAgglomerateService, - hdf5AgglomerateService: Hdf5AgglomerateService, - remoteSourceDescriptorService: RemoteSourceDescriptorService) +class AgglomerateService(config: DataStoreConfig, + zarrAgglomerateService: ZarrAgglomerateService, + hdf5AgglomerateService: Hdf5AgglomerateService, + remoteSourceDescriptorService: RemoteSourceDescriptorService) extends LazyLogging with FoxImplicits { private val agglomerateDir = "agglomerates" private val agglomerateFileExtension = "hdf5" private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) + private val agglomerateKeyCache + : AlfuCache[(DataSourceId, String, String), AgglomerateFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → AgglomerateFileKey + def exploreAgglomerates(organizationId: String, datasetDirectoryName: String, dataLayer: DataLayer): Set[String] = 
{ val attachedAgglomerates = dataLayer.attachments.map(_.agglomerates).getOrElse(Seq.empty).map(_.name).toSet @@ -55,14 +58,31 @@ class AgglomerateService @Inject()(config: DataStoreConfig, attachedAgglomerates ++ exploredAgglomerates } - def clearCaches(hdf5Predicate: AgglomerateFileKey => Boolean): Int = - // TODO also clear zarr caches - hdf5AgglomerateService.agglomerateFileCache.clear(hdf5Predicate) + def clearCaches(dataSourceId: DataSourceId, layerName: Option[String]): Int = { + agglomerateKeyCache.clear { + case (keyDataSourceId, keyLayerName, _) => dataSourceId == keyDataSourceId && layerName.forall(_ == keyLayerName) + } + + val clearedHdf5Count = hdf5AgglomerateService.clearCache { agglomerateFileKey => + agglomerateFileKey.dataSourceId == dataSourceId && layerName.forall(agglomerateFileKey.layerName == _) + } + + val clearedZarrCount = zarrAgglomerateService.clearCache { + case (agglomerateFileKey, _) => + agglomerateFileKey.dataSourceId == dataSourceId && layerName.forall(agglomerateFileKey.layerName == _) + } + + clearedHdf5Count + clearedZarrCount + } + + def lookUpAgglomerateFile(dataSourceId: DataSourceId, dataLayer: DataLayer, mappingName: String)( + implicit ec: ExecutionContext): Fox[AgglomerateFileKey] = + agglomerateKeyCache.getOrLoad((dataSourceId, dataLayer.name, mappingName), + _ => lookUpAgglomerateFileImpl(dataSourceId, dataLayer, mappingName).toFox) - // TODO cache? - def lookUpAgglomerateFile(dataSourceId: DataSourceId, - dataLayer: DataLayer, - mappingName: String): Box[AgglomerateFileKey] = { + private def lookUpAgglomerateFileImpl(dataSourceId: DataSourceId, + dataLayer: DataLayer, + mappingName: String): Box[AgglomerateFileKey] = { val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { case Some(attachments) => attachments.agglomerates.find(_.name == mappingName) case None => None @@ -93,7 +113,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, for { mappingName <- request.settings.appliedAgglomerate.toFox elementClass = request.dataLayer.elementClass - agglomerateFileKey <- lookUpAgglomerateFile(request.dataSourceIdOrVolumeDummy, request.dataLayer, mappingName).toFox + agglomerateFileKey <- lookUpAgglomerateFile(request.dataSourceIdOrVolumeDummy, request.dataLayer, mappingName) data <- agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.applyAgglomerate(agglomerateFileKey, elementClass)(data) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala index 7c90c52ac65..29551b9d252 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala @@ -259,14 +259,11 @@ class BinaryDataService(val dataBaseDir: Path, def clearCache(organizationId: String, datasetDirectoryName: String, layerName: Option[String]): (Int, Int, Int) = { val dataSourceId = DataSourceId(datasetDirectoryName, organizationId) - def agglomerateFileMatchPredicate(agglomerateFileKey: AgglomerateFileKey) = - agglomerateFileKey.dataSourceId == dataSourceId && layerName.forall(agglomerateFileKey.layerName == _) - def bucketProviderPredicate(key: (DataSourceId, String)): Boolean = key._1 == DataSourceId(datasetDirectoryName, organizationId) && layerName.forall(_ == key._2) val 
closedAgglomerateFileHandleCount = - agglomerateServiceOpt.map(_.clearCaches(agglomerateFileMatchPredicate)).getOrElse(0) + agglomerateServiceOpt.map(_.clearCaches(dataSourceId, layerName)).getOrElse(0) val clearedBucketProviderCount = bucketProviderCache.clear(bucketProviderPredicate) @@ -278,4 +275,5 @@ class BinaryDataService(val dataBaseDir: Path, (closedAgglomerateFileHandleCount, clearedBucketProviderCount, removedChunksCount) } + } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala index e667601dcb3..f5b2736ea42 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala @@ -4,7 +4,7 @@ import com.scalableminds.util.cache.AlfuCache import java.nio.file.Paths import com.scalableminds.webknossos.datastore.DataStoreConfig -import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService +import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.{Box, Full} import ucar.ma2.{Array => MultiArray} @@ -19,14 +19,13 @@ import scala.concurrent.ExecutionContext * The DataStore one is singleton-ized via this holder. */ -class BinaryDataServiceHolder @Inject()( - config: DataStoreConfig, - agglomerateService: AgglomerateService, - remoteSourceDescriptorService: RemoteSourceDescriptorService, - datasetErrorLoggingService: DSDatasetErrorLoggingService)(implicit ec: ExecutionContext) +class BinaryDataServiceHolder @Inject()(config: DataStoreConfig, + remoteSourceDescriptorService: RemoteSourceDescriptorService, + datasetErrorLoggingService: DSDatasetErrorLoggingService, + dataVaultService: DataVaultService)(implicit ec: ExecutionContext) extends LazyLogging { - private lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { + lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { // Used by DatasetArray-based datasets. 
Measure item weight in kilobytes because the weigher can only return int, not long val maxSizeKiloBytes = Math.floor(config.Datastore.Cache.ImageArrayChunks.maxSizeBytes.toDouble / 1000.0).toInt @@ -41,6 +40,16 @@ class BinaryDataServiceHolder @Inject()( AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) } + val zarrAgglomerateService = new ZarrAgglomerateService(config, dataVaultService, sharedChunkContentsCache) + val hdf5AgglomerateService = new Hdf5AgglomerateService(config) + + val agglomerateService = new AgglomerateService( + config, + zarrAgglomerateService, + hdf5AgglomerateService, + remoteSourceDescriptorService + ) + val binaryDataService: BinaryDataService = new BinaryDataService( Paths.get(config.Datastore.baseDirectory), Some(agglomerateService), diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala index 943626a8e5b..9ad442f8f84 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala @@ -32,8 +32,10 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv // TODO other keys, also in zarr case private val keySegmentToAgglomerate = "/segment_to_agglomerate" - // TODO clear on reload - lazy val agglomerateFileCache = new AgglomerateFileCache(config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) + private lazy val agglomerateFileCache = new AgglomerateFileCache( + config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) + + def clearCache(predicate: AgglomerateFileKey => Boolean): Int = agglomerateFileCache.clear(predicate) private def openHdf5(agglomerateFileKey: AgglomerateFileKey): IHDF5Reader = { if (agglomerateFileKey.attachment.path.getScheme.nonEmpty && agglomerateFileKey.attachment.path.getScheme != "file") { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala index a47829884be..fde4e56a8b8 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala @@ -227,13 +227,11 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, datasetDirectoryName, dataLayerName) agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileAttachment <- agglomerateService - .lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) - .toFox - largestAgglomerateId <- agglomerateService.largestAgglomerateId(agglomerateFileAttachment) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + largestAgglomerateId <- agglomerateService.largestAgglomerateId(agglomerateFileKey) segmentIds <- if (segmentOrAgglomerateId <= largestAgglomerateId) { agglomerateService.segmentIdsForAgglomerateId( - agglomerateFileAttachment, + agglomerateFileKey, segmentOrAgglomerateId ) } else diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala index 
58b37beabee..f28e5e9a9e5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -15,35 +15,22 @@ import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, E import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, DataVaultService, RemoteSourceDescriptor} import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo -import net.liftweb.common.{Box, Full} import ucar.ma2.{Array => MultiArray} import java.nio.{ByteBuffer, ByteOrder, LongBuffer} -import javax.inject.Inject import scala.collection.compat.immutable.ArraySeq import scala.concurrent.ExecutionContext -class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService: DataVaultService) +class ZarrAgglomerateService(config: DataStoreConfig, + dataVaultService: DataVaultService, + sharedChunkContentsCache: AlfuCache[String, MultiArray]) extends DataConverter with LazyLogging { private lazy val openArraysCache = AlfuCache[(AgglomerateFileKey, String), DatasetArray]() - // TODO unify with existing chunkContentsCache from binaryDataService? - private lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { - // Used by DatasetArray-based datasets. Measure item weight in kilobytes because the weigher can only return int, not long - - val maxSizeKiloBytes = Math.floor(config.Datastore.Cache.ImageArrayChunks.maxSizeBytes.toDouble / 1000.0).toInt - - def cacheWeight(key: String, arrayBox: Box[MultiArray]): Int = - arrayBox match { - case Full(array) => - (array.getSizeBytes / 1000L).toInt - case _ => 0 - } - - AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) - } + def clearCache(predicate: ((AgglomerateFileKey, String)) => Boolean): Int = + openArraysCache.clear(predicate) protected lazy val bucketScanner = new NativeBucketScanner() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala index 9c54ce0a22b..6bb64a978b0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala @@ -39,12 +39,8 @@ trait MeshMappingHelper extends FoxImplicits { // assume agglomerate id, fetch oversegmentation segment ids for it for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileAttachment <- agglomerateService - .lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) - .toFox - segmentIdsBox <- agglomerateService - .segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId) - .shiftBox + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) + segmentIdsBox <- agglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId).shiftBox segmentIds <- segmentIdsBox match { case Full(segmentIds) => Fox.successful(segmentIds) case _ => if (omitMissing) Fox.successful(List.empty) else segmentIdsBox.toFox @@ -63,10 +59,8 @@ trait MeshMappingHelper extends FoxImplicits { else // the agglomerate id is not present in the editable mapping. Fetch its info from the base mapping. 
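        // Rough sketch of the lookup flow after this change (error handling and the omitMissing
        // fallback below are elided; names follow the surrounding code):
        //   for {
        //     agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName)
        //     segmentIds         <- agglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId)
        //   } yield segmentIds
        // lookUpAgglomerateFile is cached per (dataSourceId, layerName, mappingName), so repeated
        // mesh requests for the same mapping do not re-resolve the attachment path.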
for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileAttachment <- agglomerateService - .lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) - .toFox - localSegmentIds <- agglomerateService.segmentIdsForAgglomerateId(agglomerateFileAttachment, agglomerateId) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) + localSegmentIds <- agglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId) } yield localSegmentIds } yield segmentIds case _ => Fox.failure("Cannot determine segment ids for editable mapping without base mapping") From 419490f93865b7e859e1e7420fd32bf3f5ffc133 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 4 Jun 2025 13:07:26 +0200 Subject: [PATCH 028/100] avoid injection --- .../webknossos/datastore/DataStoreModule.scala | 2 -- .../webknossos/datastore/controllers/Application.scala | 7 +++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index 4b1ee3c06a2..2c5f1c63239 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -27,8 +27,6 @@ class DataStoreModule extends AbstractModule { bind(classOf[DSRemoteWebknossosClient]).asEagerSingleton() bind(classOf[BinaryDataServiceHolder]).asEagerSingleton() bind(classOf[MappingService]).asEagerSingleton() - bind(classOf[AgglomerateService]).asEagerSingleton() - bind(classOf[ZarrAgglomerateService]).asEagerSingleton() bind(classOf[AdHocMeshServiceHolder]).asEagerSingleton() bind(classOf[ApplicationHealthService]).asEagerSingleton() bind(classOf[DSDatasetErrorLoggingService]).asEagerSingleton() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala index 90e222df1be..693b917d7dd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/Application.scala @@ -4,7 +4,7 @@ import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.helpers.NativeBucketScanner import com.scalableminds.webknossos.datastore.models.datasource.ElementClass -import com.scalableminds.webknossos.datastore.services.{ApplicationHealthService, ZarrAgglomerateService} +import com.scalableminds.webknossos.datastore.services.ApplicationHealthService import com.scalableminds.webknossos.datastore.storage.DataStoreRedisStore import net.liftweb.common.Box.tryo @@ -13,9 +13,8 @@ import play.api.mvc.{Action, AnyContent} import scala.concurrent.ExecutionContext -class Application @Inject()(redisClient: DataStoreRedisStore, - applicationHealthService: ApplicationHealthService, - agglomerateService: ZarrAgglomerateService)(implicit ec: ExecutionContext) +class Application @Inject()(redisClient: DataStoreRedisStore, applicationHealthService: ApplicationHealthService)( + implicit ec: ExecutionContext) extends Controller { override def allowRemoteOrigin: Boolean = true From 9c781670163cd87d2dd71e8238d5d8581ba6d94f Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 5 Jun 2025 
08:39:02 +0200 Subject: [PATCH 029/100] prioritize WebknossosZarrExplorer --- .../datastore/explore/ExploreRemoteLayerService.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/ExploreRemoteLayerService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/ExploreRemoteLayerService.scala index d798178069a..642d3f44c60 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/ExploreRemoteLayerService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/ExploreRemoteLayerService.scala @@ -96,9 +96,9 @@ class ExploreRemoteLayerService @Inject()(dataVaultService: DataVaultService, credentialId, List( // Explorers are ordered to prioritize the explorer reading meta information over raw Zarr, N5, ... data. + new WebknossosZarrExplorer, new NgffV0_4Explorer, new NgffV0_5Explorer, - new WebknossosZarrExplorer, new Zarr3ArrayExplorer, new ZarrArrayExplorer(Vec3Int.ones), new N5MultiscalesExplorer, From fdd7b4a366e2849ed8b3cb7672bba16a65151893 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 5 Jun 2025 09:07:35 +0200 Subject: [PATCH 030/100] cleanup --- conf/application.conf | 2 +- .../datastore/datareaders/DatasetArray.scala | 5 +- .../datareaders/MultiArrayUtils.scala | 3 +- .../datareaders/zarr3/Zarr3Array.scala | 7 +-- .../services/Hdf5AgglomerateService.scala | 44 ++++++------- .../services/ZarrAgglomerateService.scala | 61 +++++++++++-------- .../RemoteSourceDescriptorService.scala | 10 +-- 7 files changed, 70 insertions(+), 62 deletions(-) diff --git a/conf/application.conf b/conf/application.conf index c295317e578..aac6419d24e 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -347,4 +347,4 @@ pidfile.path = "/dev/null" # uncomment these lines for faster restart during local backend development (but beware the then-missing features): -slick.checkSchemaOnStartup = false +#slick.checkSchemaOnStartup = false diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index 683071dbf0f..2a3a33b2b17 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -8,7 +8,6 @@ import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.models.AdditionalCoordinate import com.scalableminds.webknossos.datastore.models.datasource.AdditionalAxis -import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo import ucar.ma2.{Array => MultiArray} @@ -27,8 +26,7 @@ class DatasetArray(vaultPath: VaultPath, channelIndex: Option[Int], additionalAxes: Option[Seq[AdditionalAxis]], sharedChunkContentsCache: AlfuCache[String, MultiArray]) - extends FoxImplicits - with LazyLogging { + extends FoxImplicits { protected lazy val fullAxisOrder: FullAxisOrder = FullAxisOrder.fromAxisOrderAndAdditionalAxes(rank, axisOrder, additionalAxes) @@ -308,7 +306,6 @@ class DatasetArray(vaultPath: VaultPath, (globalOffset(dim) - (chunkIndex(dim).toLong * chunkShape(dim).toLong)).toInt }.toArray - // TODO works only for wk dataet arrays, not agglomerate files override def toString: String = 
s"${getClass.getCanonicalName} fullAxisOrder=$fullAxisOrder shape=${header.datasetShape.map(s => printAsInner(s.map(_.toInt)))} chunkShape=${printAsInner( header.chunkShape)} dtype=${header.resolvedDataType} fillValue=${header.fillValueNumber}, ${header.compressorImpl}, byteOrder=${header.byteOrder}, vault=${vaultPath.summary}}" diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala index 86e19397b85..a5820990e28 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala @@ -46,7 +46,8 @@ object MultiArrayUtils extends LazyLogging { } def createEmpty(rank: Int): MultiArray = - MultiArray.factory(MADataType.FLOAT, Array.fill(rank)(0)) + // The data type of the empty MultiArray doesn’t matter, since we’re not accessing any actual elements. + MultiArray.factory(MADataType.INT, Array.fill(rank)(0)) def toLongArray(multiArray: MultiArray): Box[Array[Long]] = multiArray.getDataType match { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala index ce417854119..bef05eaa774 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala @@ -22,10 +22,10 @@ object Zarr3Array extends LazyLogging with FoxImplicits { channelIndex: Option[Int], additionalAxes: Option[Seq[AdditionalAxis]], sharedChunkContentsCache: AlfuCache[String, MultiArray])(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Zarr3Array] = { - val headerPath = path / Zarr3ArrayHeader.FILENAME_ZARR_JSON + tc: TokenContext): Fox[Zarr3Array] = for { - headerBytes <- headerPath.readBytes() ?~> s"Could not read header at $headerPath" + headerBytes <- (path / Zarr3ArrayHeader.FILENAME_ZARR_JSON) + .readBytes() ?~> s"Could not read header at ${Zarr3ArrayHeader.FILENAME_ZARR_JSON}" header <- JsonHelper.parseAs[Zarr3ArrayHeader](headerBytes).toFox ?~> "Could not parse array header" array <- tryo( new Zarr3Array(path, @@ -37,7 +37,6 @@ object Zarr3Array extends LazyLogging with FoxImplicits { additionalAxes, sharedChunkContentsCache)).toFox ?~> "Could not open zarr3 array" } yield array - } } class Zarr3Array(vaultPath: VaultPath, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala index 9ad442f8f84..4cb636c8472 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala @@ -29,8 +29,14 @@ import scala.collection.compat.immutable.ArraySeq class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter { private val cumsumFileName = "cumsum.json" - // TODO other keys, also in zarr case + private val keySegmentToAgglomerate = "/segment_to_agglomerate" + private val keyAgglomerateToSegmentsOffsets = "/agglomerate_to_segments_offsets" + private val keyAgglomerateToSegments = 
"/agglomerate_to_segments" + private val keyAgglomerateToPositions = "/agglomerate_to_positions" + private val keyAgglomerateToEdges = "/agglomerate_to_edges" + private val keyAgglomerateToEdgesOffsets = "/agglomerate_to_edges_offsets" + private val keyAgglomerateToAffinities = "/agglomerate_to_affinities" private lazy val agglomerateFileCache = new AgglomerateFileCache( config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) @@ -48,7 +54,7 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey): Box[Long] = tryo { val reader = openHdf5(agglomerateFileKey) - reader.`object`().getNumberOfElements("/agglomerate_to_segments_offsets") - 1L + reader.`object`().getNumberOfElements(keyAgglomerateToSegmentsOffsets) - 1L } def applyAgglomerate(agglomerateFileKey: AgglomerateFileKey, request: DataServiceDataRequest)( @@ -117,9 +123,9 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv try { val reader = openHdf5(agglomerateFileKey) val positionsRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) + reader.uint64().readArrayBlockWithOffset(keyAgglomerateToSegmentsOffsets, 2, agglomerateId) val edgesRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) + reader.uint64().readArrayBlockWithOffset(keyAgglomerateToEdgesOffsets, 2, agglomerateId) val nodeCount = positionsRange(1) - positionsRange(0) val edgeCount = edgesRange(1) - edgesRange(0) @@ -134,15 +140,13 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv if (nodeCount == 0L) { Array.empty[Array[Long]] } else { - reader - .uint64() - .readMatrixBlockWithOffset("/agglomerate_to_positions", nodeCount.toInt, 3, positionsRange(0), 0) + reader.uint64().readMatrixBlockWithOffset(keyAgglomerateToPositions, nodeCount.toInt, 3, positionsRange(0), 0) } val edges: Array[Array[Long]] = { if (edgeCount == 0L) { Array.empty[Array[Long]] } else { - reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) + reader.uint64().readMatrixBlockWithOffset(keyAgglomerateToEdges, edgeCount.toInt, 2, edgesRange(0), 0) } } @@ -182,9 +186,9 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv val reader = openHdf5(agglomerateFileKey) val positionsRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) + reader.uint64().readArrayBlockWithOffset(keyAgglomerateToSegmentsOffsets, 2, agglomerateId) val edgesRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_edges_offsets", 2, agglomerateId) + reader.uint64().readArrayBlockWithOffset(keyAgglomerateToEdgesOffsets, 2, agglomerateId) val nodeCount = positionsRange(1) - positionsRange(0) val edgeCount = edgesRange(1) - edgesRange(0) @@ -198,21 +202,19 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv val segmentIds: Array[Long] = if (nodeCount == 0L) Array[Long]() else - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", nodeCount.toInt, positionsRange(0)) + reader.uint64().readArrayBlockWithOffset(keyAgglomerateToSegments, nodeCount.toInt, positionsRange(0)) val positions: Array[Array[Long]] = if (nodeCount == 0L) Array[Array[Long]]() else - reader - .uint64() - .readMatrixBlockWithOffset("/agglomerate_to_positions", 
nodeCount.toInt, 3, positionsRange(0), 0) + reader.uint64().readMatrixBlockWithOffset(keyAgglomerateToPositions, nodeCount.toInt, 3, positionsRange(0), 0) val edges: Array[Array[Long]] = if (edgeCount == 0L) Array[Array[Long]]() else - reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_edges", edgeCount.toInt, 2, edgesRange(0), 0) + reader.uint64().readMatrixBlockWithOffset(keyAgglomerateToEdges, edgeCount.toInt, 2, edgesRange(0), 0) val affinities: Array[Float] = if (edgeCount == 0L) Array[Float]() else - reader.float32().readArrayBlockWithOffset("/agglomerate_to_affinities", edgeCount.toInt, edgesRange(0)) + reader.float32().readArrayBlockWithOffset(keyAgglomerateToAffinities, edgeCount.toInt, edgesRange(0)) AgglomerateGraph( // unsafeWrapArray is fine, because the underlying arrays are never mutated @@ -229,13 +231,13 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv tryo { val reader = openHdf5(agglomerateFileKey) val positionsRange: Array[Long] = - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId) + reader.uint64().readArrayBlockWithOffset(keyAgglomerateToSegmentsOffsets, 2, agglomerateId) val segmentCount = positionsRange(1) - positionsRange(0) val segmentIds: Array[Long] = if (segmentCount == 0) Array.empty[Long] else { - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", segmentCount.toInt, positionsRange(0)) + reader.uint64().readArrayBlockWithOffset(keyAgglomerateToSegments, segmentCount.toInt, positionsRange(0)) } segmentIds.toSeq } @@ -247,9 +249,9 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv reader.uint64().readArrayBlockWithOffset(keySegmentToAgglomerate, 1, segmentId)) agglomerateId = agglomerateIdArr(0) segmentsRange: Array[Long] <- tryo( - reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments_offsets", 2, agglomerateId)) + reader.uint64().readArrayBlockWithOffset(keyAgglomerateToSegmentsOffsets, 2, agglomerateId)) segmentIndex <- binarySearchForSegment(segmentsRange(0), segmentsRange(1), segmentId, reader) - position <- tryo(reader.uint64().readMatrixBlockWithOffset("/agglomerate_to_positions", 1, 3, segmentIndex, 0)(0)) + position <- tryo(reader.uint64().readMatrixBlockWithOffset(keyAgglomerateToPositions, 1, 3, segmentIndex, 0)(0)) } yield Vec3Int(position(0).toInt, position(1).toInt, position(2).toInt) } @@ -261,7 +263,7 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv if (rangeStart > rangeEnd) Failure("Could not find segmentId in agglomerate file") else { val middle = rangeStart + (rangeEnd - rangeStart) / 2 - val segmentIdAtMiddle: Long = reader.uint64().readArrayBlockWithOffset("/agglomerate_to_segments", 1, middle)(0) + val segmentIdAtMiddle: Long = reader.uint64().readArrayBlockWithOffset(keyAgglomerateToSegments, 1, middle)(0) if (segmentIdAtMiddle == segmentId) Full(middle) else if (segmentIdAtMiddle < segmentId) binarySearchForSegment(middle + 1L, rangeEnd, segmentId, reader) else binarySearchForSegment(rangeStart, middle - 1L, segmentId, reader) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala index f28e5e9a9e5..ed420dcf428 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -34,6 +34,14 @@ class ZarrAgglomerateService(config: DataStoreConfig, protected lazy val bucketScanner = new NativeBucketScanner() + private val keySegmentToAgglomerate = "segment_to_agglomerate" + private val keyAgglomerateToSegmentsOffsets = "agglomerate_to_segments_offsets" + private val keyAgglomerateToSegments = "agglomerate_to_segments" + private val keyAgglomerateToPositions = "agglomerate_to_positions" + private val keyAgglomerateToEdges = "agglomerate_to_edges" + private val keyAgglomerateToEdgesOffsets = "agglomerate_to_edges_offsets" + private val keyAgglomerateToAffinities = "agglomerate_to_affinities" + private def mapSingleSegment(segmentToAgglomerate: DatasetArray, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { @@ -78,7 +86,7 @@ class ZarrAgglomerateService(config: DataStoreConfig, bucketScanner.collectSegmentIds(data, bytesPerElement, isSigned = false, skipZeroes = false) for { - segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, keySegmentToAgglomerate) relevantAgglomerateMap: Map[Long, Long] <- Fox .serialCombined(distinctSegmentIds) { segmentId => mapSingleSegment(segmentToAgglomerate, segmentId).map((segmentId, _)) @@ -106,21 +114,22 @@ class ZarrAgglomerateService(config: DataStoreConfig, def generateSkeleton(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[SkeletonTracing] = for { - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") - agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToSegmentsOffsets) + agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToEdgesOffsets) positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) nodeCount <- tryo(positionsRange.getLong(1) - positionsRange.getLong(0)).toFox - edgeCount <- tryo(edgesRange.getLong(1) - edgesRange.getLong(0)).toFox + edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) + edgesOffset <- tryo(edgesRange.getLong(0)).toFox + edgeCount <- tryo(edgesRange.getLong(1) - edgesOffset).toFox edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" - agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") + agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToPositions) positions <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), shape = Array(nodeCount.toInt, 3)) - agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") - edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), + agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToEdges) + edges: MultiArray <- 
agglomerateToEdges.readAsMultiArray(offset = Array(edgesOffset, 0), shape = Array(edgeCount.toInt, 2)) nodeIdStartAtOneOffset = 1 @@ -163,7 +172,7 @@ class ZarrAgglomerateService(config: DataStoreConfig, def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { - array <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") + array <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToSegmentsOffsets) shape <- array.datasetShape.toFox ?~> "Could not determine array shape" shapeFirstElement <- tryo(shape(0)).toFox } yield shapeFirstElement @@ -172,29 +181,29 @@ class ZarrAgglomerateService(config: DataStoreConfig, implicit ec: ExecutionContext, tc: TokenContext): Fox[AgglomerateGraph] = for { - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") - agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges_offsets") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToSegmentsOffsets) + agglomerateToEdgesOffsets <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToEdgesOffsets) positionsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) edgesRange: MultiArray <- agglomerateToEdgesOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) nodeCount <- tryo(positionsRange.getLong(1) - positionsRange.getLong(0)).toFox - edgeCount <- tryo(edgesRange.getLong(1) - edgesRange.getLong(0)).toFox + edgesOffset <- tryo(edgesRange.getLong(0)).toFox + edgeCount <- tryo(edgesRange.getLong(1) - edgesOffset).toFox edgeLimit = config.Datastore.AgglomerateSkeleton.maxEdges _ <- Fox.fromBool(nodeCount <= edgeLimit) ?~> s"Agglomerate has too many nodes ($nodeCount > $edgeLimit)" _ <- Fox.fromBool(edgeCount <= edgeLimit) ?~> s"Agglomerate has too many edges ($edgeCount > $edgeLimit)" - agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") + agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToPositions) positions: MultiArray <- agglomerateToPositions.readAsMultiArray(offset = Array(positionsRange.getLong(0), 0), shape = Array(nodeCount.toInt, 3)) - agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") + agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToSegments) segmentIdsMA: MultiArray <- agglomerateToSegments.readAsMultiArray(offset = positionsRange.getInt(0), shape = nodeCount.toInt) segmentIds: Array[Long] <- MultiArrayUtils.toLongArray(segmentIdsMA).toFox - agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_edges") - edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesRange.getLong(0), 0), + agglomerateToEdges <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToEdges) + edges: MultiArray <- agglomerateToEdges.readAsMultiArray(offset = Array(edgesOffset, 0), shape = Array(edgeCount.toInt, 2)) - agglomerateToAffinities <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_affinities") - affinities: MultiArray <- agglomerateToAffinities.readAsMultiArray(offset = edgesRange.getLong(0), - shape = edgeCount.toInt) + agglomerateToAffinities <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToAffinities) + affinities: MultiArray <- agglomerateToAffinities.readAsMultiArray(offset = edgesOffset, shape = edgeCount.toInt) 
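      // Layout note (a sketch; `start` and `count` are illustrative names, not identifiers from this file):
      // the *_offsets arrays hold per-agglomerate ranges, so nodeCount/edgeCount above are end minus start,
      // and the 2D arrays are then read with per-axis offset and shape, e.g.
      //   agglomerateToPositions.readAsMultiArray(offset = Array(start, 0), shape = Array(count.toInt, 3))
      // while 1D arrays such as agglomerate_to_affinities take a plain offset and element count,
      // mirroring the reads in generateSkeleton; only the graph assembly below differs.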
agglomerateGraph <- tryo { AgglomerateGraph( @@ -221,8 +230,8 @@ class ZarrAgglomerateService(config: DataStoreConfig, def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = for { - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") - agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToSegmentsOffsets) + agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToSegments) segmentRange <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) segmentOffset <- tryo(segmentRange.getLong(0)).toFox segmentCount <- tryo(segmentRange.getLong(1) - segmentOffset).toFox @@ -238,7 +247,7 @@ class ZarrAgglomerateService(config: DataStoreConfig, implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = for { - segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, keySegmentToAgglomerate) agglomerateIds <- Fox.serialCombined(segmentIds) { segmentId => mapSingleSegment(segmentToAgglomerate, segmentId) } @@ -247,16 +256,16 @@ class ZarrAgglomerateService(config: DataStoreConfig, def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Vec3Int] = for { - segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, "segment_to_agglomerate") + segmentToAgglomerate <- openZarrArrayCached(agglomerateFileKey, keySegmentToAgglomerate) agglomerateId <- mapSingleSegment(segmentToAgglomerate, segmentId) - agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments_offsets") + agglomerateToSegmentsOffsets <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToSegmentsOffsets) segmentsRange: MultiArray <- agglomerateToSegmentsOffsets.readAsMultiArray(offset = agglomerateId, shape = 2) - agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_segments") + agglomerateToSegments <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToSegments) segmentIndex <- binarySearchForSegment(segmentsRange.getLong(0), segmentsRange.getLong(1), segmentId, agglomerateToSegments) - agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, "agglomerate_to_positions") + agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToPositions) position <- agglomerateToPositions.readAsMultiArray(offset = Array(segmentIndex, 0), shape = Array(3, 1)) } yield Vec3Int(position.getInt(0), position.getInt(1), position.getInt(2)) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala index 7d65109e6ad..84dda12083c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala @@ -62,12 +62,12 @@ class RemoteSourceDescriptorService @Inject()(dSRemoteWebknossosClient: DSRemote throw new Exception( s"Absolute path $localPath in local file system is not in path whitelist. 
Consider adding it to datastore.localDirectoryWhitelist") } else { // relative local path, resolve in dataset dir - val magPathRelativeToDataset = localDatasetDir.resolve(localPath) - val magPathRelativeToLayer = localDatasetDir.resolve(layerName).resolve(localPath) - if (magPathRelativeToDataset.toFile.exists) { - magPathRelativeToDataset.toUri + val pathRelativeToDataset = localDatasetDir.resolve(localPath) + val pathRelativeToLayer = localDatasetDir.resolve(layerName).resolve(localPath) + if (pathRelativeToDataset.toFile.exists) { + pathRelativeToDataset.toUri } else { - magPathRelativeToLayer.toUri + pathRelativeToLayer.toUri } } } else { From 09dfc0ce379493d0c02705bfa5561fcda294c7e4 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 5 Jun 2025 09:08:49 +0200 Subject: [PATCH 031/100] changelog --- CHANGELOG.unreleased.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.unreleased.md b/CHANGELOG.unreleased.md index 61c76046a41..aaf7f3f39d1 100644 --- a/CHANGELOG.unreleased.md +++ b/CHANGELOG.unreleased.md @@ -13,6 +13,7 @@ For upgrade instructions, please check the [migration guide](MIGRATIONS.released ### Added - Added the ability to duplicate trees in skeleton annotations. Users can create a copy of any tree (including all nodes, edges, and properties) via the context menu in the skeleton tab. [#8662](https://github.com/scalableminds/webknossos/pull/8662) - Meshes are now reloaded using their previous opacity value. [#8622](https://github.com/scalableminds/webknossos/pull/8622) +- Agglomerate Mappings can now also be read from the new zarr3-based format, and from remote object storage. [#8633](https://github.com/scalableminds/webknossos/pull/8633) ### Changed From 10dc7c378cf24b462c9c0e5f75a7be5ceb7bbe08 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 10 Jun 2025 09:48:03 +0200 Subject: [PATCH 032/100] make dummy datasource id more explicit --- .../webknossos/datastore/services/ZarrAgglomerateService.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala index ed420dcf428..868c1ca70d6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -59,7 +59,7 @@ class ZarrAgglomerateService(config: DataStoreConfig, groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(agglomerateFileKey.attachment.path, None)) segmentToAgglomeratePath = groupVaultPath / zarrArrayName zarrArray <- Zarr3Array.open(segmentToAgglomeratePath, - DataSourceId("zarr", "test"), + DataSourceId("dummy", "unused"), "layer", None, None, From 73b2f8065a7be3ff074fe5d6cf0e1df4437df128 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 10 Jun 2025 10:58:06 +0200 Subject: [PATCH 033/100] WIP Read Zarr Meshfiles --- .../datastore/DataStoreModule.scala | 1 + .../services/BinaryDataServiceHolder.scala | 26 ++++--------------- .../services/ChunkCacheService.scala | 24 +++++++++++++++++ .../services/mesh/ZarrMeshFileService.scala | 8 ++++++ 4 files changed, 38 insertions(+), 21 deletions(-) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ChunkCacheService.scala create mode 100644 
webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index 2c5f1c63239..51fe3cd5590 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -33,5 +33,6 @@ class DataStoreModule extends AbstractModule { bind(classOf[MeshFileService]).asEagerSingleton() bind(classOf[NeuroglancerPrecomputedMeshFileService]).asEagerSingleton() bind(classOf[RemoteSourceDescriptorService]).asEagerSingleton() + bind(classOf[ChunkCacheService]).asEagerSingleton() } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala index f5b2736ea42..db99771d13b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala @@ -1,13 +1,9 @@ package com.scalableminds.webknossos.datastore.services -import com.scalableminds.util.cache.AlfuCache - import java.nio.file.Paths import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging -import net.liftweb.common.{Box, Full} -import ucar.ma2.{Array => MultiArray} import javax.inject.Inject import scala.concurrent.ExecutionContext @@ -22,25 +18,13 @@ import scala.concurrent.ExecutionContext class BinaryDataServiceHolder @Inject()(config: DataStoreConfig, remoteSourceDescriptorService: RemoteSourceDescriptorService, datasetErrorLoggingService: DSDatasetErrorLoggingService, + chunkCacheService: ChunkCacheService, dataVaultService: DataVaultService)(implicit ec: ExecutionContext) extends LazyLogging { - lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { - // Used by DatasetArray-based datasets. 
Measure item weight in kilobytes because the weigher can only return int, not long - - val maxSizeKiloBytes = Math.floor(config.Datastore.Cache.ImageArrayChunks.maxSizeBytes.toDouble / 1000.0).toInt - - def cacheWeight(key: String, arrayBox: Box[MultiArray]): Int = - arrayBox match { - case Full(array) => - (array.getSizeBytes / 1000L).toInt - case _ => 0 - } - - AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) - } - - val zarrAgglomerateService = new ZarrAgglomerateService(config, dataVaultService, sharedChunkContentsCache) + // TODO make them injectable again + val zarrAgglomerateService = + new ZarrAgglomerateService(config, dataVaultService, chunkCacheService.sharedChunkContentsCache) val hdf5AgglomerateService = new Hdf5AgglomerateService(config) val agglomerateService = new AgglomerateService( @@ -54,7 +38,7 @@ class BinaryDataServiceHolder @Inject()(config: DataStoreConfig, Paths.get(config.Datastore.baseDirectory), Some(agglomerateService), Some(remoteSourceDescriptorService), - Some(sharedChunkContentsCache), + Some(chunkCacheService.sharedChunkContentsCache), datasetErrorLoggingService ) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ChunkCacheService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ChunkCacheService.scala new file mode 100644 index 00000000000..36e2db3f014 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ChunkCacheService.scala @@ -0,0 +1,24 @@ +package com.scalableminds.webknossos.datastore.services + +import com.scalableminds.util.cache.AlfuCache +import com.scalableminds.webknossos.datastore.DataStoreConfig +import net.liftweb.common.{Box, Full} +import ucar.ma2.{Array => MultiArray} +import jakarta.inject.Inject + +class ChunkCacheService @Inject()(config: DataStoreConfig) { + lazy val sharedChunkContentsCache: AlfuCache[String, MultiArray] = { + // Used by DatasetArray-based datasets. 
Measure item weight in kilobytes because the weigher can only return int, not long + + val maxSizeKiloBytes = Math.floor(config.Datastore.Cache.ImageArrayChunks.maxSizeBytes.toDouble / 1000.0).toInt + + def cacheWeight(key: String, arrayBox: Box[MultiArray]): Int = + arrayBox match { + case Full(array) => + (array.getSizeBytes / 1000L).toInt + case _ => 0 + } + + AlfuCache(maxSizeKiloBytes, weighFn = Some(cacheWeight)) + } +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala new file mode 100644 index 00000000000..420d3ceb9a9 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -0,0 +1,8 @@ +package com.scalableminds.webknossos.datastore.services.mesh + +import javax.inject.Inject + + +class ZarrMeshFileService @Inject()() { + +} From ca30ebe97b3908efd090fa3419dccb4c1f6181b5 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 10 Jun 2025 11:54:04 +0200 Subject: [PATCH 034/100] read metadata from zarr group header --- .../controllers/ZarrStreamingController.scala | 4 +-- .../datareaders/zarr3/Zarr3GroupHeader.scala | 7 +++++ .../services/mesh/ZarrMeshFileService.scala | 27 ++++++++++++++++++- ...VolumeTracingZarrStreamingController.scala | 2 +- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/ZarrStreamingController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/ZarrStreamingController.scala index c06bd6367bb..2dad0515110 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/ZarrStreamingController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/ZarrStreamingController.scala @@ -83,7 +83,7 @@ class ZarrStreamingController @Inject()( dataSource.scale, dataLayer.sortedMags, dataLayer.additionalAxes) - zarr3GroupHeader = Zarr3GroupHeader(3, "group", Some(omeNgffHeaderV0_5)) + zarr3GroupHeader = Zarr3GroupHeader(3, "group", Some(omeNgffHeaderV0_5), None) } yield Ok(Json.toJson(zarr3GroupHeader)) } } @@ -132,7 +132,7 @@ class ZarrStreamingController @Inject()( dataSource.scale, dataLayer.sortedMags, dataLayer.additionalAxes) - zarr3GroupHeader = Zarr3GroupHeader(3, "group", Some(dataSourceOmeNgffHeader)) + zarr3GroupHeader = Zarr3GroupHeader(3, "group", Some(dataSourceOmeNgffHeader), None) } yield Ok(Json.toJson(zarr3GroupHeader)) ) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3GroupHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3GroupHeader.scala index 396f830288b..de1eb77ed65 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3GroupHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3GroupHeader.scala @@ -1,15 +1,19 @@ package com.scalableminds.webknossos.datastore.datareaders.zarr3 import com.scalableminds.webknossos.datastore.datareaders.zarr.NgffMetadataV0_5 +import com.scalableminds.webknossos.datastore.services.mesh.MeshfileAttributes import play.api.libs.json._ case class Zarr3GroupHeader( zarr_format: Int, // must be 3 node_type: String, // must be "group" ngffMetadata: Option[NgffMetadataV0_5], + meshfileAttributes: 
Option[MeshfileAttributes] ) object Zarr3GroupHeader { + def FILENAME_ZARR_JSON = "zarr.json" + implicit object Zarr3GroupHeaderFormat extends Format[Zarr3GroupHeader] { override def reads(json: JsValue): JsResult[Zarr3GroupHeader] = for { @@ -17,11 +21,14 @@ object Zarr3GroupHeader { node_type <- (json \ "node_type").validate[String] // Read the metadata from the correct json path. ngffMetadata <- (json \ "attributes" \ "ome").validateOpt[NgffMetadataV0_5] + // TODO unify. also, include in Writes + meshfileAttributes <- (json \ "attributes").validateOpt[MeshfileAttributes] } yield Zarr3GroupHeader( zarr_format, node_type, ngffMetadata, + meshfileAttributes ) override def writes(zarrArrayGroup: Zarr3GroupHeader): JsValue = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 420d3ceb9a9..f2810645198 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -1,8 +1,33 @@ package com.scalableminds.webknossos.datastore.services.mesh +import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} +import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3GroupHeader +import com.scalableminds.webknossos.datastore.datavault.VaultPath +import com.scalableminds.webknossos.datastore.services.ChunkCacheService +import play.api.libs.json.{Json, OFormat} + import javax.inject.Inject +import scala.concurrent.ExecutionContext + +case class MeshfileAttributes( + mesh_format: String, + lod_scale_multiplier: Double, + transform: Array[Array[Double]] +) + +object MeshfileAttributes { + implicit val jsonFormat: OFormat[MeshfileAttributes] = Json.format[MeshfileAttributes] +} +class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService) extends FoxImplicits { -class ZarrMeshFileService @Inject()() { + def readMeshfileMetadata(meshFilePath: VaultPath)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[(String, Double, Array[Array[Double]])] = + for { + groupHeaderBytes <- (meshFilePath / Zarr3GroupHeader.FILENAME_ZARR_JSON).readBytes() + groupHeader <- JsonHelper.parseAs[Zarr3GroupHeader](groupHeaderBytes).toFox ?~> "Could not parse array header" + meshfileAttributes <- groupHeader.meshfileAttributes.toFox ?~> "Could not parse meshfile attributes from zarr group file" + } yield (meshfileAttributes.mesh_format, meshfileAttributes.lod_scale_multiplier, meshfileAttributes.transform) } diff --git a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala index 9c24db0cb66..01952f03c97 100644 --- a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala +++ b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala @@ -257,7 +257,7 @@ class VolumeTracingZarrStreamingController @Inject()( dataSourceVoxelSize = dataSource.scale, mags = sortedExistingMags, additionalAxes = dataSource.additionalAxesUnion) - zarr3GroupHeader = Zarr3GroupHeader(3, "group", 
Some(omeNgffHeader)) + zarr3GroupHeader = Zarr3GroupHeader(3, "group", Some(omeNgffHeader), None) } yield Ok(Json.toJson(zarr3GroupHeader)) } } From 29073e8c917267eca4a26d117fca72dc5f9b2444 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 10 Jun 2025 13:20:06 +0200 Subject: [PATCH 035/100] wip read neuroglancer segment manifests --- .../services/mesh/ZarrMeshFileService.scala | 77 +++++++++++++++++-- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index f2810645198..199bf7c8afe 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -2,9 +2,12 @@ package com.scalableminds.webknossos.datastore.services.mesh import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} -import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3GroupHeader +import com.scalableminds.webknossos.datastore.datareaders.DatasetArray +import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3Array, Zarr3GroupHeader} import com.scalableminds.webknossos.datastore.datavault.VaultPath -import com.scalableminds.webknossos.datastore.services.ChunkCacheService +import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId +import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, Hdf5HashedArrayUtils} +import net.liftweb.common.Box.tryo import play.api.libs.json.{Json, OFormat} import javax.inject.Inject @@ -13,14 +16,24 @@ import scala.concurrent.ExecutionContext case class MeshfileAttributes( mesh_format: String, lod_scale_multiplier: Double, - transform: Array[Array[Double]] -) + transform: Array[Array[Double]], + hash_function: String, + n_buckets: Int // TODO camelCase + custom format? 
+) extends Hdf5HashedArrayUtils { + lazy val applyHashFunction: Long => Long = getHashFunction(hash_function) +} object MeshfileAttributes { implicit val jsonFormat: OFormat[MeshfileAttributes] = Json.format[MeshfileAttributes] } -class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService) extends FoxImplicits { +class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService) + extends FoxImplicits + with NeuroglancerMeshHelper { + + private val keyBucketOffsets = "bucket_offsets" + private val keyBuckets = "buckets" + private val keyNeuroglancer = "neuroglancer" def readMeshfileMetadata(meshFilePath: VaultPath)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(String, Double, Array[Array[Double]])] = @@ -30,4 +43,58 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService) extend meshfileAttributes <- groupHeader.meshfileAttributes.toFox ?~> "Could not parse meshfile attributes from zarr group file" } yield (meshfileAttributes.mesh_format, meshfileAttributes.lod_scale_multiplier, meshfileAttributes.transform) + def listMeshChunksForSegment(meshFilePath: VaultPath, segmentId: Long, meshfileAttributes: MeshfileAttributes)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[MeshLodInfo]] = + for { + (neuroglancerSegmentManifestStart, neuroglancerSegmentManifestEnd) <- getNeuroglancerSegmentManifestOffsets( + meshFilePath, + meshfileAttributes, + segmentId) + neuroglancerArray <- openZarrArray(meshFilePath, keyNeuroglancer) + manifestBytes <- neuroglancerArray.readAsMultiArray( + offset = neuroglancerSegmentManifestStart, + shape = (neuroglancerSegmentManifestEnd - neuroglancerSegmentManifestStart).toInt) + segmentManifest <- tryo(NeuroglancerSegmentManifest.fromBytes(manifestBytes.getStorage.asInstanceOf[Array[Byte]])).toFox + } yield + enrichSegmentInfo(segmentManifest, + meshfileAttributes.lod_scale_multiplier, + meshfileAttributes.transform, + neuroglancerSegmentManifestStart, + segmentId) + + private def getNeuroglancerSegmentManifestOffsets( + meshFilePath: VaultPath, + meshfileAttributes: MeshfileAttributes, + segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Long, Long)] = { + val bucketIndex = meshfileAttributes.applyHashFunction(segmentId) % meshfileAttributes.n_buckets + for { + bucketOffsetsArray <- openZarrArray(meshFilePath, keyBucketOffsets) + bucketRange <- bucketOffsetsArray.readAsMultiArray(offset = bucketIndex, shape = 2) + bucketStart <- tryo(bucketRange.getLong(0)).toFox + bucketEnd <- tryo(bucketRange.getLong(1)).toFox + _ <- Fox.fromBool(bucketEnd - bucketStart > 0) ?~> s"No entry for segment $segmentId" + bucketsArray <- openZarrArray(meshFilePath, keyBuckets) + buckets <- bucketsArray.readAsMultiArray(offset = Array(bucketStart, 0), + shape = Array((bucketEnd - bucketStart + 1).toInt, 3)) + bucketLocalOffset = 0 // TODO buckets.map(_(0)).indexOf(segmentId) + _ <- Fox.fromBool(bucketLocalOffset >= 0) ?~> s"SegmentId $segmentId not in bucket list" + neuroglancerStart = buckets.getLong(buckets.getIndex.set(Array(bucketLocalOffset, 1))) + neuroglancerEnd = buckets.getLong(buckets.getIndex.set(Array(bucketLocalOffset, 2))) + } yield (neuroglancerStart, neuroglancerEnd) + } + + private def openZarrArray(meshFilePath: VaultPath, zarrArrayName: String)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[DatasetArray] = { + val arrayPath = meshFilePath / zarrArrayName + for { + zarrArray <- Zarr3Array.open(arrayPath, + DataSourceId("dummy", "unused"), + "layer", + None, + None, + None, + 
chunkCacheService.sharedChunkContentsCache) + } yield zarrArray + } } From 41de8c878a85d5d5eb49904252524d7627a4afbc Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 10 Jun 2025 14:00:30 +0200 Subject: [PATCH 036/100] enrich --- .../datastore/services/mesh/MeshFileService.scala | 3 ++- .../services/mesh/NeuroglancerMeshHelper.scala | 1 + .../services/mesh/ZarrMeshFileService.scala | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index 92f04699283..a7a1f565224 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -73,7 +73,8 @@ object MeshFileInfo { implicit val jsonFormat: OFormat[MeshFileInfo] = Json.format[MeshFileInfo] } -class MeshFileService @Inject()(config: DataStoreConfig)(implicit ec: ExecutionContext) +class MeshFileService @Inject()(config: DataStoreConfig, zarrMeshFileService: ZarrMeshFileService)( + implicit ec: ExecutionContext) extends FoxImplicits with LazyLogging with Hdf5HashedArrayUtils diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerMeshHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerMeshHelper.scala index 415e56b618e..c8dd667b284 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerMeshHelper.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerMeshHelper.scala @@ -125,6 +125,7 @@ object WebknossosSegmentInfo { } } + trait NeuroglancerMeshHelper { def computeGlobalPosition(segmentInfo: NeuroglancerSegmentManifest, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 199bf7c8afe..b9ef3269f7a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -1,6 +1,7 @@ package com.scalableminds.webknossos.datastore.services.mesh import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.geometry.Vec3Float import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3Array, Zarr3GroupHeader} @@ -97,4 +98,16 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService) chunkCacheService.sharedChunkContentsCache) } yield zarrArray } + + override def computeGlobalPosition(segmentInfo: NeuroglancerSegmentManifest, + lod: Int, + lodScaleMultiplier: Double, + currentChunk: Int): Vec3Float = + segmentInfo.gridOrigin + segmentInfo.chunkPositions(lod)(currentChunk).toVec3Float * segmentInfo.chunkShape * Math + .pow(2, lod) * segmentInfo.lodScales(lod) * lodScaleMultiplier + + override def getLodTransform(segmentInfo: NeuroglancerSegmentManifest, + lodScaleMultiplier: Double, + transform: Array[Array[Double]], + lod: Int): Array[Array[Double]] = transform } 
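
The mesh-chunk listing added above resolves a segment id to its neuroglancer manifest through a hashed bucket index, mirroring the scheme of the existing HDF5 mesh files: hash the segment id, take it modulo n_buckets, read the two neighbouring entries of the bucket_offsets array to get a row range into the buckets array, scan those [segmentId, manifestStart, manifestEnd] rows for the segment, and use the matching start/end pair as the byte range of the serialized NeuroglancerSegmentManifest. Below is a minimal, self-contained sketch of that lookup over plain in-memory arrays; the object and method names are illustrative only (not the actual service API), and the identity hash merely stands in for the hash function declared in the mesh-file attributes.

    object BucketLookupSketch {

      // Stand-in for the hash function named in the mesh-file attributes (assumption: identity).
      private def applyHashFunction(segmentId: Long): Long = segmentId

      /** Returns the (manifestStart, manifestEnd) byte range for segmentId, or None if the
        * segment has no entry. In this sketch, bucketOffsets has nBuckets + 1 entries and each
        * buckets row is [segmentId, manifestStart, manifestEnd].
        */
      def manifestRange(segmentId: Long,
                        nBuckets: Int,
                        bucketOffsets: Array[Long],
                        buckets: Array[Array[Long]]): Option[(Long, Long)] = {
        // floorMod keeps the bucket index non-negative even for negative hash values
        val bucketIndex = java.lang.Math.floorMod(applyHashFunction(segmentId), nBuckets.toLong).toInt
        val bucketStart = bucketOffsets(bucketIndex)
        val bucketEnd = bucketOffsets(bucketIndex + 1)
        if (bucketEnd <= bucketStart) None // empty bucket: no entry for this segment
        else
          (bucketStart until bucketEnd)
            .map(rowIndex => buckets(rowIndex.toInt))
            .find(row => row(0) == segmentId)
            .map(row => (row(1), row(2)))
      }
    }

The services in these patches walk the same structure, but fetch bucket_offsets and buckets lazily through the HDF5 readers or the shared Zarr3 chunk cache instead of materializing them in memory.
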
From adb7d07e8795632e94d4b30ecd7ce654342e578d Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 12 Jun 2025 11:03:28 +0200 Subject: [PATCH 037/100] find local offset in bucket --- .../services/mesh/ZarrMeshFileService.scala | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index b9ef3269f7a..90991af3c2a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -10,6 +10,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, Hdf5HashedArrayUtils} import net.liftweb.common.Box.tryo import play.api.libs.json.{Json, OFormat} +import ucar.ma2.{Array => MultiArray} import javax.inject.Inject import scala.concurrent.ExecutionContext @@ -74,17 +75,20 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService) bucketRange <- bucketOffsetsArray.readAsMultiArray(offset = bucketIndex, shape = 2) bucketStart <- tryo(bucketRange.getLong(0)).toFox bucketEnd <- tryo(bucketRange.getLong(1)).toFox - _ <- Fox.fromBool(bucketEnd - bucketStart > 0) ?~> s"No entry for segment $segmentId" + bucketSize = (bucketEnd - bucketStart).toInt + _ <- Fox.fromBool(bucketSize > 0) ?~> s"No entry for segment $segmentId" bucketsArray <- openZarrArray(meshFilePath, keyBuckets) - buckets <- bucketsArray.readAsMultiArray(offset = Array(bucketStart, 0), - shape = Array((bucketEnd - bucketStart + 1).toInt, 3)) - bucketLocalOffset = 0 // TODO buckets.map(_(0)).indexOf(segmentId) + bucket <- bucketsArray.readAsMultiArray(offset = Array(bucketStart, 0), shape = Array(bucketSize + 1, 3)) + bucketLocalOffset <- findLocalOffsetInBucket(bucket, segmentId).toFox _ <- Fox.fromBool(bucketLocalOffset >= 0) ?~> s"SegmentId $segmentId not in bucket list" - neuroglancerStart = buckets.getLong(buckets.getIndex.set(Array(bucketLocalOffset, 1))) - neuroglancerEnd = buckets.getLong(buckets.getIndex.set(Array(bucketLocalOffset, 2))) + neuroglancerStart = bucket.getLong(bucket.getIndex.set(Array(bucketLocalOffset, 1))) + neuroglancerEnd = bucket.getLong(bucket.getIndex.set(Array(bucketLocalOffset, 2))) } yield (neuroglancerStart, neuroglancerEnd) } + private def findLocalOffsetInBucket(bucket: MultiArray, segmentId: Long): Option[Int] = + (0 until (bucket.getShape()(0))).find(idx => bucket.getLong(bucket.getIndex.set(Array(idx, 0))) == segmentId) + private def openZarrArray(meshFilePath: VaultPath, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = { val arrayPath = meshFilePath / zarrArrayName From e63fb2a61311e1820eeddb687f9ac120b5ee5945 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 12 Jun 2025 11:58:58 +0200 Subject: [PATCH 038/100] sort meshfile services, lookup with MeshFileKey --- .../controllers/DSMeshController.scala | 9 +- .../datasource/DatasetLayerAttachments.scala | 2 +- .../services/mesh/Hdf5MeshFileService.scala | 35 ++++ .../services/mesh/MeshFileService.scala | 151 +++++++++++------- .../services/mesh/MeshMappingHelper.scala | 4 +- ...uroglancerPrecomputedMeshFileService.scala | 4 +- .../services/mesh/ZarrMeshFileService.scala | 18 ++- 
.../datastore/storage/Hdf5FileCache.scala | 4 + 8 files changed, 158 insertions(+), 69 deletions(-) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index e526c626d12..10095758ec7 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -62,14 +62,11 @@ class DSMeshController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - _ <- Fox.successful(()) - mappingNameForMeshFile = meshFileService.mappingNameForMeshFile(organizationId, - datasetDirectoryName, - dataLayerName, - request.body.meshFile.name) (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) + meshFileKey <- meshFileService.lookUpMeshFile(dataSource.id, dataLayer, request.body.meshFile.name) + mappingNameForMeshFile <- meshFileService.mappingNameForMeshFile(meshFileKey).shiftBox segmentIds: Seq[Long] <- segmentIdsForAgglomerateIdIfNeeded( dataSource.id, dataLayer, @@ -80,7 +77,7 @@ class DSMeshController @Inject()( omitMissing = false ) chunkInfos <- if (request.body.meshFile.isNeuroglancerPrecomputed) { - neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments(request.body.meshFile.path, segmentIds) + } else { meshFileService.listMeshChunksForSegmentsMerged(organizationId, datasetDirectoryName, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala index ec5d58b3a88..5673becb400 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala @@ -24,7 +24,7 @@ object DatasetLayerAttachments { object LayerAttachmentDataformat extends ExtendedEnumeration { type LayerAttachmentDataformat = Value - val hdf5, json, zarr3 = Value + val hdf5, json, zarr3, neuroglancerPrecomputed = Value } object LayerAttachmentType extends ExtendedEnumeration { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala new file mode 100644 index 00000000000..0dfcb1be5a1 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala @@ -0,0 +1,35 @@ +package com.scalableminds.webknossos.datastore.services.mesh + +import com.scalableminds.webknossos.datastore.storage.Hdf5FileCache +import jakarta.inject.Inject +import net.liftweb.common.{Box, Full, Empty} + +class Hdf5MeshFileService @Inject()() { + + private lazy val meshFileCache = new Hdf5FileCache(30) + + def mappingNameForMeshFile(meshFileKey: MeshFileKey): Box[String] = { + val asOption = meshFileCache + .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => + 
cachedMeshFile.stringReader.getAttr("/", "mapping_name") + } + .toOption + .flatMap { value => + Option(value) // catch null + } + + asOption match { + case Some(mappingName) => Full(mappingName) + case None => Empty + } + } + + def readMeshfileMetadata(meshFileKey: MeshFileKey): Box[(String, Double, Array[Array[Double]])] = + meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => + val encoding = cachedMeshFile.meshFormat + val lodScaleMultiplier = cachedMeshFile.float64Reader.getAttr("/", "lod_scale_multiplier") + val transform = cachedMeshFile.float64Reader.getMatrixAttr("/", "transform") + (encoding, lodScaleMultiplier, transform) + } + +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index a7a1f565224..9578f3bd7af 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -1,19 +1,29 @@ package com.scalableminds.webknossos.datastore.services.mesh +import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.enumeration.ExtendedEnumeration import com.scalableminds.util.geometry.Vec3Float import com.scalableminds.util.io.PathUtils import com.scalableminds.util.tools.{ByteUtils, Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.DataStoreConfig +import com.scalableminds.webknossos.datastore.models.datasource.{ + DataLayer, + DataSourceId, + LayerAttachment, + LayerAttachmentDataformat +} import com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils -import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} +import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo import net.liftweb.common.{Box, Full} import org.apache.commons.io.FilenameUtils +import org.checkerframework.checker.units.qual.m import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{Format, JsResult, JsString, JsValue, Json, OFormat} +import java.net.URI import java.nio.file.{Path, Paths} import javax.inject.Inject import scala.concurrent.{ExecutionContext, Future} @@ -46,6 +56,9 @@ object MeshChunkDataRequestList { implicit val jsonFormat: OFormat[MeshChunkDataRequestList] = Json.format[MeshChunkDataRequestList] } +// TODO should this become a generic AttachmentKey? 
+case class MeshFileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) + object MeshFileType extends ExtendedEnumeration { type MeshFileType = Value val local, neuroglancerPrecomputed = Value @@ -73,8 +86,12 @@ object MeshFileInfo { implicit val jsonFormat: OFormat[MeshFileInfo] = Json.format[MeshFileInfo] } -class MeshFileService @Inject()(config: DataStoreConfig, zarrMeshFileService: ZarrMeshFileService)( - implicit ec: ExecutionContext) +class MeshFileService @Inject()( + config: DataStoreConfig, + hdf5MeshFileService: Hdf5MeshFileService, + zarrMeshFileService: ZarrMeshFileService, + neuroglancerPrecomputedMeshService: NeuroglancerPrecomputedMeshFileService, + remoteSourceDescriptorService: RemoteSourceDescriptorService)(implicit ec: ExecutionContext) extends FoxImplicits with LazyLogging with Hdf5HashedArrayUtils @@ -84,7 +101,41 @@ class MeshFileService @Inject()(config: DataStoreConfig, zarrMeshFileService: Za private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private val meshesDir = "meshes" - private lazy val meshFileCache = new Hdf5FileCache(30) + private val meshFileKeyCache + : AlfuCache[(DataSourceId, String, String), MeshFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → MeshFileKey + + def lookUpMeshFile(dataSourceId: DataSourceId, dataLayer: DataLayer, meshFileName: String)( + implicit ec: ExecutionContext): Fox[MeshFileKey] = + meshFileKeyCache.getOrLoad((dataSourceId, dataLayer.name, meshFileName), + _ => lookUpMeshFileImpl(dataSourceId, dataLayer, meshFileName).toFox) + + private def lookUpMeshFileImpl(dataSourceId: DataSourceId, + dataLayer: DataLayer, + meshFileName: String): Box[MeshFileKey] = { + val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { + case Some(attachments) => attachments.meshes.find(_.name == meshFileName) + case None => None + } + val localDatsetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + for { + registeredAttachmentNormalized <- tryo(registeredAttachment.map { attachment => + attachment.copy( + path = + remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatsetDir, dataLayer.name)) + }) + } yield + MeshFileKey( + dataSourceId, + dataLayer.name, + registeredAttachmentNormalized.getOrElse( + LayerAttachment( + meshFileName, + new URI(dataBaseDir.resolve(dataLayer.name).resolve(meshesDir).toString), + LayerAttachmentDataformat.hdf5 + ) + ) + ) + } def exploreMeshFiles(organizationId: String, datasetDirectoryName: String, @@ -135,26 +186,13 @@ class MeshFileService @Inject()(config: DataStoreConfig, zarrMeshFileService: Za } // Same as above but this variant constructs the meshFilePath itself and converts null to None - def mappingNameForMeshFile(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - meshFileName: String): Option[String] = { - val meshFilePath = - dataBaseDir - .resolve(organizationId) - .resolve(datasetDirectoryName) - .resolve(dataLayerName) - .resolve(meshesDir) - .resolve(s"$meshFileName.$hdf5FileExtension") - meshFileCache - .withCachedHdf5(meshFilePath) { cachedMeshFile => - cachedMeshFile.stringReader.getAttr("/", "mapping_name") - } - .toOption - .flatMap { value => - Option(value) // catch null - } - } + def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[String] = + meshFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + 
zarrMeshFileService.mappingNameForMeshFile(meshFileKey) + case LayerAttachmentDataformat.hdf5 => + hdf5MeshFileService.mappingNameForMeshFile(meshFileKey).toFox + } private def versionForMeshFile(meshFilePath: Path): Long = meshFileCache @@ -164,30 +202,39 @@ class MeshFileService @Inject()(config: DataStoreConfig, zarrMeshFileService: Za .toOption .getOrElse(0) - def listMeshChunksForSegmentsMerged(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - meshFileName: String, - segmentIds: Seq[Long])(implicit m: MessagesProvider): Fox[WebknossosSegmentInfo] = - for { - _ <- Fox.successful(()) - meshFilePath: Path = dataBaseDir - .resolve(organizationId) - .resolve(datasetDirectoryName) - .resolve(dataLayerName) - .resolve(meshesDir) - .resolve(s"$meshFileName.$hdf5FileExtension") - (encoding, lodScaleMultiplier, transform) <- readMeshfileMetadata(meshFilePath).toFox - meshChunksForUnmappedSegments: List[List[MeshLodInfo]] = listMeshChunksForSegments(meshFilePath, - segmentIds, - lodScaleMultiplier, - transform) - _ <- Fox.fromBool(meshChunksForUnmappedSegments.nonEmpty) ?~> "zero chunks" ?~> Messages( - "mesh.file.listChunks.failed", - segmentIds.mkString(","), - meshFileName) - wkChunkInfos <- WebknossosSegmentInfo.fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, encoding).toFox - } yield wkChunkInfos + def listMeshChunksForSegmentsMerged(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( + implicit ec: ExecutionContext, + tc: TokenContext, + m: MessagesProvider): Fox[WebknossosSegmentInfo] = + meshFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.neuroglancerPrecomputed => + neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) + case LayerAttachmentDataformat.zarr3 => + zarrMeshFileService.listMeshChunksForMultipleSegments() + case LayerAttachmentDataformat.hdf5 => + hdf5MeshFileService.listMeshChunksForMultipleSegments() + } + + // TODO move to hdf5 meshfile service + for { + _ <- Fox.successful(()) + meshFilePath: Path = dataBaseDir + .resolve(organizationId) + .resolve(datasetDirectoryName) + .resolve(dataLayerName) + .resolve(meshesDir) + .resolve(s"$meshFileName.$hdf5FileExtension") + (encoding, lodScaleMultiplier, transform) <- readMeshfileMetadata(meshFilePath).toFox + meshChunksForUnmappedSegments: List[List[MeshLodInfo]] = listMeshChunksForSegments(meshFilePath, + segmentIds, + lodScaleMultiplier, + transform) + _ <- Fox.fromBool(meshChunksForUnmappedSegments.nonEmpty) ?~> "zero chunks" ?~> Messages( + "mesh.file.listChunks.failed", + segmentIds.mkString(","), + meshFileName) + wkChunkInfos <- WebknossosSegmentInfo.fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, encoding).toFox + } yield wkChunkInfos private def listMeshChunksForSegments(meshFilePath: Path, segmentIds: Seq[Long], @@ -201,14 +248,6 @@ class MeshFileService @Inject()(config: DataStoreConfig, zarrMeshFileService: Za .toOption .getOrElse(List.empty) - private def readMeshfileMetadata(meshFilePath: Path): Box[(String, Double, Array[Array[Double]])] = - meshFileCache.withCachedHdf5(meshFilePath) { cachedMeshFile => - val encoding = cachedMeshFile.meshFormat - val lodScaleMultiplier = cachedMeshFile.float64Reader.getAttr("/", "lod_scale_multiplier") - val transform = cachedMeshFile.float64Reader.getMatrixAttr("/", "transform") - (encoding, lodScaleMultiplier, transform) - } - private def listMeshChunksForSegment(cachedMeshFile: CachedHdf5File, segmentId: Long, lodScaleMultiplier: Double, diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala index 6bb64a978b0..1be2f1de985 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala @@ -8,7 +8,7 @@ import com.scalableminds.webknossos.datastore.services.{ } import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSourceId} -import net.liftweb.common.Full +import net.liftweb.common.{Box, Full} import scala.concurrent.ExecutionContext @@ -24,7 +24,7 @@ trait MeshMappingHelper extends FoxImplicits { targetMappingName: Option[String], editableMappingTracingId: Option[String], agglomerateId: Long, - mappingNameForMeshFile: Option[String], + mappingNameForMeshFile: Box[String], omitMissing: Boolean // If true, failing lookups in the agglomerate file will just return empty list. )(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = (targetMappingName, editableMappingTracingId) match { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala index 0497dca3b87..7b8a8516a2c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala @@ -125,10 +125,10 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, ) } - def listMeshChunksForMultipleSegments(meshFilePathOpt: Option[String], segmentId: Seq[Long])( + def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentId: Seq[Long])( implicit tc: TokenContext): Fox[WebknossosSegmentInfo] = for { - meshFilePath <- meshFilePathOpt.toFox ?~> "No mesh file path provided" + meshFilePath <- meshFileKey.attachment.path // TODO vaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(meshFilePath), None)) mesh <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) chunkScale = Array.fill(3)(1 / math.pow(2, mesh.meshInfo.vertex_quantization_bits)) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 90991af3c2a..e05a31305d5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -8,6 +8,7 @@ import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3Array, Zar import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, Hdf5HashedArrayUtils} +import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptor} import net.liftweb.common.Box.tryo import play.api.libs.json.{Json, 
OFormat} import ucar.ma2.{Array => MultiArray} @@ -20,7 +21,8 @@ case class MeshfileAttributes( lod_scale_multiplier: Double, transform: Array[Array[Double]], hash_function: String, - n_buckets: Int // TODO camelCase + custom format? + n_buckets: Int, // TODO camelCase + custom format? + mapping_name: Option[String] // TODO double-check ) extends Hdf5HashedArrayUtils { lazy val applyHashFunction: Long => Long = getHashFunction(hash_function) } @@ -29,7 +31,7 @@ object MeshfileAttributes { implicit val jsonFormat: OFormat[MeshfileAttributes] = Json.format[MeshfileAttributes] } -class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService) +class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVaultService: DataVaultService) extends FoxImplicits with NeuroglancerMeshHelper { @@ -45,6 +47,18 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService) meshfileAttributes <- groupHeader.meshfileAttributes.toFox ?~> "Could not parse meshfile attributes from zarr group file" } yield (meshfileAttributes.mesh_format, meshfileAttributes.lod_scale_multiplier, meshfileAttributes.transform) + def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[String] = + for { + groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) + groupHeaderBytes <- (groupVaultPath / Zarr3GroupHeader.FILENAME_ZARR_JSON).readBytes() + groupHeader <- JsonHelper.parseAs[Zarr3GroupHeader](groupHeaderBytes).toFox ?~> "Could not parse array header" + meshfileAttributes <- groupHeader.meshfileAttributes.toFox ?~> "Could not parse meshfile attributes from zarr group file" + mappingNameOrEmpty <- meshfileAttributes.mapping_name match { // TODO Does Fox have a shortcut for this? 
+ case Some(mappingName) => Fox.successful(mappingName) + case None => Fox.empty + } + } yield mappingNameOrEmpty + def listMeshChunksForSegment(meshFilePath: VaultPath, segmentId: Long, meshfileAttributes: MeshfileAttributes)( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[MeshLodInfo]] = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala index 607c1de58f8..31c3624081b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala @@ -11,6 +11,7 @@ import ch.systemsx.cisd.hdf5.{ } import com.scalableminds.util.cache.LRUConcurrentCache import com.scalableminds.webknossos.datastore.dataformats.SafeCachable +import com.scalableminds.webknossos.datastore.models.datasource.LayerAttachment import net.liftweb.common.{Box, Failure, Full} import com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils import com.typesafe.scalalogging.LazyLogging @@ -89,4 +90,7 @@ class Hdf5FileCache(val maxEntries: Int) extends LRUConcurrentCache[String, Cach case scala.util.Failure(e) => Failure(e.toString) } } yield boxedResult + + def withCachedHdf5[T](attachment: LayerAttachment)(block: CachedHdf5File => T): Box[T] = + withCachedHdf5(Path.of(attachment.path))(block) } From fd8dc30799f001164938d237497194c21c92631a Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 12 Jun 2025 14:24:22 +0200 Subject: [PATCH 039/100] move more code --- .../controllers/DSMeshController.scala | 10 +- .../services/mesh/Hdf5MeshFileService.scala | 136 +++++++++++++++- .../services/mesh/MeshFileService.scala | 154 +----------------- 3 files changed, 143 insertions(+), 157 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index 10095758ec7..e3430198a94 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -76,15 +76,7 @@ class DSMeshController @Inject()( mappingNameForMeshFile, omitMissing = false ) - chunkInfos <- if (request.body.meshFile.isNeuroglancerPrecomputed) { - - } else { - meshFileService.listMeshChunksForSegmentsMerged(organizationId, - datasetDirectoryName, - dataLayerName, - request.body.meshFile.name, - segmentIds) - } + chunkInfos <- meshFileService.listMeshChunksForSegmentsMerged(meshFileKey, segmentIds) } yield Ok(Json.toJson(chunkInfos)) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala index 0dfcb1be5a1..391e51f4413 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala @@ -1,10 +1,18 @@ package com.scalableminds.webknossos.datastore.services.mesh -import com.scalableminds.webknossos.datastore.storage.Hdf5FileCache +import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.geometry.Vec3Float +import 
com.scalableminds.util.tools.{Fox, FoxImplicits} +import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} import jakarta.inject.Inject -import net.liftweb.common.{Box, Full, Empty} +import net.liftweb.common.Box.tryo +import net.liftweb.common.{Box, Empty, Full} +import play.api.i18n.{Messages, MessagesProvider} -class Hdf5MeshFileService @Inject()() { +import java.nio.file.Path +import scala.concurrent.ExecutionContext + +class Hdf5MeshFileService @Inject()() extends NeuroglancerMeshHelper with FoxImplicits { private lazy val meshFileCache = new Hdf5FileCache(30) @@ -32,4 +40,126 @@ class Hdf5MeshFileService @Inject()() { (encoding, lodScaleMultiplier, transform) } + private def listMeshChunksForSegments(meshFileKey: MeshFileKey, + segmentIds: Seq[Long], + lodScaleMultiplier: Double, + transform: Array[Array[Double]]): List[List[MeshLodInfo]] = + meshFileCache + .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile: CachedHdf5File => + segmentIds.toList.flatMap(segmentId => + listMeshChunksForSegment(cachedMeshFile, segmentId, lodScaleMultiplier, transform)) + } + .toOption + .getOrElse(List.empty) + + private def listMeshChunksForSegment(cachedMeshFile: CachedHdf5File, + segmentId: Long, + lodScaleMultiplier: Double, + transform: Array[Array[Double]]): Box[List[MeshLodInfo]] = + tryo { + val (neuroglancerSegmentManifestStart, neuroglancerSegmentManifestEnd) = + getNeuroglancerSegmentManifestOffsets(segmentId, cachedMeshFile) + + val manifestBytes = cachedMeshFile.uint8Reader.readArrayBlockWithOffset( + "/neuroglancer", + (neuroglancerSegmentManifestEnd - neuroglancerSegmentManifestStart).toInt, + neuroglancerSegmentManifestStart) + val segmentManifest = NeuroglancerSegmentManifest.fromBytes(manifestBytes) + enrichSegmentInfo(segmentManifest, lodScaleMultiplier, transform, neuroglancerSegmentManifestStart, segmentId) + } + + private def getNeuroglancerSegmentManifestOffsets(segmentId: Long, cachedMeshFile: CachedHdf5File): (Long, Long) = { + val bucketIndex = cachedMeshFile.hashFunction(segmentId) % cachedMeshFile.nBuckets + val bucketOffsets = cachedMeshFile.uint64Reader.readArrayBlockWithOffset("bucket_offsets", 2, bucketIndex) + val bucketStart = bucketOffsets(0) + val bucketEnd = bucketOffsets(1) + + if (bucketEnd - bucketStart == 0) throw new Exception(s"No entry for segment $segmentId") + + val buckets = cachedMeshFile.uint64Reader.readMatrixBlockWithOffset("buckets", + (bucketEnd - bucketStart + 1).toInt, + 3, + bucketStart, + 0) + + val bucketLocalOffset = buckets.map(_(0)).indexOf(segmentId) + if (bucketLocalOffset < 0) throw new Exception(s"SegmentId $segmentId not in bucket list") + val neuroglancerStart = buckets(bucketLocalOffset)(1) + val neuroglancerEnd = buckets(bucketLocalOffset)(2) + + (neuroglancerStart, neuroglancerEnd) + } + + // TODO null vs None? 
+ private def mappingNameForMeshFile(meshFilePath: Path, meshFileVersion: Long): Box[String] = { + val attributeName = if (meshFileVersion == 0) "metadata/mapping_name" else "mapping_name" + meshFileCache.withCachedHdf5(meshFilePath) { cachedMeshFile => + cachedMeshFile.stringReader.getAttr("/", attributeName) + } + } + + override def computeGlobalPosition(segmentInfo: NeuroglancerSegmentManifest, + lod: Int, + lodScaleMultiplier: Double, + currentChunk: Int): Vec3Float = + segmentInfo.gridOrigin + segmentInfo.chunkPositions(lod)(currentChunk).toVec3Float * segmentInfo.chunkShape * Math + .pow(2, lod) * segmentInfo.lodScales(lod) * lodScaleMultiplier + + override def getLodTransform(segmentInfo: NeuroglancerSegmentManifest, + lodScaleMultiplier: Double, + transform: Array[Array[Double]], + lod: Int): Array[Array[Double]] = transform + + private def versionForMeshFile(meshFilePath: Path): Long = + meshFileCache + .withCachedHdf5(meshFilePath) { cachedMeshFile => + cachedMeshFile.int64Reader.getAttr("/", "artifact_schema_version") + } + .toOption + .getOrElse(0) + + def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: MeshChunkDataRequestList) = + for { + resultBox <- meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => + readMeshChunkFromCachedMeshfile(cachedMeshFile, meshChunkDataRequests) + } + (output, encoding) <- resultBox + } yield (output, encoding) + + private def readMeshChunkFromCachedMeshfile( + cachedMeshFile: CachedHdf5File, + meshChunkDataRequests: MeshChunkDataRequestList): Box[(Array[Byte], String)] = { + val meshFormat = cachedMeshFile.meshFormat + // Sort the requests by byte offset to optimize for spinning disk access + val requestsReordered = + meshChunkDataRequests.requests.zipWithIndex.sortBy(requestAndIndex => requestAndIndex._1.byteOffset).toList + val data: List[(Array[Byte], Int)] = requestsReordered.map { requestAndIndex => + val meshChunkDataRequest = requestAndIndex._1 + val data = + cachedMeshFile.uint8Reader.readArrayBlockWithOffset("neuroglancer", + meshChunkDataRequest.byteSize, + meshChunkDataRequest.byteOffset) + (data, requestAndIndex._2) + } + val dataSorted = data.sortBy(d => d._2) + Full((dataSorted.flatMap(d => d._1).toArray, meshFormat)) + } + + def readMeshChunksForSegments(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( + implicit ec: ExecutionContext, + tc: TokenContext, + m: MessagesProvider): Fox[WebknossosSegmentInfo] = + for { + (encoding, lodScaleMultiplier, transform) <- readMeshfileMetadata(meshFileKey).toFox + meshChunksForUnmappedSegments: List[List[MeshLodInfo]] = listMeshChunksForSegments(meshFileKey, + segmentIds, + lodScaleMultiplier, + transform) + _ <- Fox.fromBool(meshChunksForUnmappedSegments.nonEmpty) ?~> "zero chunks" ?~> Messages( + "mesh.file.listChunks.failed", + segmentIds.mkString(","), + meshFileKey.attachment.name) + wkChunkInfos <- WebknossosSegmentInfo.fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, encoding).toFox + } yield wkChunkInfos + } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index 9578f3bd7af..6c584b2659a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -3,7 +3,6 @@ package com.scalableminds.webknossos.datastore.services.mesh 
import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.enumeration.ExtendedEnumeration -import com.scalableminds.util.geometry.Vec3Float import com.scalableminds.util.io.PathUtils import com.scalableminds.util.tools.{ByteUtils, Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.DataStoreConfig @@ -14,12 +13,11 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ LayerAttachmentDataformat } import com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils -import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache, RemoteSourceDescriptorService} +import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo import net.liftweb.common.{Box, Full} import org.apache.commons.io.FilenameUtils -import org.checkerframework.checker.units.qual.m import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{Format, JsResult, JsString, JsValue, Json, OFormat} @@ -95,8 +93,7 @@ class MeshFileService @Inject()( extends FoxImplicits with LazyLogging with Hdf5HashedArrayUtils - with ByteUtils - with NeuroglancerMeshHelper { + with ByteUtils { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private val meshesDir = "meshes" @@ -172,19 +169,6 @@ class MeshFileService @Inject()( .toSet } - /* - Note that null is a valid value here for once. Meshfiles with no information about the - meshFilePath will return Fox.empty, while meshfiles with one marked as empty, will return Fox.successful(null) - */ - private def mappingNameForMeshFile(meshFilePath: Path, meshFileVersion: Long): Fox[String] = { - val attributeName = if (meshFileVersion == 0) "metadata/mapping_name" else "mapping_name" - meshFileCache - .withCachedHdf5(meshFilePath) { cachedMeshFile => - cachedMeshFile.stringReader.getAttr("/", attributeName) - } - .toFox ?~> "mesh.file.readEncoding.failed" - } - // Same as above but this variant constructs the meshFilePath itself and converts null to None def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[String] = meshFileKey.attachment.dataFormat match { @@ -194,14 +178,6 @@ class MeshFileService @Inject()( hdf5MeshFileService.mappingNameForMeshFile(meshFileKey).toFox } - private def versionForMeshFile(meshFilePath: Path): Long = - meshFileCache - .withCachedHdf5(meshFilePath) { cachedMeshFile => - cachedMeshFile.int64Reader.getAttr("/", "artifact_schema_version") - } - .toOption - .getOrElse(0) - def listMeshChunksForSegmentsMerged(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext, @@ -215,126 +191,14 @@ class MeshFileService @Inject()( hdf5MeshFileService.listMeshChunksForMultipleSegments() } - // TODO move to hdf5 meshfile service - for { - _ <- Fox.successful(()) - meshFilePath: Path = dataBaseDir - .resolve(organizationId) - .resolve(datasetDirectoryName) - .resolve(dataLayerName) - .resolve(meshesDir) - .resolve(s"$meshFileName.$hdf5FileExtension") - (encoding, lodScaleMultiplier, transform) <- readMeshfileMetadata(meshFilePath).toFox - meshChunksForUnmappedSegments: List[List[MeshLodInfo]] = listMeshChunksForSegments(meshFilePath, - segmentIds, - lodScaleMultiplier, - transform) - _ <- Fox.fromBool(meshChunksForUnmappedSegments.nonEmpty) ?~> "zero chunks" ?~> Messages( - "mesh.file.listChunks.failed", - 
segmentIds.mkString(","), - meshFileName) - wkChunkInfos <- WebknossosSegmentInfo.fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, encoding).toFox - } yield wkChunkInfos - - private def listMeshChunksForSegments(meshFilePath: Path, - segmentIds: Seq[Long], - lodScaleMultiplier: Double, - transform: Array[Array[Double]]): List[List[MeshLodInfo]] = - meshFileCache - .withCachedHdf5(meshFilePath) { cachedMeshFile: CachedHdf5File => - segmentIds.toList.flatMap(segmentId => - listMeshChunksForSegment(cachedMeshFile, segmentId, lodScaleMultiplier, transform)) - } - .toOption - .getOrElse(List.empty) - - private def listMeshChunksForSegment(cachedMeshFile: CachedHdf5File, - segmentId: Long, - lodScaleMultiplier: Double, - transform: Array[Array[Double]]): Box[List[MeshLodInfo]] = - tryo { - val (neuroglancerSegmentManifestStart, neuroglancerSegmentManifestEnd) = - getNeuroglancerSegmentManifestOffsets(segmentId, cachedMeshFile) - - val manifestBytes = cachedMeshFile.uint8Reader.readArrayBlockWithOffset( - "/neuroglancer", - (neuroglancerSegmentManifestEnd - neuroglancerSegmentManifestStart).toInt, - neuroglancerSegmentManifestStart) - val segmentManifest = NeuroglancerSegmentManifest.fromBytes(manifestBytes) - enrichSegmentInfo(segmentManifest, lodScaleMultiplier, transform, neuroglancerSegmentManifestStart, segmentId) - } - - override def computeGlobalPosition(segmentInfo: NeuroglancerSegmentManifest, - lod: Int, - lodScaleMultiplier: Double, - currentChunk: Int): Vec3Float = - segmentInfo.gridOrigin + segmentInfo.chunkPositions(lod)(currentChunk).toVec3Float * segmentInfo.chunkShape * Math - .pow(2, lod) * segmentInfo.lodScales(lod) * lodScaleMultiplier - - override def getLodTransform(segmentInfo: NeuroglancerSegmentManifest, - lodScaleMultiplier: Double, - transform: Array[Array[Double]], - lod: Int): Array[Array[Double]] = transform - - private def getNeuroglancerSegmentManifestOffsets(segmentId: Long, cachedMeshFile: CachedHdf5File): (Long, Long) = { - val bucketIndex = cachedMeshFile.hashFunction(segmentId) % cachedMeshFile.nBuckets - val bucketOffsets = cachedMeshFile.uint64Reader.readArrayBlockWithOffset("bucket_offsets", 2, bucketIndex) - val bucketStart = bucketOffsets(0) - val bucketEnd = bucketOffsets(1) - - if (bucketEnd - bucketStart == 0) throw new Exception(s"No entry for segment $segmentId") - - val buckets = cachedMeshFile.uint64Reader.readMatrixBlockWithOffset("buckets", - (bucketEnd - bucketStart + 1).toInt, - 3, - bucketStart, - 0) - - val bucketLocalOffset = buckets.map(_(0)).indexOf(segmentId) - if (bucketLocalOffset < 0) throw new Exception(s"SegmentId $segmentId not in bucket list") - val neuroglancerStart = buckets(bucketLocalOffset)(1) - val neuroglancerEnd = buckets(bucketLocalOffset)(2) - - (neuroglancerStart, neuroglancerEnd) - } - - def readMeshChunk(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - meshChunkDataRequests: MeshChunkDataRequestList, - ): Box[(Array[Byte], String)] = { - val meshFilePath = dataBaseDir - .resolve(organizationId) - .resolve(datasetDirectoryName) - .resolve(dataLayerName) - .resolve(meshesDir) - .resolve(s"${meshChunkDataRequests.meshFile.name}.$hdf5FileExtension") - for { - resultBox <- meshFileCache.withCachedHdf5(meshFilePath) { cachedMeshFile => - readMeshChunkFromCachedMeshfile(cachedMeshFile, meshChunkDataRequests) - } - (output, encoding) <- resultBox - } yield (output, encoding) - } - - private def readMeshChunkFromCachedMeshfile( - cachedMeshFile: CachedHdf5File, - meshChunkDataRequests: 
MeshChunkDataRequestList): Box[(Array[Byte], String)] = { - val meshFormat = cachedMeshFile.meshFormat - // Sort the requests by byte offset to optimize for spinning disk access - val requestsReordered = - meshChunkDataRequests.requests.zipWithIndex.sortBy(requestAndIndex => requestAndIndex._1.byteOffset).toList - val data: List[(Array[Byte], Int)] = requestsReordered.map { requestAndIndex => - val meshChunkDataRequest = requestAndIndex._1 - val data = - cachedMeshFile.uint8Reader.readArrayBlockWithOffset("neuroglancer", - meshChunkDataRequest.byteSize, - meshChunkDataRequest.byteOffset) - (data, requestAndIndex._2) + def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: MeshChunkDataRequestList, + )(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Array[Byte], String)] = + meshFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.hdf5 => hdf5MeshFileService.readMeshChunk(meshFileKey, meshChunkDataRequests).toFox + case LayerAttachmentDataformat.zarr3 => zarrMeshFileService.readMeshChunk(meshFileKey, meshChunkDataRequests) + case LayerAttachmentDataformat.neuroglancerPrecomputed => + neuroglancerPrecomputedMeshService.readMeshChunk(meshFileKey, meshChunkDataRequests) } - val dataSorted = data.sortBy(d => d._2) - Full((dataSorted.flatMap(d => d._1).toArray, meshFormat)) - } def clearCache(organizationId: String, datasetDirectoryName: String, layerNameOpt: Option[String]): Int = { val datasetPath = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName) From 00e775cbd7ca1e99cccf6d43d1283b0938358ef5 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 16 Jun 2025 14:20:38 +0200 Subject: [PATCH 040/100] iterate on meshfile services --- .../controllers/DSMeshController.scala | 13 +-- .../controllers/DataSourceController.scala | 3 +- .../services/AgglomerateService.scala | 9 +- .../services/mesh/DSFullMeshService.scala | 77 ++++++--------- .../services/mesh/Hdf5MeshFileService.scala | 48 ++++++---- .../services/mesh/MeshFileService.scala | 33 ++++--- .../mesh/NeuroglancerMeshHelper.scala | 4 +- ...uroglancerPrecomputedMeshFileService.scala | 17 ++-- .../services/mesh/ZarrMeshFileService.scala | 96 +++++++++++++++---- .../datastore/storage/DataVaultService.scala | 4 + 10 files changed, 186 insertions(+), 118 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index e3430198a94..ee911847078 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -1,7 +1,6 @@ package com.scalableminds.webknossos.datastore.controllers import com.google.inject.Inject -import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.services._ import com.scalableminds.webknossos.datastore.services.mesh.{ @@ -88,13 +87,11 @@ class DSMeshController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - (data, encoding) <- if (request.body.meshFile.isNeuroglancerPrecomputed) { - neuroglancerPrecomputedMeshService.readMeshChunk(request.body.meshFile.path, request.body.requests) - } else { - meshFileService - .readMeshChunk(organizationId, 
datasetDirectoryName, dataLayerName, request.body) - .toFox ?~> "mesh.file.loadChunk.failed" - } + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + meshFileKey <- meshFileService.lookUpMeshFile(dataSource.id, dataLayer, request.body.meshFileName) + (data, encoding) <- meshFileService.readMeshChunk(meshFileKey, request.body.requests) ?~> "mesh.file.loadChunk.failed" } yield { if (encoding.contains("gzip")) { Ok(data).withHeaders("Content-Encoding" -> "gzip") diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 342b53e5836..1ed61591b67 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -450,7 +450,8 @@ class DataSourceController @Inject()( layerName: Option[String]): InboxDataSource = { val (closedAgglomerateFileHandleCount, clearedBucketProviderCount, removedChunksCount) = binaryDataServiceHolder.binaryDataService.clearCache(organizationId, datasetDirectoryName, layerName) - val closedMeshFileHandleCount = meshFileService.clearCache(organizationId, datasetDirectoryName, layerName) + val closedMeshFileHandleCount = + meshFileService.clearCache(DataSourceId(organizationId, datasetDirectoryName), layerName) val reloadedDataSource: InboxDataSource = dataSourceService.dataSourceFromDir( dataSourceService.dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName), organizationId) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 6fc7a73a7c7..b94619e1359 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -58,18 +58,19 @@ class AgglomerateService(config: DataStoreConfig, attachedAgglomerates ++ exploredAgglomerates } - def clearCaches(dataSourceId: DataSourceId, layerName: Option[String]): Int = { + def clearCaches(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { agglomerateKeyCache.clear { - case (keyDataSourceId, keyLayerName, _) => dataSourceId == keyDataSourceId && layerName.forall(_ == keyLayerName) + case (keyDataSourceId, keyLayerName, _) => + dataSourceId == keyDataSourceId && layerNameOpt.forall(_ == keyLayerName) } val clearedHdf5Count = hdf5AgglomerateService.clearCache { agglomerateFileKey => - agglomerateFileKey.dataSourceId == dataSourceId && layerName.forall(agglomerateFileKey.layerName == _) + agglomerateFileKey.dataSourceId == dataSourceId && layerNameOpt.forall(agglomerateFileKey.layerName == _) } val clearedZarrCount = zarrAgglomerateService.clearCache { case (agglomerateFileKey, _) => - agglomerateFileKey.dataSourceId == dataSourceId && layerName.forall(agglomerateFileKey.layerName == _) + agglomerateFileKey.dataSourceId == dataSourceId && layerNameOpt.forall(agglomerateFileKey.layerName == _) } clearedHdf5Count + clearedZarrCount diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index 84435fb62bd..0c925f6b565 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -60,13 +60,10 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, fullMeshRequest: FullMeshRequest)(implicit ec: ExecutionContext, m: MessagesProvider, tc: TokenContext): Fox[Array[Byte]] = - fullMeshRequest.meshFileName match { - case Some(_) if fullMeshRequest.meshFilePath.isDefined => - loadFullMeshFromRemoteNeuroglancerMeshFile(fullMeshRequest) - case Some(_) => - loadFullMeshFromMeshfile(organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) - case None => loadFullMeshFromAdHoc(organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) - } + if (fullMeshRequest.meshFileName.isDefined) + loadFullMeshFromMeshfile(organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) + else + loadFullMeshFromAdHoc(organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) private def loadFullMeshFromAdHoc(organizationId: String, datasetName: String, @@ -124,70 +121,59 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, } yield allVertices } + // TODO make sure this also works for the remote neuroglancer variant. if so, delete other implementation private def loadFullMeshFromMeshfile(organizationId: String, datasetDirectoryName: String, - layerName: String, + dataLayerName: String, fullMeshRequest: FullMeshRequest)(implicit ec: ExecutionContext, m: MessagesProvider, tc: TokenContext): Fox[Array[Byte]] = for { - meshFileName <- fullMeshRequest.meshFileName.toFox ?~> "meshFileName.needed" - before = Instant.now - mappingNameForMeshFile = meshFileService.mappingNameForMeshFile(organizationId, - datasetDirectoryName, - layerName, - meshFileName) + before <- Instant.nowFox (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, - layerName) + dataLayerName) + meshFileName <- fullMeshRequest.meshFileName.toFox ?~> "meshFileName.required" + meshFileKey <- meshFileService.lookUpMeshFile(dataSource.id, dataLayer, meshFileName) + mappingNameForMeshFileBox <- meshFileService.mappingNameForMeshFile(meshFileKey).shiftBox segmentIds <- segmentIdsForAgglomerateIdIfNeeded( dataSource.id, dataLayer, fullMeshRequest.mappingName, fullMeshRequest.editableMappingTracingId, fullMeshRequest.segmentId, - mappingNameForMeshFile, + mappingNameForMeshFileBox, omitMissing = false ) - chunkInfos: WebknossosSegmentInfo <- meshFileService.listMeshChunksForSegmentsMerged(organizationId, - datasetDirectoryName, - layerName, - meshFileName, - segmentIds) + chunkInfos: WebknossosSegmentInfo <- meshFileService.listMeshChunksForSegmentsMerged(meshFileKey, segmentIds) allChunkRanges: List[MeshChunk] = chunkInfos.lods.head.chunks transform = chunkInfos.lods.head.transform stlEncodedChunks: Seq[Array[Byte]] <- Fox.serialCombined(allChunkRanges) { chunkRange: MeshChunk => - readMeshChunkAsStl(organizationId, datasetDirectoryName, layerName, meshFileName, chunkRange, transform) + readMeshChunkAsStl(meshFileKey, chunkRange, transform) } stlOutput = combineEncodedChunksToStl(stlEncodedChunks) _ = logMeshingDuration(before, "meshfile", stlOutput.length) } yield stlOutput - private def readMeshChunkAsStl(organizationId: String, - 
datasetDirectoryName: String, - layerName: String, - meshFileName: String, - chunkInfo: MeshChunk, - transform: Array[Array[Double]])(implicit ec: ExecutionContext): Fox[Array[Byte]] = + private def readMeshChunkAsStl(meshFileKey: MeshFileKey, chunkInfo: MeshChunk, transform: Array[Array[Double]])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[Array[Byte]] = for { - (dracoMeshChunkBytes, encoding) <- meshFileService - .readMeshChunk( - organizationId, - datasetDirectoryName, - layerName, - MeshChunkDataRequestList(MeshFileInfo(meshFileName, None, None, None, 7), - List(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, None))) - ) - .toFox ?~> "mesh.file.loadChunk.failed" + (dracoMeshChunkBytes, encoding) <- meshFileService.readMeshChunk( + meshFileKey, + List(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, None)) + ) ?~> "mesh.file.loadChunk.failed" _ <- Fox.fromBool(encoding == "draco") ?~> s"mesh file encoding is $encoding, only draco is supported" stlEncodedChunk <- getStlEncodedChunkFromDraco(chunkInfo, transform, dracoMeshChunkBytes) } yield stlEncodedChunk - private def loadFullMeshFromRemoteNeuroglancerMeshFile( - fullMeshRequest: FullMeshRequest)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = + // TODO delete if above works also for neuroglancer + private def loadFullMeshFromRemoteNeuroglancerMeshFile(meshFileKey: MeshFileKey, fullMeshRequest: FullMeshRequest)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[Array[Byte]] = for { chunkInfos: WebknossosSegmentInfo <- neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments( - fullMeshRequest.meshFilePath, + meshFileKey, List(fullMeshRequest.segmentId) ) _ <- Fox.fromBool(fullMeshRequest.mappingName.isEmpty) ?~> "Mapping is not supported for remote neuroglancer mesh files" @@ -201,13 +187,12 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, Array(0, lodTransform(1)(1), 0), Array(0, 0, lodTransform(2)(2)) ) - vertexQuantizationBits <- neuroglancerPrecomputedMeshService.getVertexQuantizationBits( - fullMeshRequest.meshFilePath) + vertexQuantizationBits <- neuroglancerPrecomputedMeshService.getVertexQuantizationBits(meshFileKey) stlEncodedChunks: Seq[Array[Byte]] <- Fox.serialCombined(allChunkRanges) { chunkRange: MeshChunk => readNeuroglancerPrecomputedMeshChunkAsStl( + meshFileKey, chunkRange, transform, - fullMeshRequest.meshFilePath, Some(fullMeshRequest.segmentId), vertexQuantizationBits ) @@ -216,14 +201,14 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, } yield stlOutput private def readNeuroglancerPrecomputedMeshChunkAsStl( + meshFileKey: MeshFileKey, chunkInfo: MeshChunk, transform: Array[Array[Double]], - meshFilePath: Option[String], segmentId: Option[Long], vertexQuantizationBits: Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { - (dracoMeshChunkBytes, encoding) <- neuroglancerPrecomputedMeshService.readMeshChunk( - meshFilePath, + (dracoMeshChunkBytes, encoding) <- meshFileService.readMeshChunk( + meshFileKey, Seq(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, segmentId)) ) ?~> "mesh.file.loadChunk.failed" _ <- Fox.fromBool(encoding == "draco") ?~> s"mesh file encoding is $encoding, only draco is supported" diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala index 
391e51f4413..6364b701506 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala @@ -3,16 +3,20 @@ package com.scalableminds.webknossos.datastore.services.mesh import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.geometry.Vec3Float import com.scalableminds.util.tools.{Fox, FoxImplicits} +import com.scalableminds.webknossos.datastore.DataStoreConfig +import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} import jakarta.inject.Inject import net.liftweb.common.Box.tryo import net.liftweb.common.{Box, Empty, Full} import play.api.i18n.{Messages, MessagesProvider} -import java.nio.file.Path +import java.nio.file.{Path, Paths} import scala.concurrent.ExecutionContext -class Hdf5MeshFileService @Inject()() extends NeuroglancerMeshHelper with FoxImplicits { +class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends NeuroglancerMeshHelper with FoxImplicits { + + private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private lazy val meshFileCache = new Hdf5FileCache(30) @@ -32,18 +36,17 @@ class Hdf5MeshFileService @Inject()() extends NeuroglancerMeshHelper with FoxImp } } - def readMeshfileMetadata(meshFileKey: MeshFileKey): Box[(String, Double, Array[Array[Double]])] = + private def readMeshfileMetadata(meshFileKey: MeshFileKey): Box[(String, Double, Array[Array[Double]])] = meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => - val encoding = cachedMeshFile.meshFormat val lodScaleMultiplier = cachedMeshFile.float64Reader.getAttr("/", "lod_scale_multiplier") val transform = cachedMeshFile.float64Reader.getMatrixAttr("/", "transform") - (encoding, lodScaleMultiplier, transform) + (cachedMeshFile.meshFormat, lodScaleMultiplier, transform) } - private def listMeshChunksForSegments(meshFileKey: MeshFileKey, - segmentIds: Seq[Long], - lodScaleMultiplier: Double, - transform: Array[Array[Double]]): List[List[MeshLodInfo]] = + private def listMeshChunksForSegmentsNested(meshFileKey: MeshFileKey, + segmentIds: Seq[Long], + lodScaleMultiplier: Double, + transform: Array[Array[Double]]): List[List[MeshLodInfo]] = meshFileCache .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile: CachedHdf5File => segmentIds.toList.flatMap(segmentId => @@ -110,6 +113,7 @@ class Hdf5MeshFileService @Inject()() extends NeuroglancerMeshHelper with FoxImp transform: Array[Array[Double]], lod: Int): Array[Array[Double]] = transform + // TODO should we give the version field to the frontend? 
private def versionForMeshFile(meshFilePath: Path): Long = meshFileCache .withCachedHdf5(meshFilePath) { cachedMeshFile => @@ -118,7 +122,8 @@ class Hdf5MeshFileService @Inject()() extends NeuroglancerMeshHelper with FoxImp .toOption .getOrElse(0) - def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: MeshChunkDataRequestList) = + def readMeshChunk(meshFileKey: MeshFileKey, + meshChunkDataRequests: Seq[MeshChunkDataRequest]): Box[(Array[Byte], String)] = for { resultBox <- meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => readMeshChunkFromCachedMeshfile(cachedMeshFile, meshChunkDataRequests) @@ -128,11 +133,11 @@ class Hdf5MeshFileService @Inject()() extends NeuroglancerMeshHelper with FoxImp private def readMeshChunkFromCachedMeshfile( cachedMeshFile: CachedHdf5File, - meshChunkDataRequests: MeshChunkDataRequestList): Box[(Array[Byte], String)] = { + meshChunkDataRequests: Seq[MeshChunkDataRequest]): Box[(Array[Byte], String)] = { val meshFormat = cachedMeshFile.meshFormat // Sort the requests by byte offset to optimize for spinning disk access val requestsReordered = - meshChunkDataRequests.requests.zipWithIndex.sortBy(requestAndIndex => requestAndIndex._1.byteOffset).toList + meshChunkDataRequests.zipWithIndex.sortBy(requestAndIndex => requestAndIndex._1.byteOffset).toList val data: List[(Array[Byte], Int)] = requestsReordered.map { requestAndIndex => val meshChunkDataRequest = requestAndIndex._1 val data = @@ -145,21 +150,26 @@ class Hdf5MeshFileService @Inject()() extends NeuroglancerMeshHelper with FoxImp Full((dataSorted.flatMap(d => d._1).toArray, meshFormat)) } - def readMeshChunksForSegments(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( + def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = for { - (encoding, lodScaleMultiplier, transform) <- readMeshfileMetadata(meshFileKey).toFox - meshChunksForUnmappedSegments: List[List[MeshLodInfo]] = listMeshChunksForSegments(meshFileKey, - segmentIds, - lodScaleMultiplier, - transform) + (meshFormat, lodScaleMultiplier, transform) <- readMeshfileMetadata(meshFileKey).toFox + meshChunksForUnmappedSegments: List[List[MeshLodInfo]] = listMeshChunksForSegmentsNested(meshFileKey, + segmentIds, + lodScaleMultiplier, + transform) _ <- Fox.fromBool(meshChunksForUnmappedSegments.nonEmpty) ?~> "zero chunks" ?~> Messages( "mesh.file.listChunks.failed", segmentIds.mkString(","), meshFileKey.attachment.name) - wkChunkInfos <- WebknossosSegmentInfo.fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, encoding).toFox + wkChunkInfos <- WebknossosSegmentInfo.fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, meshFormat).toFox } yield wkChunkInfos + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { + val datasetPath = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + val relevantPath = layerNameOpt.map(l => datasetPath.resolve(l)).getOrElse(datasetPath) + meshFileCache.clear(key => key.startsWith(relevantPath.toString)) + } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index 6c584b2659a..f67ecc7bc7d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -13,16 +13,16 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ LayerAttachmentDataformat } import com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils -import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, RemoteSourceDescriptorService} +import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo -import net.liftweb.common.{Box, Full} +import net.liftweb.common.Box import org.apache.commons.io.FilenameUtils -import play.api.i18n.{Messages, MessagesProvider} +import play.api.i18n.MessagesProvider import play.api.libs.json.{Format, JsResult, JsString, JsValue, Json, OFormat} import java.net.URI -import java.nio.file.{Path, Paths} +import java.nio.file.Paths import javax.inject.Inject import scala.concurrent.{ExecutionContext, Future} @@ -42,7 +42,7 @@ case class MeshChunkDataRequest( ) case class MeshChunkDataRequestList( - meshFile: MeshFileInfo, + meshFileName: String, requests: Seq[MeshChunkDataRequest] ) @@ -186,12 +186,12 @@ class MeshFileService @Inject()( case LayerAttachmentDataformat.neuroglancerPrecomputed => neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) case LayerAttachmentDataformat.zarr3 => - zarrMeshFileService.listMeshChunksForMultipleSegments() + zarrMeshFileService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) case LayerAttachmentDataformat.hdf5 => - hdf5MeshFileService.listMeshChunksForMultipleSegments() + hdf5MeshFileService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) } - def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: MeshChunkDataRequestList, + def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest], )(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Array[Byte], String)] = meshFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.hdf5 => hdf5MeshFileService.readMeshChunk(meshFileKey, meshChunkDataRequests).toFox @@ -200,10 +200,19 @@ class MeshFileService @Inject()( neuroglancerPrecomputedMeshService.readMeshChunk(meshFileKey, meshChunkDataRequests) } - def clearCache(organizationId: String, datasetDirectoryName: String, layerNameOpt: Option[String]): Int = { - val datasetPath = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName) - val relevantPath = layerNameOpt.map(l => datasetPath.resolve(l)).getOrElse(datasetPath) - meshFileCache.clear(key => key.startsWith(relevantPath.toString)) + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { + meshFileKeyCache.clear { + case (keyDataSourceId, keyLayerName, _) => + dataSourceId == keyDataSourceId && layerNameOpt.forall(_ == keyLayerName) + } + + val clearedHdf5Count = hdf5MeshFileService.clearCache(dataSourceId, layerNameOpt) + + val clearedZarrCount = zarrMeshFileService.clearCache(dataSourceId, layerNameOpt) + + val clearedNeuroglancerCount = neuroglancerPrecomputedMeshService.clearCache(dataSourceId, layerNameOpt) + + clearedHdf5Count + clearedZarrCount + clearedNeuroglancerCount } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerMeshHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerMeshHelper.scala index c8dd667b284..394a6784988 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerMeshHelper.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerMeshHelper.scala @@ -107,12 +107,12 @@ object WebknossosSegmentInfo { implicit val jsonFormat: OFormat[WebknossosSegmentInfo] = Json.format[WebknossosSegmentInfo] def fromMeshInfosAndMetadata(chunkInfos: List[List[MeshLodInfo]], - encoding: String, + meshFormat: String, chunkScale: Array[Double] = Array(1.0, 1.0, 1.0)): Option[WebknossosSegmentInfo] = chunkInfos.headOption.flatMap { firstChunkInfo => tryo { WebknossosSegmentInfo( - meshFormat = encoding, + meshFormat = meshFormat, lods = chunkInfos.transpose.map(mergeLod), chunkScale = chunkScale ) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala index 7b8a8516a2c..1f242d0092e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala @@ -12,6 +12,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ DataFormat, DataLayer, DataLayerWithMagLocators, + DataSourceId, GenericDataSource } import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptor} @@ -128,8 +129,7 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentId: Seq[Long])( implicit tc: TokenContext): Fox[WebknossosSegmentInfo] = for { - meshFilePath <- meshFileKey.attachment.path // TODO - vaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(meshFilePath), None)) + vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) mesh <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) chunkScale = Array.fill(3)(1 / math.pow(2, mesh.meshInfo.vertex_quantization_bits)) meshSegmentInfos <- Fox.serialCombined(segmentId)(id => listMeshChunks(vaultPath, mesh, id)) @@ -155,11 +155,10 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, segmentId) } yield meshSegmentInfo - def readMeshChunk(meshFilePathOpt: Option[String], meshChunkDataRequests: Seq[MeshChunkDataRequest])( + def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest])( implicit tc: TokenContext): Fox[(Array[Byte], String)] = for { - meshFilePath <- meshFilePathOpt.toFox ?~> "Mesh file path is required" - vaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(meshFilePath), None)) + vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) segmentId <- meshChunkDataRequests.head.segmentId.toFox ?~> "Segment id parameter is required" _ <- Fox.fromBool(meshChunkDataRequests.flatMap(_.segmentId).distinct.length == 1) ?~> "All requests must have the same segment id" mesh <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) @@ -170,11 +169,13 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, output = chunks.flatten.toArray } yield (output, NeuroglancerMesh.meshEncoding) - def getVertexQuantizationBits(meshFilePathOpt: Option[String])(implicit tc: TokenContext): Fox[Int] 
= + def getVertexQuantizationBits(meshFileKey: MeshFileKey)(implicit tc: TokenContext): Fox[Int] = for { - meshFilePath <- meshFilePathOpt.toFox ?~> "Mesh file path is required" - vaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(meshFilePath), None)) + vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) mesh <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) } yield mesh.meshInfo.vertex_quantization_bits + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = + // TODO + 0 } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index e05a31305d5..c66f86a260c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -5,11 +5,11 @@ import com.scalableminds.util.geometry.Vec3Float import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3Array, Zarr3GroupHeader} -import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, Hdf5HashedArrayUtils} import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptor} import net.liftweb.common.Box.tryo +import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{Json, OFormat} import ucar.ma2.{Array => MultiArray} @@ -17,12 +17,12 @@ import javax.inject.Inject import scala.concurrent.ExecutionContext case class MeshfileAttributes( - mesh_format: String, + mesh_format: String, // AKA encoding (e.g. "draco") lod_scale_multiplier: Double, transform: Array[Array[Double]], hash_function: String, n_buckets: Int, // TODO camelCase + custom format? 
- mapping_name: Option[String] // TODO double-check + mapping_name: Option[String] ) extends Hdf5HashedArrayUtils { lazy val applyHashFunction: Long => Long = getHashFunction(hash_function) } @@ -39,12 +39,19 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa private val keyBuckets = "buckets" private val keyNeuroglancer = "neuroglancer" - def readMeshfileMetadata(meshFilePath: VaultPath)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[(String, Double, Array[Array[Double]])] = + def readMeshfileAttributes(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[MeshfileAttributes] = for { - groupHeaderBytes <- (meshFilePath / Zarr3GroupHeader.FILENAME_ZARR_JSON).readBytes() + groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) + groupHeaderBytes <- (groupVaultPath / Zarr3GroupHeader.FILENAME_ZARR_JSON).readBytes() groupHeader <- JsonHelper.parseAs[Zarr3GroupHeader](groupHeaderBytes).toFox ?~> "Could not parse array header" meshfileAttributes <- groupHeader.meshfileAttributes.toFox ?~> "Could not parse meshfile attributes from zarr group file" + } yield meshfileAttributes + + def readMeshfileMetadata(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[(String, Double, Array[Array[Double]])] = + for { + meshfileAttributes <- readMeshfileAttributes(meshFileKey) } yield (meshfileAttributes.mesh_format, meshfileAttributes.lod_scale_multiplier, meshfileAttributes.transform) def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[String] = @@ -59,15 +66,15 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa } } yield mappingNameOrEmpty - def listMeshChunksForSegment(meshFilePath: VaultPath, segmentId: Long, meshfileAttributes: MeshfileAttributes)( + def listMeshChunksForSegment(meshFileKey: MeshFileKey, segmentId: Long, meshfileAttributes: MeshfileAttributes)( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[MeshLodInfo]] = for { (neuroglancerSegmentManifestStart, neuroglancerSegmentManifestEnd) <- getNeuroglancerSegmentManifestOffsets( - meshFilePath, + meshFileKey, meshfileAttributes, segmentId) - neuroglancerArray <- openZarrArray(meshFilePath, keyNeuroglancer) + neuroglancerArray <- openZarrArray(meshFileKey, keyNeuroglancer) manifestBytes <- neuroglancerArray.readAsMultiArray( offset = neuroglancerSegmentManifestStart, shape = (neuroglancerSegmentManifestEnd - neuroglancerSegmentManifestStart).toInt) @@ -80,18 +87,18 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa segmentId) private def getNeuroglancerSegmentManifestOffsets( - meshFilePath: VaultPath, + meshFileKey: MeshFileKey, meshfileAttributes: MeshfileAttributes, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Long, Long)] = { val bucketIndex = meshfileAttributes.applyHashFunction(segmentId) % meshfileAttributes.n_buckets for { - bucketOffsetsArray <- openZarrArray(meshFilePath, keyBucketOffsets) + bucketOffsetsArray <- openZarrArray(meshFileKey, keyBucketOffsets) bucketRange <- bucketOffsetsArray.readAsMultiArray(offset = bucketIndex, shape = 2) bucketStart <- tryo(bucketRange.getLong(0)).toFox bucketEnd <- tryo(bucketRange.getLong(1)).toFox bucketSize = (bucketEnd - bucketStart).toInt _ <- Fox.fromBool(bucketSize > 0) ?~> s"No entry for segment $segmentId" - bucketsArray <- openZarrArray(meshFilePath, keyBuckets) + bucketsArray <- 
openZarrArray(meshFileKey, keyBuckets) bucket <- bucketsArray.readAsMultiArray(offset = Array(bucketStart, 0), shape = Array(bucketSize + 1, 3)) bucketLocalOffset <- findLocalOffsetInBucket(bucket, segmentId).toFox _ <- Fox.fromBool(bucketLocalOffset >= 0) ?~> s"SegmentId $segmentId not in bucket list" @@ -101,13 +108,13 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa } private def findLocalOffsetInBucket(bucket: MultiArray, segmentId: Long): Option[Int] = - (0 until (bucket.getShape()(0))).find(idx => bucket.getLong(bucket.getIndex.set(Array(idx, 0))) == segmentId) + (0 until bucket.getShape()(0)).find(idx => bucket.getLong(bucket.getIndex.set(Array(idx, 0))) == segmentId) - private def openZarrArray(meshFilePath: VaultPath, zarrArrayName: String)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[DatasetArray] = { - val arrayPath = meshFilePath / zarrArrayName + private def openZarrArray(meshFileKey: MeshFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[DatasetArray] = for { - zarrArray <- Zarr3Array.open(arrayPath, + groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) + zarrArray <- Zarr3Array.open(groupVaultPath / zarrArrayName, DataSourceId("dummy", "unused"), "layer", None, @@ -115,7 +122,6 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa None, chunkCacheService.sharedChunkContentsCache) } yield zarrArray - } override def computeGlobalPosition(segmentInfo: NeuroglancerSegmentManifest, lod: Int, @@ -128,4 +134,58 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa lodScaleMultiplier: Double, transform: Array[Array[Double]], lod: Int): Array[Array[Double]] = transform + + def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( + implicit ec: ExecutionContext, + tc: TokenContext, + m: MessagesProvider): Fox[WebknossosSegmentInfo] = + for { + meshfileAttributes <- readMeshfileAttributes(meshFileKey) + meshChunksForUnmappedSegments: List[List[MeshLodInfo]] <- listMeshChunksForSegmentsNested(meshFileKey, + segmentIds, + meshfileAttributes) + _ <- Fox.fromBool(meshChunksForUnmappedSegments.nonEmpty) ?~> "zero chunks" ?~> Messages( + "mesh.file.listChunks.failed", + segmentIds.mkString(","), + meshFileKey.attachment.name) + wkChunkInfos <- WebknossosSegmentInfo + .fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, meshfileAttributes.mesh_format) + .toFox + } yield wkChunkInfos + + private def listMeshChunksForSegmentsNested(meshFileKey: MeshFileKey, + segmentIds: Seq[Long], + meshfileAttributes: MeshfileAttributes)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[List[MeshLodInfo]]] = + Fox.serialCombined(segmentIds) { segmentId => + listMeshChunksForSegment(meshFileKey, segmentId, meshfileAttributes) + } + + def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[(Array[Byte], String)] = + for { + meshfileAttributes <- readMeshfileAttributes(meshFileKey) + + // TODO skip sorting in zarr case? use parallel requests instead? 
+ // Sort the requests by byte offset to optimize for spinning disk access + requestsReordered = meshChunkDataRequests.zipWithIndex + .sortBy(requestAndIndex => requestAndIndex._1.byteOffset) + .toList + neuroglancerArray <- openZarrArray(meshFileKey, keyNeuroglancer) + data: List[(Array[Byte], Int)] <- Fox.serialCombined(requestsReordered) { requestAndIndex => + val meshChunkDataRequest = requestAndIndex._1 + for { + dataAsMultiArray <- neuroglancerArray.readAsMultiArray(offset = meshChunkDataRequest.byteOffset, + meshChunkDataRequest.byteSize) + } yield (dataAsMultiArray.getStorage.asInstanceOf[Array[Byte]], requestAndIndex._2) + } + dataSorted = data.sortBy(d => d._2) + dataSortedFlat = dataSorted.flatMap(d => d._1).toArray + } yield (dataSortedFlat, meshfileAttributes.mesh_format) + + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = + // TODO + 0 } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/DataVaultService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/DataVaultService.scala index 9c27031514e..cfe4119033f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/DataVaultService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/DataVaultService.scala @@ -11,6 +11,7 @@ import com.scalableminds.webknossos.datastore.datavault.{ S3DataVault, VaultPath } +import com.scalableminds.webknossos.datastore.models.datasource.LayerAttachment import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Full import play.api.libs.ws.WSClient @@ -34,6 +35,9 @@ class DataVaultService @Inject()(ws: WSClient, config: DataStoreConfig) extends private val vaultCache: AlfuCache[RemoteSourceDescriptor, DataVault] = AlfuCache(maxCapacity = 100) + def getVaultPath(layerAttachment: LayerAttachment)(implicit ec: ExecutionContext): Fox[VaultPath] = + getVaultPath(RemoteSourceDescriptor(layerAttachment.path, None)) + def getVaultPath(remoteSourceDescriptor: RemoteSourceDescriptor)(implicit ec: ExecutionContext): Fox[VaultPath] = for { vault <- vaultCache.getOrLoad(remoteSourceDescriptor, createVault) ?~> "dataVault.setup.failed" From 7d51512345016883e68abb7af466b2d50043141e Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 16 Jun 2025 14:34:56 +0200 Subject: [PATCH 041/100] adapt frontend to simplified protocol --- frontend/javascripts/admin/api/mesh.ts | 4 ++-- frontend/javascripts/types/api_types.ts | 8 +++----- .../viewer/model/sagas/meshes/precomputed_mesh_saga.ts | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/frontend/javascripts/admin/api/mesh.ts b/frontend/javascripts/admin/api/mesh.ts index 7b2d7925634..6100be94d69 100644 --- a/frontend/javascripts/admin/api/mesh.ts +++ b/frontend/javascripts/admin/api/mesh.ts @@ -52,7 +52,7 @@ export function getMeshfileChunksForSegment( params.append("editableMappingTracingId", editableMappingTracingId); } const payload: ListMeshChunksRequest = { - meshFile, + meshFile.name, segmentId, }; return Request.sendJSONReceiveJSON( @@ -72,7 +72,7 @@ type MeshChunkDataRequest = { }; type MeshChunkDataRequestList = { - meshFile: APIMeshFileInfo; + meshFileName: string; requests: MeshChunkDataRequest[]; }; diff --git a/frontend/javascripts/types/api_types.ts b/frontend/javascripts/types/api_types.ts index 933fe21060d..6e1893e3c92 100644 --- a/frontend/javascripts/types/api_types.ts +++ b/frontend/javascripts/types/api_types.ts @@ -949,12 +949,10 @@ export type ServerEditableMapping = 
{ export type APIMeshFileInfo = { name: string; - path: string | null | undefined; - fileType: string | null | undefined; mappingName?: string | null | undefined; - // 0 - is the first mesh file version - // 1-2 - the format should behave as v0 (refer to voxelytics for actual differences) - // 3 - is the newer version with draco encoding. + // 0 - unsupported (is the first mesh file version) + // 1-2 - unsupported (the format should behave as v0; refer to voxelytics for actual differences) + // 3+ - is the newer version with draco encoding. formatVersion: number; }; export type APIConnectomeFile = { diff --git a/frontend/javascripts/viewer/model/sagas/meshes/precomputed_mesh_saga.ts b/frontend/javascripts/viewer/model/sagas/meshes/precomputed_mesh_saga.ts index 49b5b47943b..7acc0974139 100644 --- a/frontend/javascripts/viewer/model/sagas/meshes/precomputed_mesh_saga.ts +++ b/frontend/javascripts/viewer/model/sagas/meshes/precomputed_mesh_saga.ts @@ -365,7 +365,7 @@ function* loadPrecomputedMeshesInChunksForLod( dataset, getBaseSegmentationName(segmentationLayer), { - meshFile, + meshFile.name, // Only extract the relevant properties requests: chunks.map(({ byteOffset, byteSize }) => ({ byteOffset, From 5d1b768dd87a83fd4382099cf9251fabb6638a01 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 16 Jun 2025 14:39:09 +0200 Subject: [PATCH 042/100] keys --- frontend/javascripts/admin/api/mesh.ts | 2 +- .../viewer/model/sagas/meshes/precomputed_mesh_saga.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/javascripts/admin/api/mesh.ts b/frontend/javascripts/admin/api/mesh.ts index 6100be94d69..e0cc053e874 100644 --- a/frontend/javascripts/admin/api/mesh.ts +++ b/frontend/javascripts/admin/api/mesh.ts @@ -52,7 +52,7 @@ export function getMeshfileChunksForSegment( params.append("editableMappingTracingId", editableMappingTracingId); } const payload: ListMeshChunksRequest = { - meshFile.name, + meshFileName: meshFile.name, segmentId, }; return Request.sendJSONReceiveJSON( diff --git a/frontend/javascripts/viewer/model/sagas/meshes/precomputed_mesh_saga.ts b/frontend/javascripts/viewer/model/sagas/meshes/precomputed_mesh_saga.ts index 7acc0974139..4af910b1274 100644 --- a/frontend/javascripts/viewer/model/sagas/meshes/precomputed_mesh_saga.ts +++ b/frontend/javascripts/viewer/model/sagas/meshes/precomputed_mesh_saga.ts @@ -365,7 +365,7 @@ function* loadPrecomputedMeshesInChunksForLod( dataset, getBaseSegmentationName(segmentationLayer), { - meshFile.name, + meshFileName: meshFile.name, // Only extract the relevant properties requests: chunks.map(({ byteOffset, byteSize }) => ({ byteOffset, From 16b38d6165d999f9293bc24e7571082290588ba3 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 10:28:07 +0200 Subject: [PATCH 043/100] explore + list meshfiles --- conf/messages | 3 + .../controllers/DSMeshController.scala | 19 ++- .../controllers/DataSourceController.scala | 2 +- .../explore/PrecomputedExplorer.scala | 32 ++++- .../services/AgglomerateService.scala | 9 +- .../services/mesh/DSFullMeshService.scala | 4 +- .../services/mesh/Hdf5MeshFileService.scala | 32 ++--- .../services/mesh/MeshFileService.scala | 115 ++++++++---------- .../services/mesh/MeshMappingHelper.scala | 4 +- ...uroglancerPrecomputedMeshFileService.scala | 72 ++--------- .../services/mesh/ZarrMeshFileService.scala | 35 +++--- .../datastore/storage/Hdf5FileCache.scala | 3 + 12 files changed, 138 insertions(+), 192 deletions(-) diff --git a/conf/messages b/conf/messages index 
b7aab81dbd6..c065b30d3ed 100644 --- a/conf/messages +++ b/conf/messages @@ -264,6 +264,9 @@ mesh.file.listChunks.failed=Failed to load chunk list for segment {0} from mesh mesh.file.loadChunk.failed=Failed to load mesh chunk for segment mesh.file.open.failed=Failed to open mesh file for reading mesh.file.readEncoding.failed=Failed to read encoding from mesh file +mesh.file.lookup.failed=Failed to look up mesh file “{0}” +mesh.file.readVersion.failed=Failed to read format version from file “{0}” +mesh.file.readMappingName.failed=Failed to read mapping name from mesh file “{0}” task.create.noTasks=Zero tasks were requested task.create.failed=Failed to create Task diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index ee911847078..cc6bd947865 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -9,8 +9,7 @@ import com.scalableminds.webknossos.datastore.services.mesh.{ ListMeshChunksRequest, MeshChunkDataRequestList, MeshFileService, - MeshMappingHelper, - NeuroglancerPrecomputedMeshFileService + MeshMappingHelper } import play.api.libs.json.Json import play.api.mvc.{Action, AnyContent, PlayBodyParsers} @@ -20,7 +19,6 @@ import scala.concurrent.ExecutionContext class DSMeshController @Inject()( accessTokenService: DataStoreAccessTokenService, meshFileService: MeshFileService, - neuroglancerPrecomputedMeshService: NeuroglancerPrecomputedMeshFileService, fullMeshService: DSFullMeshService, dataSourceRepository: DataSourceRepository, val dsRemoteWebknossosClient: DSRemoteWebknossosClient, @@ -37,12 +35,11 @@ class DSMeshController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - meshFiles <- meshFileService.exploreMeshFiles(organizationId, datasetDirectoryName, dataLayerName) - neuroglancerMeshFiles <- neuroglancerPrecomputedMeshService.exploreMeshFiles(organizationId, - datasetDirectoryName, - dataLayerName) - allMeshFiles = meshFiles ++ neuroglancerMeshFiles - } yield Ok(Json.toJson(allMeshFiles)) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + meshFileInfos <- meshFileService.listMeshFiles(dataSource.id, dataLayer) + } yield Ok(Json.toJson(meshFileInfos)) } } @@ -64,8 +61,8 @@ class DSMeshController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - meshFileKey <- meshFileService.lookUpMeshFile(dataSource.id, dataLayer, request.body.meshFile.name) - mappingNameForMeshFile <- meshFileService.mappingNameForMeshFile(meshFileKey).shiftBox + meshFileKey <- meshFileService.lookUpMeshFile(dataSource.id, dataLayer, request.body.meshFileName) + mappingNameForMeshFile <- meshFileService.mappingNameForMeshFile(meshFileKey) segmentIds: Seq[Long] <- segmentIdsForAgglomerateIdIfNeeded( dataSource.id, dataLayer, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 1ed61591b67..d5c7d47dfe2 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -265,7 +265,7 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateList = agglomerateService.exploreAgglomerates(organizationId, datasetDirectoryName, dataLayer) + agglomerateList = agglomerateService.listAgglomeratesFiles(dataSource.id, dataLayer) } yield Ok(Json.toJson(agglomerateList)) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala index 7077ccc5e3f..c520ba9f2c5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala @@ -13,7 +13,14 @@ import com.scalableminds.webknossos.datastore.datareaders.AxisOrder import com.scalableminds.webknossos.datastore.datareaders.precomputed.{PrecomputedHeader, PrecomputedScale} import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.models.VoxelSize -import com.scalableminds.webknossos.datastore.models.datasource.{Category, ElementClass} +import com.scalableminds.webknossos.datastore.models.datasource.{ + Category, + DatasetLayerAttachments, + ElementClass, + LayerAttachment, + LayerAttachmentDataformat +} +import com.scalableminds.webknossos.datastore.services.mesh.{NeuroglancerMesh, NeuroglancerPrecomputedMeshInfo} import scala.concurrent.ExecutionContext @@ -29,9 +36,10 @@ class PrecomputedExplorer(implicit val ec: ExecutionContext) extends RemoteLayer layerAndVoxelSize <- layerFromPrecomputedHeader(precomputedHeader, remotePath, credentialId) } yield List(layerAndVoxelSize) - private def layerFromPrecomputedHeader(precomputedHeader: PrecomputedHeader, - remotePath: VaultPath, - credentialId: Option[String]): Fox[(PrecomputedLayer, VoxelSize)] = + private def layerFromPrecomputedHeader( + precomputedHeader: PrecomputedHeader, + remotePath: VaultPath, + credentialId: Option[String])(implicit tc: TokenContext): Fox[(PrecomputedLayer, VoxelSize)] = for { name <- Fox.successful(guessNameFromPath(remotePath)) firstScale <- precomputedHeader.scales.headOption.toFox @@ -43,9 +51,13 @@ class PrecomputedExplorer(implicit val ec: ExecutionContext) extends RemoteLayer voxelSize <- Vec3Double.fromArray(smallestResolution).toFox mags: List[MagLocator] <- Fox.serialCombined(precomputedHeader.scales)( getMagFromScale(_, smallestResolution, remotePath, credentialId).toFox) + meshAttachments <- exploreMeshesForLayer(remotePath / precomputedHeader.meshPath) + attachmentsGrouped = if (meshAttachments.nonEmpty) Some(DatasetLayerAttachments(meshes = meshAttachments)) + else None layer = if (precomputedHeader.describesSegmentationLayer) { PrecomputedSegmentationLayer(name, boundingBox, elementClass, mags, None) - } else PrecomputedDataLayer(name, boundingBox, Category.color, elementClass, mags) + } else + PrecomputedDataLayer(name, boundingBox, Category.color, elementClass, mags, attachments = attachmentsGrouped) } yield (layer, VoxelSize.fromFactorWithDefaultUnit(voxelSize)) private def elementClassFromPrecomputedDataType(precomputedDataType: String): 
Option[ElementClass.Value] = @@ -72,4 +84,14 @@ class PrecomputedExplorer(implicit val ec: ExecutionContext) extends RemoteLayer axisOrder = AxisOrder.xyz(0, 1, 2) } yield MagLocator(mag, Some(path.toString), None, Some(axisOrder), channelIndex = None, credentialId) } + + private def exploreMeshesForLayer(meshPath: VaultPath)(implicit tc: TokenContext): Fox[Seq[LayerAttachment]] = + (for { + meshInfo <- (meshPath / NeuroglancerMesh.FILENAME_INFO) + .parseAsJson[NeuroglancerPrecomputedMeshInfo] ?~> "Failed to read mesh info" + _ <- Fox.fromBool(meshInfo.transform.length == 12) ?~> "Invalid mesh info: transform has to be of length 12" + } yield + Seq( + LayerAttachment(NeuroglancerMesh.meshName, meshPath.toUri, LayerAttachmentDataformat.neuroglancerPrecomputed))) + .orElse(Fox.successful(Seq.empty)) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index b94619e1359..4ff6874d44b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -40,11 +40,12 @@ class AgglomerateService(config: DataStoreConfig, private val agglomerateKeyCache : AlfuCache[(DataSourceId, String, String), AgglomerateFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → AgglomerateFileKey - def exploreAgglomerates(organizationId: String, datasetDirectoryName: String, dataLayer: DataLayer): Set[String] = { + def listAgglomeratesFiles(dataSourceId: DataSourceId, dataLayer: DataLayer): Set[String] = { val attachedAgglomerates = dataLayer.attachments.map(_.agglomerates).getOrElse(Seq.empty).map(_.name).toSet - val layerDir = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayer.name) - val exploredAgglomerates = PathUtils + val layerDir = + dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName).resolve(dataLayer.name) + val scannedAgglomerates = PathUtils .listFiles(layerDir.resolve(agglomerateDir), silent = true, PathUtils.fileExtensionFilter(agglomerateFileExtension)) @@ -55,7 +56,7 @@ class AgglomerateService(config: DataStoreConfig, .getOrElse(Nil) .toSet - attachedAgglomerates ++ exploredAgglomerates + attachedAgglomerates ++ scannedAgglomerates } def clearCaches(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index 0c925f6b565..6dc826abc20 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -135,14 +135,14 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, dataLayerName) meshFileName <- fullMeshRequest.meshFileName.toFox ?~> "meshFileName.required" meshFileKey <- meshFileService.lookUpMeshFile(dataSource.id, dataLayer, meshFileName) - mappingNameForMeshFileBox <- meshFileService.mappingNameForMeshFile(meshFileKey).shiftBox + mappingNameForMeshFile <- meshFileService.mappingNameForMeshFile(meshFileKey) segmentIds <- segmentIdsForAgglomerateIdIfNeeded( dataSource.id, dataLayer, 
fullMeshRequest.mappingName, fullMeshRequest.editableMappingTracingId, fullMeshRequest.segmentId, - mappingNameForMeshFileBox, + mappingNameForMeshFile, omitMissing = false ) chunkInfos: WebknossosSegmentInfo <- meshFileService.listMeshChunksForSegmentsMerged(meshFileKey, segmentIds) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala index 6364b701506..d00565a5715 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala @@ -1,6 +1,5 @@ package com.scalableminds.webknossos.datastore.services.mesh -import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.geometry.Vec3Float import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.DataStoreConfig @@ -8,10 +7,10 @@ import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} import jakarta.inject.Inject import net.liftweb.common.Box.tryo -import net.liftweb.common.{Box, Empty, Full} +import net.liftweb.common.{Box, Full} import play.api.i18n.{Messages, MessagesProvider} -import java.nio.file.{Path, Paths} +import java.nio.file.Paths import scala.concurrent.ExecutionContext class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends NeuroglancerMeshHelper with FoxImplicits { @@ -20,20 +19,15 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance private lazy val meshFileCache = new Hdf5FileCache(30) - def mappingNameForMeshFile(meshFileKey: MeshFileKey): Box[String] = { - val asOption = meshFileCache + def mappingNameForMeshFile(meshFileKey: MeshFileKey): Box[Option[String]] = tryo { + meshFileCache .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => - cachedMeshFile.stringReader.getAttr("/", "mapping_name") + cachedMeshFile.mappingName } .toOption .flatMap { value => Option(value) // catch null } - - asOption match { - case Some(mappingName) => Full(mappingName) - case None => Empty - } } private def readMeshfileMetadata(meshFileKey: MeshFileKey): Box[(String, Double, Array[Array[Double]])] = @@ -93,14 +87,6 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance (neuroglancerStart, neuroglancerEnd) } - // TODO null vs None? - private def mappingNameForMeshFile(meshFilePath: Path, meshFileVersion: Long): Box[String] = { - val attributeName = if (meshFileVersion == 0) "metadata/mapping_name" else "mapping_name" - meshFileCache.withCachedHdf5(meshFilePath) { cachedMeshFile => - cachedMeshFile.stringReader.getAttr("/", attributeName) - } - } - override def computeGlobalPosition(segmentInfo: NeuroglancerSegmentManifest, lod: Int, lodScaleMultiplier: Double, @@ -113,11 +99,10 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance transform: Array[Array[Double]], lod: Int): Array[Array[Double]] = transform - // TODO should we give the version field to the frontend? 
- private def versionForMeshFile(meshFilePath: Path): Long = + def versionForMeshFile(meshFileKey: MeshFileKey): Long = meshFileCache - .withCachedHdf5(meshFilePath) { cachedMeshFile => - cachedMeshFile.int64Reader.getAttr("/", "artifact_schema_version") + .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => + cachedMeshFile.artifactSchemaVersion } .toOption .getOrElse(0) @@ -152,7 +137,6 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, - tc: TokenContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = for { (meshFormat, lodScaleMultiplier, transform) <- readMeshfileMetadata(meshFileKey).toFox diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index f67ecc7bc7d..6167440368b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -2,9 +2,8 @@ package com.scalableminds.webknossos.datastore.services.mesh import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache -import com.scalableminds.util.enumeration.ExtendedEnumeration import com.scalableminds.util.io.PathUtils -import com.scalableminds.util.tools.{ByteUtils, Fox, FoxImplicits} +import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.models.datasource.{ DataLayer, @@ -14,20 +13,19 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ } import com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService -import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo import net.liftweb.common.Box import org.apache.commons.io.FilenameUtils -import play.api.i18n.MessagesProvider -import play.api.libs.json.{Format, JsResult, JsString, JsValue, Json, OFormat} +import play.api.i18n.{Messages, MessagesProvider} +import play.api.libs.json.{Json, OFormat} import java.net.URI import java.nio.file.Paths import javax.inject.Inject -import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.ExecutionContext case class ListMeshChunksRequest( - meshFile: MeshFileInfo, + meshFileName: String, segmentId: Long ) @@ -57,43 +55,24 @@ object MeshChunkDataRequestList { // TODO should this become a generic AttachmentKey? 
case class MeshFileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) -object MeshFileType extends ExtendedEnumeration { - type MeshFileType = Value - val local, neuroglancerPrecomputed = Value - - implicit object MeshFileTypeFormat extends Format[MeshFileType] { - def reads(json: JsValue): JsResult[MeshFileType] = - json.validate[String].map(MeshFileType.withName) - - def writes(meshFileType: MeshFileType): JsValue = JsString(meshFileType.toString) - } -} - +// Sent to wk frontend case class MeshFileInfo( name: String, - path: Option[String], - fileType: Option[MeshFileType.MeshFileType], mappingName: Option[String], formatVersion: Long -) { - def isNeuroglancerPrecomputed: Boolean = - fileType.contains(MeshFileType.neuroglancerPrecomputed) -} +) object MeshFileInfo { implicit val jsonFormat: OFormat[MeshFileInfo] = Json.format[MeshFileInfo] } -class MeshFileService @Inject()( - config: DataStoreConfig, - hdf5MeshFileService: Hdf5MeshFileService, - zarrMeshFileService: ZarrMeshFileService, - neuroglancerPrecomputedMeshService: NeuroglancerPrecomputedMeshFileService, - remoteSourceDescriptorService: RemoteSourceDescriptorService)(implicit ec: ExecutionContext) +class MeshFileService @Inject()(config: DataStoreConfig, + hdf5MeshFileService: Hdf5MeshFileService, + zarrMeshFileService: ZarrMeshFileService, + neuroglancerPrecomputedMeshService: NeuroglancerPrecomputedMeshFileService, + remoteSourceDescriptorService: RemoteSourceDescriptorService) extends FoxImplicits - with LazyLogging - with Hdf5HashedArrayUtils - with ByteUtils { + with Hdf5HashedArrayUtils { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private val meshesDir = "meshes" @@ -134,48 +113,56 @@ class MeshFileService @Inject()( ) } - def exploreMeshFiles(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String): Future[Set[MeshFileInfo]] = { - val layerDir = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayerName) - val meshFileNames = PathUtils + def listMeshFiles(dataSourceId: DataSourceId, dataLayer: DataLayer)(implicit ec: ExecutionContext, + tc: TokenContext, + m: MessagesProvider): Fox[Seq[MeshFileInfo]] = { + val attachedMeshFileNames = dataLayer.attachments.map(_.meshes).getOrElse(Seq.empty).map(_.name).toSet + + val layerDir = + dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName).resolve(dataLayer.name) + val scannedMeshFileNames = PathUtils .listFiles(layerDir.resolve(meshesDir), silent = true, PathUtils.fileExtensionFilter(hdf5FileExtension)) .map { paths => paths.map(path => FilenameUtils.removeExtension(path.getFileName.toString)) } .toOption .getOrElse(Nil) - - val meshFileVersions = meshFileNames.map { fileName => - val meshFilePath = layerDir.resolve(meshesDir).resolve(s"$fileName.$hdf5FileExtension") - versionForMeshFile(meshFilePath) + .toSet + + val allMeshFileNames = attachedMeshFileNames ++ scannedMeshFileNames + + // TODO skip failures + Fox.serialCombined(allMeshFileNames) { meshFileName => + for { + meshFileKey <- lookUpMeshFile(dataSourceId, dataLayer, meshFileName) ?~> Messages("mesh.file.lookup.failed", + meshFileName) + formatVersion <- versionForMeshFile(meshFileKey) ?~> Messages("mesh.file.readVersion.failed", meshFileName) + mappingName <- mappingNameForMeshFile(meshFileKey) ?~> Messages("mesh.file.readMappingName.failed", + meshFileName) + } yield MeshFileInfo(meshFileName, mappingName, formatVersion) } - - val mappingNameFoxes = 
meshFileNames.lazyZip(meshFileVersions).map { (fileName, fileVersion) => - val meshFilePath = layerDir.resolve(meshesDir).resolve(s"$fileName.$hdf5FileExtension") - mappingNameForMeshFile(meshFilePath, fileVersion) - } - - for { - mappingNameBoxes: Seq[Box[String]] <- Fox.sequence(mappingNameFoxes) - mappingNameOptions = mappingNameBoxes.map(_.toOption) - zipped = meshFileNames.lazyZip(mappingNameOptions).lazyZip(meshFileVersions) - } yield - zipped - .map({ - case (fileName, mappingName, fileVersion) => - MeshFileInfo(fileName, None, Some(MeshFileType.local), mappingName, fileVersion) - }) - .toSet } // Same as above but this variant constructs the meshFilePath itself and converts null to None - def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[String] = + def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Option[String]] = meshFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrMeshFileService.mappingNameForMeshFile(meshFileKey) case LayerAttachmentDataformat.hdf5 => hdf5MeshFileService.mappingNameForMeshFile(meshFileKey).toFox + case LayerAttachmentDataformat.neuroglancerPrecomputed => + Fox.successful(None) + } + + private def versionForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = + meshFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrMeshFileService.versionForMeshFile(meshFileKey) + case LayerAttachmentDataformat.hdf5 => + Fox.successful(hdf5MeshFileService.versionForMeshFile(meshFileKey)) + case LayerAttachmentDataformat.neuroglancerPrecomputed => + Fox.successful(NeuroglancerMesh.meshInfoVersion) } def listMeshChunksForSegmentsMerged(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( @@ -183,19 +170,19 @@ class MeshFileService @Inject()( tc: TokenContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = meshFileKey.attachment.dataFormat match { - case LayerAttachmentDataformat.neuroglancerPrecomputed => - neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) case LayerAttachmentDataformat.zarr3 => zarrMeshFileService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) case LayerAttachmentDataformat.hdf5 => hdf5MeshFileService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) + case LayerAttachmentDataformat.neuroglancerPrecomputed => + neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) } def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest], )(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Array[Byte], String)] = meshFileKey.attachment.dataFormat match { - case LayerAttachmentDataformat.hdf5 => hdf5MeshFileService.readMeshChunk(meshFileKey, meshChunkDataRequests).toFox case LayerAttachmentDataformat.zarr3 => zarrMeshFileService.readMeshChunk(meshFileKey, meshChunkDataRequests) + case LayerAttachmentDataformat.hdf5 => hdf5MeshFileService.readMeshChunk(meshFileKey, meshChunkDataRequests).toFox case LayerAttachmentDataformat.neuroglancerPrecomputed => neuroglancerPrecomputedMeshService.readMeshChunk(meshFileKey, meshChunkDataRequests) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala index 1be2f1de985..6bb64a978b0 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala @@ -8,7 +8,7 @@ import com.scalableminds.webknossos.datastore.services.{ } import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSourceId} -import net.liftweb.common.{Box, Full} +import net.liftweb.common.Full import scala.concurrent.ExecutionContext @@ -24,7 +24,7 @@ trait MeshMappingHelper extends FoxImplicits { targetMappingName: Option[String], editableMappingTracingId: Option[String], agglomerateId: Long, - mappingNameForMeshFile: Box[String], + mappingNameForMeshFile: Option[String], omitMissing: Boolean // If true, failing lookups in the agglomerate file will just return empty list. )(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = (targetMappingName, editableMappingTracingId) match { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala index 1f242d0092e..24a49de6f62 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala @@ -3,24 +3,13 @@ package com.scalableminds.webknossos.datastore.services.mesh import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Float -import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} -import com.scalableminds.webknossos.datastore.DataStoreConfig -import com.scalableminds.webknossos.datastore.datareaders.precomputed.{PrecomputedHeader, ShardingSpecification} +import com.scalableminds.util.tools.{Fox, FoxImplicits} +import com.scalableminds.webknossos.datastore.datareaders.precomputed.ShardingSpecification import com.scalableminds.webknossos.datastore.datavault.VaultPath -import com.scalableminds.webknossos.datastore.models.datasource.{ - Category, - DataFormat, - DataLayer, - DataLayerWithMagLocators, - DataSourceId, - GenericDataSource -} -import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptor} -import net.liftweb.common.Box.tryo +import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId +import com.scalableminds.webknossos.datastore.storage.DataVaultService import play.api.libs.json.{Json, OFormat} -import java.net.URI -import java.nio.file.Paths import javax.inject.Inject import scala.concurrent.ExecutionContext @@ -34,14 +23,12 @@ object NeuroglancerPrecomputedMeshInfo { implicit val jsonFormat: OFormat[NeuroglancerPrecomputedMeshInfo] = Json.format[NeuroglancerPrecomputedMeshInfo] } -class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, dataVaultService: DataVaultService)( +class NeuroglancerPrecomputedMeshFileService @Inject()(dataVaultService: DataVaultService)( implicit ec: ExecutionContext) extends FoxImplicits with NeuroglancerMeshHelper { - private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) - - private lazy val neuroglancerPrecomputedMeshInfoCache = AlfuCache[VaultPath, NeuroglancerMesh](100) + private lazy val 
meshInfoCache = AlfuCache[VaultPath, NeuroglancerMesh](100) private def loadRemoteMeshInfo(meshPath: VaultPath)(implicit tc: TokenContext): Fox[NeuroglancerMesh] = for { @@ -51,47 +38,6 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, _ <- Fox.fromBool(meshInfo.transform.length == 12) ?~> "Invalid mesh info: transform has to be of length 12" } yield NeuroglancerMesh(meshInfo) - def exploreMeshFiles(organizationId: String, datasetName: String, dataLayerName: String)( - implicit tc: TokenContext): Fox[Set[MeshFileInfo]] = { - def exploreMeshesForLayer(dataLayer: DataLayer): Fox[(NeuroglancerPrecomputedMeshInfo, VaultPath)] = - for { - dataLayerWithMagLocators <- tryo(dataLayer.asInstanceOf[DataLayerWithMagLocators]).toFox ?~> "Invalid DataLayer: Expected DataLayer to have mag locators" - firstMag <- dataLayerWithMagLocators.mags.headOption.toFox ?~> "No mags found" - magPath <- firstMag.path.toFox ?~> "Mag has no path" - remotePath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(new URI(magPath), None)) - layerPath = remotePath.parent - infoPath = layerPath / PrecomputedHeader.FILENAME_INFO - precomputedHeader <- infoPath - .parseAsJson[PrecomputedHeader] ?~> s"Failed to read neuroglancer precomputed metadata at $infoPath" - meshPath = layerPath / precomputedHeader.meshPath - mesh <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(meshPath, loadRemoteMeshInfo) - } yield (mesh.meshInfo, meshPath) - - def isDataLayerValid(d: DataLayer) = - d.name == dataLayerName && d.category == Category.segmentation && d.dataFormat == DataFormat.neuroglancerPrecomputed - - val datasetDir = dataBaseDir.resolve(organizationId).resolve(datasetName) - val datasetPropertiesFile = datasetDir.resolve(GenericDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON) - for { - datasetProperties <- JsonHelper - .parseFromFileAs[GenericDataSource[DataLayer]](datasetPropertiesFile, datasetDir) - .toFox - meshInfosAndInfoPaths = datasetProperties.dataLayers.filter(isDataLayerValid).map(exploreMeshesForLayer) - meshInfosResolved: List[(NeuroglancerPrecomputedMeshInfo, VaultPath)] <- Fox.fromFuture( - Fox.sequenceOfFulls(meshInfosAndInfoPaths)) - } yield - meshInfosResolved - .map({ - case (_, vaultPath) => - MeshFileInfo(NeuroglancerMesh.meshName, - Some(vaultPath.toString), - Some(MeshFileType.neuroglancerPrecomputed), - None, - NeuroglancerMesh.meshInfoVersion) - }) - .toSet - } - override def computeGlobalPosition(segmentInfo: NeuroglancerSegmentManifest, lod: Int, lodScaleMultiplier: Double, @@ -130,7 +76,7 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, implicit tc: TokenContext): Fox[WebknossosSegmentInfo] = for { vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) - mesh <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) + mesh <- meshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) chunkScale = Array.fill(3)(1 / math.pow(2, mesh.meshInfo.vertex_quantization_bits)) meshSegmentInfos <- Fox.serialCombined(segmentId)(id => listMeshChunks(vaultPath, mesh, id)) segmentInfo <- WebknossosSegmentInfo @@ -161,7 +107,7 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) segmentId <- meshChunkDataRequests.head.segmentId.toFox ?~> "Segment id parameter is required" _ <- Fox.fromBool(meshChunkDataRequests.flatMap(_.segmentId).distinct.length == 1) ?~> "All requests must have the same segment id" - mesh <- 
neuroglancerPrecomputedMeshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) + mesh <- meshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) minishardInfo = mesh.shardingSpecification.getMinishardInfo(segmentId) shardUrl = mesh.shardingSpecification.getPathForShard(vaultPath, minishardInfo._1) chunks <- Fox.serialCombined(meshChunkDataRequests.toList)(request => @@ -172,7 +118,7 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(config: DataStoreConfig, def getVertexQuantizationBits(meshFileKey: MeshFileKey)(implicit tc: TokenContext): Fox[Int] = for { vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) - mesh <- neuroglancerPrecomputedMeshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) + mesh <- meshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) } yield mesh.meshInfo.vertex_quantization_bits def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index c66f86a260c..10d52016154 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -22,7 +22,8 @@ case class MeshfileAttributes( transform: Array[Array[Double]], hash_function: String, n_buckets: Int, // TODO camelCase + custom format? - mapping_name: Option[String] + mapping_name: Option[String], + artifact_schema_version: Long ) extends Hdf5HashedArrayUtils { lazy val applyHashFunction: Long => Long = getHashFunction(hash_function) } @@ -39,8 +40,11 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa private val keyBuckets = "buckets" private val keyNeuroglancer = "neuroglancer" - def readMeshfileAttributes(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[MeshfileAttributes] = + // TODO rename meshFile to meshfile? + + // TODO cache? 
+ private def readMeshFileAttributes(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[MeshfileAttributes] = for { groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) groupHeaderBytes <- (groupVaultPath / Zarr3GroupHeader.FILENAME_ZARR_JSON).readBytes() @@ -51,20 +55,19 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa def readMeshfileMetadata(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(String, Double, Array[Array[Double]])] = for { - meshfileAttributes <- readMeshfileAttributes(meshFileKey) + meshfileAttributes <- readMeshFileAttributes(meshFileKey) } yield (meshfileAttributes.mesh_format, meshfileAttributes.lod_scale_multiplier, meshfileAttributes.transform) - def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[String] = + def versionForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { - groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) - groupHeaderBytes <- (groupVaultPath / Zarr3GroupHeader.FILENAME_ZARR_JSON).readBytes() - groupHeader <- JsonHelper.parseAs[Zarr3GroupHeader](groupHeaderBytes).toFox ?~> "Could not parse array header" - meshfileAttributes <- groupHeader.meshfileAttributes.toFox ?~> "Could not parse meshfile attributes from zarr group file" - mappingNameOrEmpty <- meshfileAttributes.mapping_name match { // TODO Does Fox have a shortcut for this? - case Some(mappingName) => Fox.successful(mappingName) - case None => Fox.empty - } - } yield mappingNameOrEmpty + meshfileAttributes <- readMeshFileAttributes(meshFileKey) + } yield meshfileAttributes.artifact_schema_version + + def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Option[String]] = + for { + meshfileAttributes <- readMeshFileAttributes(meshFileKey) + } yield meshfileAttributes.mapping_name def listMeshChunksForSegment(meshFileKey: MeshFileKey, segmentId: Long, meshfileAttributes: MeshfileAttributes)( implicit ec: ExecutionContext, @@ -140,7 +143,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa tc: TokenContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = for { - meshfileAttributes <- readMeshfileAttributes(meshFileKey) + meshfileAttributes <- readMeshFileAttributes(meshFileKey) meshChunksForUnmappedSegments: List[List[MeshLodInfo]] <- listMeshChunksForSegmentsNested(meshFileKey, segmentIds, meshfileAttributes) @@ -166,7 +169,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa implicit ec: ExecutionContext, tc: TokenContext): Fox[(Array[Byte], String)] = for { - meshfileAttributes <- readMeshfileAttributes(meshFileKey) + meshfileAttributes <- readMeshFileAttributes(meshFileKey) // TODO skip sorting in zarr case? use parallel requests instead? 
// Sort the requests by byte offset to optimize for spinning disk access diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala index 31c3624081b..945ffd06d4b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala @@ -39,9 +39,12 @@ class CachedHdf5File(reader: IHDF5Reader) // For Meshfile lazy val nBuckets: Long = uint64Reader.getAttr("/", "n_buckets") lazy val meshFormat: String = stringReader.getAttr("/", "mesh_format") + lazy val mappingName: String = stringReader.getAttr("/", "mapping_name") // For Meshfile and SegmentIndexFile lazy val hashFunction: Long => Long = getHashFunction(stringReader.getAttr("/", "hash_function")) + + lazy val artifactSchemaVersion: Long = int64Reader.getAttr("/", "artifact_schema_version") } object CachedHdf5File { From 7c94d313296a0a2d06d5bac1baf24a1fc010ea41 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 10:37:21 +0200 Subject: [PATCH 044/100] fix frontend type --- frontend/javascripts/admin/api/mesh.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/javascripts/admin/api/mesh.ts b/frontend/javascripts/admin/api/mesh.ts index e0cc053e874..d7604fb001d 100644 --- a/frontend/javascripts/admin/api/mesh.ts +++ b/frontend/javascripts/admin/api/mesh.ts @@ -22,7 +22,7 @@ type MeshSegmentInfo = { }; type ListMeshChunksRequest = { - meshFile: APIMeshFileInfo; + meshFileName: string; segmentId: number; }; From 93d560c38180254d4ab20540fd5d3da507b8f205 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 11:15:42 +0200 Subject: [PATCH 045/100] adapt schema to neuroglancerPrecomputed dataformat for attachments --- conf/evolutions/135-neuroglancer-attachment.sql | 9 +++++++++ .../reversions/135-neuroglancer-attachment.sql | 1 + conf/messages | 2 +- tools/postgres/schema.sql | 4 ++-- .../datastore/explore/PrecomputedExplorer.scala | 9 +++++++-- 5 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 conf/evolutions/135-neuroglancer-attachment.sql create mode 100644 conf/evolutions/reversions/135-neuroglancer-attachment.sql diff --git a/conf/evolutions/135-neuroglancer-attachment.sql b/conf/evolutions/135-neuroglancer-attachment.sql new file mode 100644 index 00000000000..c917e0327ac --- /dev/null +++ b/conf/evolutions/135-neuroglancer-attachment.sql @@ -0,0 +1,9 @@ +START TRANSACTION; + +do $$ begin ASSERT (select schemaVersion from webknossos.releaseInformation) = 134, 'Previous schema version mismatch'; end; $$ LANGUAGE plpgsql; + +ALTER TYPE webknossos.LAYER_ATTACHMENT_DATAFORMAT ADD VALUE 'neuroglancerPrecomputed'; + +UPDATE webknossos.releaseInformation SET schemaVersion = 135; + +COMMIT TRANSACTION; diff --git a/conf/evolutions/reversions/135-neuroglancer-attachment.sql b/conf/evolutions/reversions/135-neuroglancer-attachment.sql new file mode 100644 index 00000000000..96b15cce2cb --- /dev/null +++ b/conf/evolutions/reversions/135-neuroglancer-attachment.sql @@ -0,0 +1 @@ +-- Removing enum types directly is not possible so no reversion is available for this. 
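-- A minimal sketch for sanity-checking evolution 135 by hand, assuming direct
-- psql access to the webknossos database (connection details are an assumption,
-- not part of this patch). enum_range lists every value of the given enum type,
-- so after applying the evolution the new neuroglancerPrecomputed entry should
-- appear alongside hdf5, zarr3 and json, and the schema version should read 135.
SELECT unnest(enum_range(NULL::webknossos.LAYER_ATTACHMENT_DATAFORMAT)) AS attachment_dataformat;
SELECT schemaVersion FROM webknossos.releaseInformation;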
diff --git a/conf/messages b/conf/messages index c065b30d3ed..e6fe35be914 100644 --- a/conf/messages +++ b/conf/messages @@ -370,10 +370,10 @@ folder.notFound=Could not find the requested folder folder.delete.root=Cannot delete the organization’s root folder folder.move.root=Cannot move the organization’s root folder folder.update.notAllowed=No write access on this folder +folder.noWriteAccess=No write access in this folder folder.update.name.failed=Failed to update the folder’s name folder.update.teams.failed=Failed to update the folder’s allowed teams folder.create.failed.teams.failed=Failed to create folder in this location -folder.noWriteAccess=No write access in this folder folder.nameMustNotContainSlash=Folder names cannot contain forward slashes segmentAnything.notEnabled=AI based quick select is not enabled for this WEBKNOSSOS instance. diff --git a/tools/postgres/schema.sql b/tools/postgres/schema.sql index 267488b6ca6..134189ed102 100644 --- a/tools/postgres/schema.sql +++ b/tools/postgres/schema.sql @@ -21,7 +21,7 @@ CREATE TABLE webknossos.releaseInformation ( schemaVersion BIGINT NOT NULL ); -INSERT INTO webknossos.releaseInformation(schemaVersion) values(134); +INSERT INTO webknossos.releaseInformation(schemaVersion) values(135); COMMIT TRANSACTION; @@ -163,7 +163,7 @@ CREATE TABLE webknossos.dataset_layer_additionalAxes( ); CREATE TYPE webknossos.LAYER_ATTACHMENT_TYPE AS ENUM ('agglomerate', 'connectome', 'segmentIndex', 'mesh', 'cumsum'); -CREATE TYPE webknossos.LAYER_ATTACHMENT_DATAFORMAT AS ENUM ('hdf5', 'zarr3', 'json'); +CREATE TYPE webknossos.LAYER_ATTACHMENT_DATAFORMAT AS ENUM ('hdf5', 'zarr3', 'json', 'neuroglancerPrecomputed'); CREATE TABLE webknossos.dataset_layer_attachments( _dataset TEXT CONSTRAINT _dataset_objectId CHECK (_dataset ~ '^[0-9a-f]{24}$') NOT NULL, layerName TEXT NOT NULL, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala index c520ba9f2c5..7558e21bbf3 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala @@ -55,9 +55,14 @@ class PrecomputedExplorer(implicit val ec: ExecutionContext) extends RemoteLayer attachmentsGrouped = if (meshAttachments.nonEmpty) Some(DatasetLayerAttachments(meshes = meshAttachments)) else None layer = if (precomputedHeader.describesSegmentationLayer) { - PrecomputedSegmentationLayer(name, boundingBox, elementClass, mags, None) + PrecomputedSegmentationLayer(name, + boundingBox, + elementClass, + mags, + largestSegmentId = None, + attachments = attachmentsGrouped) } else - PrecomputedDataLayer(name, boundingBox, Category.color, elementClass, mags, attachments = attachmentsGrouped) + PrecomputedDataLayer(name, boundingBox, Category.color, elementClass, mags) } yield (layer, VoxelSize.fromFactorWithDefaultUnit(voxelSize)) private def elementClassFromPrecomputedDataType(precomputedDataType: String): Option[ElementClass.Value] = From 1d492d10217dd96eb18c9009f75b89af7ca2bf32 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 12:57:54 +0200 Subject: [PATCH 046/100] adapt to new json format --- .../controllers/ZarrStreamingController.scala | 6 +- ...eader.scala => NgffZarr3GroupHeader.scala} | 23 +++---- .../datastore/explore/NgffV0_5Explorer.scala | 28 ++++---- 
.../services/DSRemoteTracingstoreClient.scala | 6 +- .../services/mesh/ZarrMeshFileService.scala | 68 +++++++++++++------ ...VolumeTracingZarrStreamingController.scala | 4 +- 6 files changed, 78 insertions(+), 57 deletions(-) rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/{Zarr3GroupHeader.scala => NgffZarr3GroupHeader.scala} (51%) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/ZarrStreamingController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/ZarrStreamingController.scala index 2dad0515110..d230341b03b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/ZarrStreamingController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/ZarrStreamingController.scala @@ -13,7 +13,7 @@ import com.scalableminds.webknossos.datastore.datareaders.zarr.{ NgffMetadataV0_5, ZarrHeader } -import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3ArrayHeader, Zarr3GroupHeader} +import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3ArrayHeader, NgffZarr3GroupHeader} import com.scalableminds.webknossos.datastore.models.annotation.{AnnotationLayer, AnnotationLayerType, AnnotationSource} import com.scalableminds.webknossos.datastore.models.datasource._ import com.scalableminds.webknossos.datastore.models.requests.{ @@ -83,7 +83,7 @@ class ZarrStreamingController @Inject()( dataSource.scale, dataLayer.sortedMags, dataLayer.additionalAxes) - zarr3GroupHeader = Zarr3GroupHeader(3, "group", Some(omeNgffHeaderV0_5), None) + zarr3GroupHeader = NgffZarr3GroupHeader(3, "group", omeNgffHeaderV0_5) } yield Ok(Json.toJson(zarr3GroupHeader)) } } @@ -132,7 +132,7 @@ class ZarrStreamingController @Inject()( dataSource.scale, dataLayer.sortedMags, dataLayer.additionalAxes) - zarr3GroupHeader = Zarr3GroupHeader(3, "group", Some(dataSourceOmeNgffHeader), None) + zarr3GroupHeader = NgffZarr3GroupHeader(3, "group", dataSourceOmeNgffHeader) } yield Ok(Json.toJson(zarr3GroupHeader)) ) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3GroupHeader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/NgffZarr3GroupHeader.scala similarity index 51% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3GroupHeader.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/NgffZarr3GroupHeader.scala index de1eb77ed65..d90361695db 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3GroupHeader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/NgffZarr3GroupHeader.scala @@ -1,37 +1,30 @@ package com.scalableminds.webknossos.datastore.datareaders.zarr3 import com.scalableminds.webknossos.datastore.datareaders.zarr.NgffMetadataV0_5 -import com.scalableminds.webknossos.datastore.services.mesh.MeshfileAttributes import play.api.libs.json._ -case class Zarr3GroupHeader( +case class NgffZarr3GroupHeader( zarr_format: Int, // must be 3 node_type: String, // must be "group" - ngffMetadata: Option[NgffMetadataV0_5], - meshfileAttributes: Option[MeshfileAttributes] + ngffMetadata: NgffMetadataV0_5, ) -object Zarr3GroupHeader { - def FILENAME_ZARR_JSON = "zarr.json" - - implicit object Zarr3GroupHeaderFormat extends Format[Zarr3GroupHeader] { - override def reads(json: 
JsValue): JsResult[Zarr3GroupHeader] = +object NgffZarr3GroupHeader { + implicit object Zarr3GroupHeaderFormat extends Format[NgffZarr3GroupHeader] { + override def reads(json: JsValue): JsResult[NgffZarr3GroupHeader] = for { zarr_format <- (json \ "zarr_format").validate[Int] node_type <- (json \ "node_type").validate[String] // Read the metadata from the correct json path. - ngffMetadata <- (json \ "attributes" \ "ome").validateOpt[NgffMetadataV0_5] - // TODO unify. also, include in Writes - meshfileAttributes <- (json \ "attributes").validateOpt[MeshfileAttributes] + ngffMetadata <- (json \ "attributes" \ "ome").validate[NgffMetadataV0_5] } yield - Zarr3GroupHeader( + NgffZarr3GroupHeader( zarr_format, node_type, ngffMetadata, - meshfileAttributes ) - override def writes(zarrArrayGroup: Zarr3GroupHeader): JsValue = + override def writes(zarrArrayGroup: NgffZarr3GroupHeader): JsValue = Json.obj( "zarr_format" -> zarrArrayGroup.zarr_format, "node_type" -> zarrArrayGroup.node_type, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_5Explorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_5Explorer.scala index 6ec2421e76a..67539c1b388 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_5Explorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/NgffV0_5Explorer.scala @@ -7,7 +7,7 @@ import com.scalableminds.webknossos.datastore.dataformats.MagLocator import com.scalableminds.webknossos.datastore.dataformats.layers.{Zarr3DataLayer, Zarr3Layer, Zarr3SegmentationLayer} import com.scalableminds.webknossos.datastore.datareaders.AxisOrder import com.scalableminds.webknossos.datastore.datareaders.zarr.{NgffDataset, NgffMultiscalesItem} -import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3ArrayHeader, Zarr3GroupHeader} +import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3ArrayHeader, NgffZarr3GroupHeader} import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.models.VoxelSize import com.scalableminds.webknossos.datastore.models.datasource.LayerViewConfiguration.LayerViewConfiguration @@ -26,19 +26,19 @@ class NgffV0_5Explorer(implicit val ec: ExecutionContext) implicit tc: TokenContext): Fox[List[(DataLayerWithMagLocators, VoxelSize)]] = for { zarrJsonPath <- Fox.successful(remotePath / Zarr3ArrayHeader.FILENAME_ZARR_JSON) - groupHeader <- zarrJsonPath.parseAsJson[Zarr3GroupHeader] ?~> s"Failed to read OME NGFF header at $zarrJsonPath" - ngffMetadata <- groupHeader.ngffMetadata.toFox + groupHeader <- zarrJsonPath + .parseAsJson[NgffZarr3GroupHeader] ?~> s"Failed to read OME NGFF header at $zarrJsonPath" labelLayers <- exploreLabelLayers(remotePath, credentialId).orElse( Fox.successful(List[(Zarr3Layer, VoxelSize)]())) - layerLists: List[List[(DataLayerWithMagLocators, VoxelSize)]] <- Fox.serialCombined(ngffMetadata.multiscales)( - multiscale => { - for { - channelCount <- getNgffMultiscaleChannelCount(multiscale, remotePath) - channelAttributes = getChannelAttributes(ngffMetadata.omero) - layers <- layersFromNgffMultiscale(multiscale, remotePath, credentialId, channelCount, channelAttributes) - } yield layers - }) + layerLists: List[List[(DataLayerWithMagLocators, VoxelSize)]] <- Fox.serialCombined( + groupHeader.ngffMetadata.multiscales)(multiscale => { + for { + channelCount <- getNgffMultiscaleChannelCount(multiscale, remotePath) + channelAttributes = 
getChannelAttributes(groupHeader.ngffMetadata.omero) + layers <- layersFromNgffMultiscale(multiscale, remotePath, credentialId, channelCount, channelAttributes) + } yield layers + }) layers: List[(DataLayerWithMagLocators, VoxelSize)] = layerLists.flatten } yield layers ++ labelLayers @@ -133,9 +133,9 @@ class NgffV0_5Explorer(implicit val ec: ExecutionContext) for { fullLabelPath <- Fox.successful(remotePath / "labels" / labelPath) zarrJsonPath = fullLabelPath / Zarr3ArrayHeader.FILENAME_ZARR_JSON - groupHeader <- zarrJsonPath.parseAsJson[Zarr3GroupHeader] - ngffMetadata <- groupHeader.ngffMetadata.toFox - layers: List[List[(DataLayerWithMagLocators, VoxelSize)]] <- Fox.serialCombined(ngffMetadata.multiscales)( + groupHeader <- zarrJsonPath.parseAsJson[NgffZarr3GroupHeader] + layers: List[List[(DataLayerWithMagLocators, VoxelSize)]] <- Fox.serialCombined( + groupHeader.ngffMetadata.multiscales)( multiscale => layersFromNgffMultiscale(multiscale.copy(name = Some(s"labels-$labelPath")), fullLabelPath, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteTracingstoreClient.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteTracingstoreClient.scala index 49bdae95721..d2aab72cda7 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteTracingstoreClient.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteTracingstoreClient.scala @@ -5,7 +5,7 @@ import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.dataformats.layers.ZarrSegmentationLayer import com.scalableminds.webknossos.datastore.datareaders.zarr.{NgffMetadata, ZarrHeader} -import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3ArrayHeader, Zarr3GroupHeader} +import com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3ArrayHeader, NgffZarr3GroupHeader} import com.scalableminds.webknossos.datastore.rpc.RPC import com.typesafe.scalalogging.LazyLogging import play.api.inject.ApplicationLifecycle @@ -52,9 +52,9 @@ class DSRemoteTracingstoreClient @Inject()( .getWithJsonResponse[NgffMetadata] def getZarrJsonGroupHeaderWithNgff(tracingId: String, tracingStoreUri: String)( - implicit tc: TokenContext): Fox[Zarr3GroupHeader] = + implicit tc: TokenContext): Fox[NgffZarr3GroupHeader] = rpc(s"$tracingStoreUri/tracings/volume/zarr3_experimental/$tracingId/zarr.json").withTokenFromContext - .getWithJsonResponse[Zarr3GroupHeader] + .getWithJsonResponse[NgffZarr3GroupHeader] def getRawZarrCube(tracingId: String, mag: String, cxyz: String, tracingStoreUri: String)( implicit tc: TokenContext): Fox[Array[Byte]] = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 10d52016154..d6c61df4e23 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -4,32 +4,59 @@ import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.geometry.Vec3Float import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray -import 
com.scalableminds.webknossos.datastore.datareaders.zarr3.{Zarr3Array, Zarr3GroupHeader} +import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, Hdf5HashedArrayUtils} import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptor} import net.liftweb.common.Box.tryo import play.api.i18n.{Messages, MessagesProvider} -import play.api.libs.json.{Json, OFormat} +import play.api.libs.json.{JsResult, JsValue, Reads} import ucar.ma2.{Array => MultiArray} import javax.inject.Inject import scala.concurrent.ExecutionContext case class MeshfileAttributes( - mesh_format: String, // AKA encoding (e.g. "draco") - lod_scale_multiplier: Double, + formatVersion: Long, + meshFormat: String, // AKA encoding (e.g. "draco") + lodScaleMultiplier: Double, transform: Array[Array[Double]], - hash_function: String, - n_buckets: Int, // TODO camelCase + custom format? - mapping_name: Option[String], - artifact_schema_version: Long + hashFunction: String, + nBuckets: Int, + mappingName: Option[String] ) extends Hdf5HashedArrayUtils { - lazy val applyHashFunction: Long => Long = getHashFunction(hash_function) + lazy val applyHashFunction: Long => Long = getHashFunction(hashFunction) } object MeshfileAttributes { - implicit val jsonFormat: OFormat[MeshfileAttributes] = Json.format[MeshfileAttributes] + val FILENAME_ZARR_JSON = "zarr.json" + + implicit object MeshfileAttributesZarr3GroupHeaderReads extends Reads[MeshfileAttributes] { + override def reads(json: JsValue): JsResult[MeshfileAttributes] = { + val keyVx = "voxelytics" + val keyFormatVersion = "artifact_schema_version" + val keyArtifactAttrs = "artifact_attributes" + val meshfileAttrs = json \ keyVx \ keyArtifactAttrs + for { + formatVersion <- (json \ keyVx \ keyFormatVersion).validate[Long] + meshFormat <- (meshfileAttrs \ "mesh_format").validate[String] + lodScaleMultiplier <- (meshfileAttrs \ "lod_scale_multiplier").validate[Double] + transform <- (meshfileAttrs \ "transform").validate[Array[Array[Double]]] + hashFunction <- (meshfileAttrs \ "hash_function").validate[String] + nBuckets <- (meshfileAttrs \ "n_buckets").validate[Int] + mappingName <- (meshfileAttrs \ "mappingName").validateOpt[String] + } yield + MeshfileAttributes( + formatVersion, + meshFormat, + lodScaleMultiplier, + transform, + hashFunction, + nBuckets, + mappingName, + ) + } + } } class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVaultService: DataVaultService) @@ -47,27 +74,28 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa tc: TokenContext): Fox[MeshfileAttributes] = for { groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) - groupHeaderBytes <- (groupVaultPath / Zarr3GroupHeader.FILENAME_ZARR_JSON).readBytes() - groupHeader <- JsonHelper.parseAs[Zarr3GroupHeader](groupHeaderBytes).toFox ?~> "Could not parse array header" - meshfileAttributes <- groupHeader.meshfileAttributes.toFox ?~> "Could not parse meshfile attributes from zarr group file" + groupHeaderBytes <- (groupVaultPath / MeshfileAttributes.FILENAME_ZARR_JSON).readBytes() + meshfileAttributes <- JsonHelper + .parseAs[MeshfileAttributes](groupHeaderBytes) + .toFox ?~> "Could not parse meshfile attributes from zarr group file" } yield meshfileAttributes def readMeshfileMetadata(meshFileKey: 
MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(String, Double, Array[Array[Double]])] = for { meshfileAttributes <- readMeshFileAttributes(meshFileKey) - } yield (meshfileAttributes.mesh_format, meshfileAttributes.lod_scale_multiplier, meshfileAttributes.transform) + } yield (meshfileAttributes.meshFormat, meshfileAttributes.lodScaleMultiplier, meshfileAttributes.transform) def versionForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { meshfileAttributes <- readMeshFileAttributes(meshFileKey) - } yield meshfileAttributes.artifact_schema_version + } yield meshfileAttributes.formatVersion def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Option[String]] = for { meshfileAttributes <- readMeshFileAttributes(meshFileKey) - } yield meshfileAttributes.mapping_name + } yield meshfileAttributes.mappingName def listMeshChunksForSegment(meshFileKey: MeshFileKey, segmentId: Long, meshfileAttributes: MeshfileAttributes)( implicit ec: ExecutionContext, @@ -84,7 +112,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa segmentManifest <- tryo(NeuroglancerSegmentManifest.fromBytes(manifestBytes.getStorage.asInstanceOf[Array[Byte]])).toFox } yield enrichSegmentInfo(segmentManifest, - meshfileAttributes.lod_scale_multiplier, + meshfileAttributes.lodScaleMultiplier, meshfileAttributes.transform, neuroglancerSegmentManifestStart, segmentId) @@ -93,7 +121,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa meshFileKey: MeshFileKey, meshfileAttributes: MeshfileAttributes, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Long, Long)] = { - val bucketIndex = meshfileAttributes.applyHashFunction(segmentId) % meshfileAttributes.n_buckets + val bucketIndex = meshfileAttributes.applyHashFunction(segmentId) % meshfileAttributes.nBuckets for { bucketOffsetsArray <- openZarrArray(meshFileKey, keyBucketOffsets) bucketRange <- bucketOffsetsArray.readAsMultiArray(offset = bucketIndex, shape = 2) @@ -152,7 +180,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa segmentIds.mkString(","), meshFileKey.attachment.name) wkChunkInfos <- WebknossosSegmentInfo - .fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, meshfileAttributes.mesh_format) + .fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, meshfileAttributes.meshFormat) .toFox } yield wkChunkInfos @@ -186,7 +214,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa } dataSorted = data.sortBy(d => d._2) dataSortedFlat = dataSorted.flatMap(d => d._1).toArray - } yield (dataSortedFlat, meshfileAttributes.mesh_format) + } yield (dataSortedFlat, meshfileAttributes.meshFormat) def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = // TODO diff --git a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala index 01952f03c97..b1def678d95 100644 --- a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala +++ b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/controllers/VolumeTracingZarrStreamingController.scala @@ -24,7 +24,7 @@ import 
com.scalableminds.webknossos.datastore.datareaders.zarr3.{ TransposeCodecConfiguration, TransposeSetting, Zarr3ArrayHeader, - Zarr3GroupHeader + NgffZarr3GroupHeader } import com.scalableminds.webknossos.datastore.datareaders.{ArrayOrder, AxisOrder} import com.scalableminds.webknossos.datastore.helpers.ProtoGeometryImplicits @@ -257,7 +257,7 @@ class VolumeTracingZarrStreamingController @Inject()( dataSourceVoxelSize = dataSource.scale, mags = sortedExistingMags, additionalAxes = dataSource.additionalAxesUnion) - zarr3GroupHeader = Zarr3GroupHeader(3, "group", Some(omeNgffHeader), None) + zarr3GroupHeader = NgffZarr3GroupHeader(3, "group", omeNgffHeader) } yield Ok(Json.toJson(zarr3GroupHeader)) } } From 9af3004181169a44eb216449f332963871d285d5 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 14:43:31 +0200 Subject: [PATCH 047/100] clear caches --- .../services/mesh/DSFullMeshService.scala | 14 ++-- .../services/mesh/Hdf5MeshFileService.scala | 12 ++-- .../services/mesh/MeshFileService.scala | 28 ++++---- ...uroglancerPrecomputedMeshFileService.scala | 30 ++++---- .../services/mesh/ZarrMeshFileService.scala | 68 ++++++++++++------- 5 files changed, 85 insertions(+), 67 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index 6dc826abc20..8301db64f40 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -155,7 +155,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, _ = logMeshingDuration(before, "meshfile", stlOutput.length) } yield stlOutput - private def readMeshChunkAsStl(meshFileKey: MeshFileKey, chunkInfo: MeshChunk, transform: Array[Array[Double]])( + private def readMeshChunkAsStl(meshFileKey: MeshfileKey, chunkInfo: MeshChunk, transform: Array[Array[Double]])( implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { @@ -168,7 +168,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, } yield stlEncodedChunk // TODO delete if above works also for neuroglancer - private def loadFullMeshFromRemoteNeuroglancerMeshFile(meshFileKey: MeshFileKey, fullMeshRequest: FullMeshRequest)( + private def loadFullMeshFromRemoteNeuroglancerMeshFile(meshFileKey: MeshfileKey, fullMeshRequest: FullMeshRequest)( implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { @@ -201,11 +201,11 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, } yield stlOutput private def readNeuroglancerPrecomputedMeshChunkAsStl( - meshFileKey: MeshFileKey, - chunkInfo: MeshChunk, - transform: Array[Array[Double]], - segmentId: Option[Long], - vertexQuantizationBits: Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = + meshFileKey: MeshfileKey, + chunkInfo: MeshChunk, + transform: Array[Array[Double]], + segmentId: Option[Long], + vertexQuantizationBits: Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { (dracoMeshChunkBytes, encoding) <- meshFileService.readMeshChunk( meshFileKey, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala index d00565a5715..c5e76ca966b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala @@ -19,7 +19,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance private lazy val meshFileCache = new Hdf5FileCache(30) - def mappingNameForMeshFile(meshFileKey: MeshFileKey): Box[Option[String]] = tryo { + def mappingNameForMeshFile(meshFileKey: MeshfileKey): Box[Option[String]] = tryo { meshFileCache .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => cachedMeshFile.mappingName @@ -30,14 +30,14 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance } } - private def readMeshfileMetadata(meshFileKey: MeshFileKey): Box[(String, Double, Array[Array[Double]])] = + private def readMeshfileMetadata(meshFileKey: MeshfileKey): Box[(String, Double, Array[Array[Double]])] = meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => val lodScaleMultiplier = cachedMeshFile.float64Reader.getAttr("/", "lod_scale_multiplier") val transform = cachedMeshFile.float64Reader.getMatrixAttr("/", "transform") (cachedMeshFile.meshFormat, lodScaleMultiplier, transform) } - private def listMeshChunksForSegmentsNested(meshFileKey: MeshFileKey, + private def listMeshChunksForSegmentsNested(meshFileKey: MeshfileKey, segmentIds: Seq[Long], lodScaleMultiplier: Double, transform: Array[Array[Double]]): List[List[MeshLodInfo]] = @@ -99,7 +99,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance transform: Array[Array[Double]], lod: Int): Array[Array[Double]] = transform - def versionForMeshFile(meshFileKey: MeshFileKey): Long = + def versionForMeshFile(meshFileKey: MeshfileKey): Long = meshFileCache .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => cachedMeshFile.artifactSchemaVersion @@ -107,7 +107,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance .toOption .getOrElse(0) - def readMeshChunk(meshFileKey: MeshFileKey, + def readMeshChunk(meshFileKey: MeshfileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest]): Box[(Array[Byte], String)] = for { resultBox <- meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => @@ -135,7 +135,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance Full((dataSorted.flatMap(d => d._1).toArray, meshFormat)) } - def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( + def listMeshChunksForMultipleSegments(meshFileKey: MeshfileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = for { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index 6167440368b..0e1923f5474 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -53,17 +53,17 @@ object MeshChunkDataRequestList { } // TODO should this become a generic AttachmentKey? 
-case class MeshFileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) +case class MeshfileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) // Sent to wk frontend -case class MeshFileInfo( +case class MeshfileInfo( name: String, mappingName: Option[String], formatVersion: Long ) -object MeshFileInfo { - implicit val jsonFormat: OFormat[MeshFileInfo] = Json.format[MeshFileInfo] +object MeshfileInfo { + implicit val jsonFormat: OFormat[MeshfileInfo] = Json.format[MeshfileInfo] } class MeshFileService @Inject()(config: DataStoreConfig, @@ -78,16 +78,16 @@ class MeshFileService @Inject()(config: DataStoreConfig, private val meshesDir = "meshes" private val meshFileKeyCache - : AlfuCache[(DataSourceId, String, String), MeshFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → MeshFileKey + : AlfuCache[(DataSourceId, String, String), MeshfileKey] = AlfuCache() // dataSourceId, layerName, mappingName → MeshFileKey def lookUpMeshFile(dataSourceId: DataSourceId, dataLayer: DataLayer, meshFileName: String)( - implicit ec: ExecutionContext): Fox[MeshFileKey] = + implicit ec: ExecutionContext): Fox[MeshfileKey] = meshFileKeyCache.getOrLoad((dataSourceId, dataLayer.name, meshFileName), _ => lookUpMeshFileImpl(dataSourceId, dataLayer, meshFileName).toFox) private def lookUpMeshFileImpl(dataSourceId: DataSourceId, dataLayer: DataLayer, - meshFileName: String): Box[MeshFileKey] = { + meshFileName: String): Box[MeshfileKey] = { val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { case Some(attachments) => attachments.meshes.find(_.name == meshFileName) case None => None @@ -100,7 +100,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatsetDir, dataLayer.name)) }) } yield - MeshFileKey( + MeshfileKey( dataSourceId, dataLayer.name, registeredAttachmentNormalized.getOrElse( @@ -115,7 +115,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, def listMeshFiles(dataSourceId: DataSourceId, dataLayer: DataLayer)(implicit ec: ExecutionContext, tc: TokenContext, - m: MessagesProvider): Fox[Seq[MeshFileInfo]] = { + m: MessagesProvider): Fox[Seq[MeshfileInfo]] = { val attachedMeshFileNames = dataLayer.attachments.map(_.meshes).getOrElse(Seq.empty).map(_.name).toSet val layerDir = @@ -139,12 +139,12 @@ class MeshFileService @Inject()(config: DataStoreConfig, formatVersion <- versionForMeshFile(meshFileKey) ?~> Messages("mesh.file.readVersion.failed", meshFileName) mappingName <- mappingNameForMeshFile(meshFileKey) ?~> Messages("mesh.file.readMappingName.failed", meshFileName) - } yield MeshFileInfo(meshFileName, mappingName, formatVersion) + } yield MeshfileInfo(meshFileName, mappingName, formatVersion) } } // Same as above but this variant constructs the meshFilePath itself and converts null to None - def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + def mappingNameForMeshFile(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Option[String]] = meshFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => @@ -155,7 +155,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, Fox.successful(None) } - private def versionForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = + private def versionForMeshFile(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, tc: TokenContext): 
Fox[Long] = meshFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrMeshFileService.versionForMeshFile(meshFileKey) @@ -165,7 +165,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, Fox.successful(NeuroglancerMesh.meshInfoVersion) } - def listMeshChunksForSegmentsMerged(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( + def listMeshChunksForSegmentsMerged(meshFileKey: MeshfileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = @@ -178,7 +178,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) } - def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest], + def readMeshChunk(meshFileKey: MeshfileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest], )(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Array[Byte], String)] = meshFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrMeshFileService.readMeshChunk(meshFileKey, meshChunkDataRequests) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala index 24a49de6f62..f3b29536c06 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala @@ -28,12 +28,12 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(dataVaultService: DataVau extends FoxImplicits with NeuroglancerMeshHelper { - private lazy val meshInfoCache = AlfuCache[VaultPath, NeuroglancerMesh](100) + private lazy val meshInfoCache = AlfuCache[MeshfileKey, NeuroglancerMesh](100) - private def loadRemoteMeshInfo(meshPath: VaultPath)(implicit tc: TokenContext): Fox[NeuroglancerMesh] = + private def loadRemoteMeshInfo(meshfileKey: MeshfileKey)(implicit tc: TokenContext): Fox[NeuroglancerMesh] = for { - _ <- Fox.successful(()) - meshInfoPath = meshPath / NeuroglancerMesh.FILENAME_INFO + vaultPath <- dataVaultService.getVaultPath(meshfileKey.attachment) + meshInfoPath = vaultPath / NeuroglancerMesh.FILENAME_INFO meshInfo <- meshInfoPath.parseAsJson[NeuroglancerPrecomputedMeshInfo] ?~> "Failed to read mesh info" _ <- Fox.fromBool(meshInfo.transform.length == 12) ?~> "Invalid mesh info: transform has to be of length 12" } yield NeuroglancerMesh(meshInfo) @@ -72,11 +72,11 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(dataVaultService: DataVau ) } - def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentId: Seq[Long])( + def listMeshChunksForMultipleSegments(meshFileKey: MeshfileKey, segmentId: Seq[Long])( implicit tc: TokenContext): Fox[WebknossosSegmentInfo] = for { vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) - mesh <- meshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) + mesh <- meshInfoCache.getOrLoad(meshFileKey, loadRemoteMeshInfo) chunkScale = Array.fill(3)(1 / math.pow(2, mesh.meshInfo.vertex_quantization_bits)) meshSegmentInfos <- Fox.serialCombined(segmentId)(id => listMeshChunks(vaultPath, mesh, id)) segmentInfo <- WebknossosSegmentInfo @@ -101,13 +101,13 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(dataVaultService: 
DataVau segmentId) } yield meshSegmentInfo - def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest])( + def readMeshChunk(meshfileKey: MeshfileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest])( implicit tc: TokenContext): Fox[(Array[Byte], String)] = for { - vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) + vaultPath <- dataVaultService.getVaultPath(meshfileKey.attachment) segmentId <- meshChunkDataRequests.head.segmentId.toFox ?~> "Segment id parameter is required" _ <- Fox.fromBool(meshChunkDataRequests.flatMap(_.segmentId).distinct.length == 1) ?~> "All requests must have the same segment id" - mesh <- meshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) + mesh <- meshInfoCache.getOrLoad(meshfileKey, loadRemoteMeshInfo) minishardInfo = mesh.shardingSpecification.getMinishardInfo(segmentId) shardUrl = mesh.shardingSpecification.getPathForShard(vaultPath, minishardInfo._1) chunks <- Fox.serialCombined(meshChunkDataRequests.toList)(request => @@ -115,13 +115,13 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(dataVaultService: DataVau output = chunks.flatten.toArray } yield (output, NeuroglancerMesh.meshEncoding) - def getVertexQuantizationBits(meshFileKey: MeshFileKey)(implicit tc: TokenContext): Fox[Int] = + def getVertexQuantizationBits(meshfileKey: MeshfileKey)(implicit tc: TokenContext): Fox[Int] = for { - vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) - mesh <- meshInfoCache.getOrLoad(vaultPath, loadRemoteMeshInfo) - } yield mesh.meshInfo.vertex_quantization_bits + meshInfo <- meshInfoCache.getOrLoad(meshfileKey, loadRemoteMeshInfo) + } yield meshInfo.meshInfo.vertex_quantization_bits def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = - // TODO - 0 + meshInfoCache.clear { meshFileKey => + meshFileKey.dataSourceId == dataSourceId && layerNameOpt.forall(meshFileKey.layerName == _) + } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index d6c61df4e23..cfabc7b5bac 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -1,6 +1,7 @@ package com.scalableminds.webknossos.datastore.services.mesh import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Float import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray @@ -33,18 +34,19 @@ object MeshfileAttributes { implicit object MeshfileAttributesZarr3GroupHeaderReads extends Reads[MeshfileAttributes] { override def reads(json: JsValue): JsResult[MeshfileAttributes] = { + val keyAttributes = "attributes" val keyVx = "voxelytics" val keyFormatVersion = "artifact_schema_version" val keyArtifactAttrs = "artifact_attributes" - val meshfileAttrs = json \ keyVx \ keyArtifactAttrs + val meshfileAttrs = json \ keyAttributes \ keyVx \ keyArtifactAttrs for { - formatVersion <- (json \ keyVx \ keyFormatVersion).validate[Long] + formatVersion <- (json \ keyAttributes \ keyVx \ keyFormatVersion).validate[Long] meshFormat <- (meshfileAttrs \ "mesh_format").validate[String] lodScaleMultiplier <- (meshfileAttrs \ 
"lod_scale_multiplier").validate[Double] transform <- (meshfileAttrs \ "transform").validate[Array[Array[Double]]] hashFunction <- (meshfileAttrs \ "hash_function").validate[String] nBuckets <- (meshfileAttrs \ "n_buckets").validate[Int] - mappingName <- (meshfileAttrs \ "mappingName").validateOpt[String] + mappingName <- (meshfileAttrs \ "mapping_name").validateOpt[String] } yield MeshfileAttributes( formatVersion, @@ -69,9 +71,11 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa // TODO rename meshFile to meshfile? - // TODO cache? - private def readMeshFileAttributes(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[MeshfileAttributes] = + private lazy val openArraysCache = AlfuCache[(MeshfileKey, String), DatasetArray]() + private lazy val attributesCache = AlfuCache[MeshfileKey, MeshfileAttributes]() + + private def readMeshFileAttributesImpl(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[MeshfileAttributes] = for { groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) groupHeaderBytes <- (groupVaultPath / MeshfileAttributes.FILENAME_ZARR_JSON).readBytes() @@ -80,24 +84,28 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa .toFox ?~> "Could not parse meshfile attributes from zarr group file" } yield meshfileAttributes - def readMeshfileMetadata(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + private def readMeshfileAttributes(meshfileKey: MeshfileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[MeshfileAttributes] = + attributesCache.getOrLoad(meshfileKey, key => readMeshFileAttributesImpl(key)) + + def readMeshfileMetadata(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(String, Double, Array[Array[Double]])] = for { - meshfileAttributes <- readMeshFileAttributes(meshFileKey) + meshfileAttributes <- readMeshfileAttributes(meshFileKey) } yield (meshfileAttributes.meshFormat, meshfileAttributes.lodScaleMultiplier, meshfileAttributes.transform) - def versionForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = + def versionForMeshFile(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { - meshfileAttributes <- readMeshFileAttributes(meshFileKey) + meshfileAttributes <- readMeshfileAttributes(meshFileKey) } yield meshfileAttributes.formatVersion - def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + def mappingNameForMeshFile(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Option[String]] = for { - meshfileAttributes <- readMeshFileAttributes(meshFileKey) + meshfileAttributes <- readMeshfileAttributes(meshFileKey) } yield meshfileAttributes.mappingName - def listMeshChunksForSegment(meshFileKey: MeshFileKey, segmentId: Long, meshfileAttributes: MeshfileAttributes)( + def listMeshChunksForSegment(meshFileKey: MeshfileKey, segmentId: Long, meshfileAttributes: MeshfileAttributes)( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[MeshLodInfo]] = for { @@ -118,7 +126,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa segmentId) private def getNeuroglancerSegmentManifestOffsets( - meshFileKey: MeshFileKey, + meshFileKey: MeshfileKey, meshfileAttributes: MeshfileAttributes, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Long, Long)] = { 
val bucketIndex = meshfileAttributes.applyHashFunction(segmentId) % meshfileAttributes.nBuckets @@ -131,8 +139,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa _ <- Fox.fromBool(bucketSize > 0) ?~> s"No entry for segment $segmentId" bucketsArray <- openZarrArray(meshFileKey, keyBuckets) bucket <- bucketsArray.readAsMultiArray(offset = Array(bucketStart, 0), shape = Array(bucketSize + 1, 3)) - bucketLocalOffset <- findLocalOffsetInBucket(bucket, segmentId).toFox - _ <- Fox.fromBool(bucketLocalOffset >= 0) ?~> s"SegmentId $segmentId not in bucket list" + bucketLocalOffset <- findLocalOffsetInBucket(bucket, segmentId).toFox ?~> s"SegmentId $segmentId not in bucket list" neuroglancerStart = bucket.getLong(bucket.getIndex.set(Array(bucketLocalOffset, 1))) neuroglancerEnd = bucket.getLong(bucket.getIndex.set(Array(bucketLocalOffset, 2))) } yield (neuroglancerStart, neuroglancerEnd) @@ -141,8 +148,12 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa private def findLocalOffsetInBucket(bucket: MultiArray, segmentId: Long): Option[Int] = (0 until bucket.getShape()(0)).find(idx => bucket.getLong(bucket.getIndex.set(Array(idx, 0))) == segmentId) - private def openZarrArray(meshFileKey: MeshFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, + private def openZarrArray(meshFileKey: MeshfileKey, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = + openArraysCache.getOrLoad((meshFileKey, zarrArrayName), _ => openZarrArrayImpl(meshFileKey, zarrArrayName)) + + private def openZarrArrayImpl(meshFileKey: MeshfileKey, zarrArrayName: String)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[DatasetArray] = for { groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) zarrArray <- Zarr3Array.open(groupVaultPath / zarrArrayName, @@ -166,12 +177,12 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa transform: Array[Array[Double]], lod: Int): Array[Array[Double]] = transform - def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( + def listMeshChunksForMultipleSegments(meshFileKey: MeshfileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = for { - meshfileAttributes <- readMeshFileAttributes(meshFileKey) + meshfileAttributes <- readMeshfileAttributes(meshFileKey) meshChunksForUnmappedSegments: List[List[MeshLodInfo]] <- listMeshChunksForSegmentsNested(meshFileKey, segmentIds, meshfileAttributes) @@ -184,7 +195,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa .toFox } yield wkChunkInfos - private def listMeshChunksForSegmentsNested(meshFileKey: MeshFileKey, + private def listMeshChunksForSegmentsNested(meshFileKey: MeshfileKey, segmentIds: Seq[Long], meshfileAttributes: MeshfileAttributes)( implicit ec: ExecutionContext, @@ -193,11 +204,11 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa listMeshChunksForSegment(meshFileKey, segmentId, meshfileAttributes) } - def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest])( + def readMeshChunk(meshFileKey: MeshfileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest])( implicit ec: ExecutionContext, tc: TokenContext): Fox[(Array[Byte], String)] = for { - meshfileAttributes <- readMeshFileAttributes(meshFileKey) + meshfileAttributes <- 
readMeshfileAttributes(meshFileKey) // TODO skip sorting in zarr case? use parallel requests instead? // Sort the requests by byte offset to optimize for spinning disk access @@ -216,7 +227,14 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa dataSortedFlat = dataSorted.flatMap(d => d._1).toArray } yield (dataSortedFlat, meshfileAttributes.meshFormat) - def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = - // TODO - 0 + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { + attributesCache.clear { meshFileKey => + meshFileKey.dataSourceId == dataSourceId && layerNameOpt.forall(meshFileKey.layerName == _) + } + + openArraysCache.clear { + case (meshFileKey, _) => + meshFileKey.dataSourceId == dataSourceId && layerNameOpt.forall(meshFileKey.layerName == _) + } + } } From 331d1787bd54af98c8fd7b9bebbd1b94b4148647 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 14:52:47 +0200 Subject: [PATCH 048/100] some cleanup --- .../services/AgglomerateService.scala | 9 ++++---- .../services/BinaryDataServiceHolder.scala | 21 ++++--------------- .../services/ZarrAgglomerateService.scala | 7 ++++--- .../services/mesh/DSFullMeshService.scala | 5 ++++- .../services/mesh/ZarrMeshFileService.scala | 4 +--- .../tracings/IdWithBoolUtils.scala | 1 - 6 files changed, 18 insertions(+), 29 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 4ff6874d44b..431d33af4bd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -24,13 +24,14 @@ import org.apache.commons.io.FilenameUtils import java.net.URI import java.nio.file.Paths +import javax.inject.Inject import scala.concurrent.ExecutionContext import scala.concurrent.duration.DurationInt -class AgglomerateService(config: DataStoreConfig, - zarrAgglomerateService: ZarrAgglomerateService, - hdf5AgglomerateService: Hdf5AgglomerateService, - remoteSourceDescriptorService: RemoteSourceDescriptorService) +class AgglomerateService @Inject()(config: DataStoreConfig, + zarrAgglomerateService: ZarrAgglomerateService, + hdf5AgglomerateService: Hdf5AgglomerateService, + remoteSourceDescriptorService: RemoteSourceDescriptorService) extends LazyLogging with FoxImplicits { private val agglomerateDir = "agglomerates" diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala index db99771d13b..cb14a24e924 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala @@ -2,37 +2,24 @@ package com.scalableminds.webknossos.datastore.services import java.nio.file.Paths import com.scalableminds.webknossos.datastore.DataStoreConfig -import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptorService} -import com.typesafe.scalalogging.LazyLogging +import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import javax.inject.Inject import scala.concurrent.ExecutionContext /* 
- * The BinaryDataService needs to be instantiated as singleton to provide a shared DataCubeCache. + * The BinaryDataService needs to be instantiated as singleton to provide a shared bucketProviderCache. * There is, however an additional instance for volume tracings in the TracingStore * The TracingStore one (for VolumeTracings) already is a singleton, since the surrounding VolumeTracingService is a singleton. * The DataStore one is singleton-ized via this holder. + * Also, this allows giving the datastore-only sharedChunkContentsCache to the datastore one, while passing None to the tracingstore one. */ class BinaryDataServiceHolder @Inject()(config: DataStoreConfig, remoteSourceDescriptorService: RemoteSourceDescriptorService, datasetErrorLoggingService: DSDatasetErrorLoggingService, chunkCacheService: ChunkCacheService, - dataVaultService: DataVaultService)(implicit ec: ExecutionContext) - extends LazyLogging { - - // TODO make them injectable again - val zarrAgglomerateService = - new ZarrAgglomerateService(config, dataVaultService, chunkCacheService.sharedChunkContentsCache) - val hdf5AgglomerateService = new Hdf5AgglomerateService(config) - - val agglomerateService = new AgglomerateService( - config, - zarrAgglomerateService, - hdf5AgglomerateService, - remoteSourceDescriptorService - ) + agglomerateService: AgglomerateService)(implicit ec: ExecutionContext) { val binaryDataService: BinaryDataService = new BinaryDataService( Paths.get(config.Datastore.baseDirectory), diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala index 868c1ca70d6..dd9fad44e06 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -18,12 +18,13 @@ import net.liftweb.common.Box.tryo import ucar.ma2.{Array => MultiArray} import java.nio.{ByteBuffer, ByteOrder, LongBuffer} +import javax.inject.Inject import scala.collection.compat.immutable.ArraySeq import scala.concurrent.ExecutionContext -class ZarrAgglomerateService(config: DataStoreConfig, - dataVaultService: DataVaultService, - sharedChunkContentsCache: AlfuCache[String, MultiArray]) +class ZarrAgglomerateService @Inject()(config: DataStoreConfig, + dataVaultService: DataVaultService, + sharedChunkContentsCache: AlfuCache[String, MultiArray]) extends DataConverter with LazyLogging { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index 8301db64f40..5c0fa2c5132 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -37,7 +37,6 @@ object FullMeshRequest { class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, meshFileService: MeshFileService, - neuroglancerPrecomputedMeshService: NeuroglancerPrecomputedMeshFileService, val binaryDataServiceHolder: BinaryDataServiceHolder, val dsRemoteWebknossosClient: DSRemoteWebknossosClient, val dsRemoteTracingstoreClient: DSRemoteTracingstoreClient, @@ -167,6 +166,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: 
DataSourceRepository, stlEncodedChunk <- getStlEncodedChunkFromDraco(chunkInfo, transform, dracoMeshChunkBytes) } yield stlEncodedChunk + /* // TODO delete if above works also for neuroglancer private def loadFullMeshFromRemoteNeuroglancerMeshFile(meshFileKey: MeshfileKey, fullMeshRequest: FullMeshRequest)( implicit ec: ExecutionContext, @@ -215,6 +215,9 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, stlEncodedChunk <- getStlEncodedChunkFromDraco(chunkInfo, transform, dracoMeshChunkBytes, vertexQuantizationBits) } yield stlEncodedChunk + + */ + private def getStlEncodedChunkFromDraco( chunkInfo: MeshChunk, transform: Array[Array[Double]], diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index cfabc7b5bac..7a41858d614 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -69,9 +69,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa private val keyBuckets = "buckets" private val keyNeuroglancer = "neuroglancer" - // TODO rename meshFile to meshfile? - - private lazy val openArraysCache = AlfuCache[(MeshfileKey, String), DatasetArray]() + private lazy val openArraysCache = AlfuCache[(MeshfileKey, String), DatasetArray]() private lazy val attributesCache = AlfuCache[MeshfileKey, MeshfileAttributes]() private def readMeshFileAttributesImpl(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, diff --git a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/tracings/IdWithBoolUtils.scala b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/tracings/IdWithBoolUtils.scala index 07d3096ea0a..ccf2c255cfa 100644 --- a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/tracings/IdWithBoolUtils.scala +++ b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/tracings/IdWithBoolUtils.scala @@ -5,7 +5,6 @@ import com.scalableminds.webknossos.datastore.IdWithBool.{Id32WithBool, Id64With trait IdWithBoolUtils { // Protobuf classes Id32ToBool and Id64ToBool are used to store maps from id to boolean flags in annotation user state // This trait provides utility methods to mutate sequences of these, and conversions to and from Map and mutableMap - // TODO naming! 
protected def id32WithBoolsToMutableMap(idWithBools: Seq[Id32WithBool]): collection.mutable.Map[Int, Boolean] = idWithBools.map { idWithBool => From 43d90519056639ebeb7caae777f5765d3a0e2b7a Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 14:56:41 +0200 Subject: [PATCH 049/100] in list request, only return successes --- .../services/mesh/MeshFileService.scala | 24 ++++++++++--------- .../services/mesh/ZarrMeshFileService.scala | 3 +-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index 0e1923f5474..e11a43bd996 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -52,7 +52,6 @@ object MeshChunkDataRequestList { implicit val jsonFormat: OFormat[MeshChunkDataRequestList] = Json.format[MeshChunkDataRequestList] } -// TODO should this become a generic AttachmentKey? case class MeshfileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) // Sent to wk frontend @@ -131,16 +130,19 @@ class MeshFileService @Inject()(config: DataStoreConfig, val allMeshFileNames = attachedMeshFileNames ++ scannedMeshFileNames - // TODO skip failures - Fox.serialCombined(allMeshFileNames) { meshFileName => - for { - meshFileKey <- lookUpMeshFile(dataSourceId, dataLayer, meshFileName) ?~> Messages("mesh.file.lookup.failed", - meshFileName) - formatVersion <- versionForMeshFile(meshFileKey) ?~> Messages("mesh.file.readVersion.failed", meshFileName) - mappingName <- mappingNameForMeshFile(meshFileKey) ?~> Messages("mesh.file.readMappingName.failed", - meshFileName) - } yield MeshfileInfo(meshFileName, mappingName, formatVersion) - } + Fox.fromFuture( + Fox + .serialSequence(allMeshFileNames.toSeq) { meshFileName => + for { + meshFileKey <- lookUpMeshFile(dataSourceId, dataLayer, meshFileName) ?~> Messages("mesh.file.lookup.failed", + meshFileName) + formatVersion <- versionForMeshFile(meshFileKey) ?~> Messages("mesh.file.readVersion.failed", meshFileName) + mappingName <- mappingNameForMeshFile(meshFileKey) ?~> Messages("mesh.file.readMappingName.failed", + meshFileName) + } yield MeshfileInfo(meshFileName, mappingName, formatVersion) + } + // Only return successes, we don’t want a malformed file breaking the list request. 
+ .map(_.flatten)) } // Same as above but this variant constructs the meshFilePath itself and converts null to None diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 7a41858d614..9e0c0591239 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -69,7 +69,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa private val keyBuckets = "buckets" private val keyNeuroglancer = "neuroglancer" - private lazy val openArraysCache = AlfuCache[(MeshfileKey, String), DatasetArray]() + private lazy val openArraysCache = AlfuCache[(MeshfileKey, String), DatasetArray]() private lazy val attributesCache = AlfuCache[MeshfileKey, MeshfileAttributes]() private def readMeshFileAttributesImpl(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, @@ -208,7 +208,6 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa for { meshfileAttributes <- readMeshfileAttributes(meshFileKey) - // TODO skip sorting in zarr case? use parallel requests instead? // Sort the requests by byte offset to optimize for spinning disk access requestsReordered = meshChunkDataRequests.zipWithIndex .sortBy(requestAndIndex => requestAndIndex._1.byteOffset) From d6a98179a1be0103dd367bb47f683b64b0f242a1 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 14:59:33 +0200 Subject: [PATCH 050/100] add migration to guide --- MIGRATIONS.unreleased.md | 1 + 1 file changed, 1 insertion(+) diff --git a/MIGRATIONS.unreleased.md b/MIGRATIONS.unreleased.md index e908e75053d..25484e25f87 100644 --- a/MIGRATIONS.unreleased.md +++ b/MIGRATIONS.unreleased.md @@ -12,3 +12,4 @@ User-facing changes are documented in the [changelog](CHANGELOG.released.md). 
### Postgres Evolutions: - [134-dataset-layer-attachments.sql](conf/evolutions/134-dataset-layer-attachments.sql) +- [135-neuroglancer-attachment.sql](conf/evolutions/135-neuroglancer-attachment.sql) From 73a7ac20940f7c72daa4fd1fd91d748205b13932 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 15:07:46 +0200 Subject: [PATCH 051/100] unify spelling meshFile --- .../controllers/DSMeshController.scala | 4 +- .../services/mesh/DSFullMeshService.scala | 14 +-- .../services/mesh/Hdf5MeshFileService.scala | 18 +-- .../services/mesh/MeshFileService.scala | 28 ++--- .../services/mesh/MeshMappingHelper.scala | 6 +- ...uroglancerPrecomputedMeshFileService.scala | 18 +-- .../services/mesh/ZarrMeshFileService.scala | 104 +++++++++--------- .../datastore/storage/Hdf5FileCache.scala | 4 +- .../tracings/volume/TSFullMeshService.scala | 4 +- 9 files changed, 100 insertions(+), 100 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index cc6bd947865..fb2eeba0081 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -46,10 +46,10 @@ class DSMeshController @Inject()( def listMeshChunksForSegment(organizationId: String, datasetDirectoryName: String, dataLayerName: String, - /* If targetMappingName is set, assume that meshfile contains meshes for + /* If targetMappingName is set, assume that meshFile contains meshes for the oversegmentation. Collect mesh chunks of all *unmapped* segment ids belonging to the supplied agglomerate id. - If it is not set, use meshfile as is, assume passed id is present in meshfile + If it is not set, use meshFile as is, assume passed id is present in meshFile Note: in case of an editable mapping, targetMappingName is its baseMapping name. */ targetMappingName: Option[String], diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index 5c0fa2c5132..2c9a5f497e8 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -60,7 +60,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, m: MessagesProvider, tc: TokenContext): Fox[Array[Byte]] = if (fullMeshRequest.meshFileName.isDefined) - loadFullMeshFromMeshfile(organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) + loadFullMeshFromMeshFile(organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) else loadFullMeshFromAdHoc(organizationId, datasetDirectoryName, dataLayerName, fullMeshRequest) @@ -121,7 +121,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, } // TODO make sure this also works for the remote neuroglancer variant. 
if so, delete other implementation - private def loadFullMeshFromMeshfile(organizationId: String, + private def loadFullMeshFromMeshFile(organizationId: String, datasetDirectoryName: String, dataLayerName: String, fullMeshRequest: FullMeshRequest)(implicit ec: ExecutionContext, @@ -151,10 +151,10 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, readMeshChunkAsStl(meshFileKey, chunkRange, transform) } stlOutput = combineEncodedChunksToStl(stlEncodedChunks) - _ = logMeshingDuration(before, "meshfile", stlOutput.length) + _ = logMeshingDuration(before, "meshFile", stlOutput.length) } yield stlOutput - private def readMeshChunkAsStl(meshFileKey: MeshfileKey, chunkInfo: MeshChunk, transform: Array[Array[Double]])( + private def readMeshChunkAsStl(meshFileKey: MeshFileKey, chunkInfo: MeshChunk, transform: Array[Array[Double]])( implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { @@ -168,7 +168,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, /* // TODO delete if above works also for neuroglancer - private def loadFullMeshFromRemoteNeuroglancerMeshFile(meshFileKey: MeshfileKey, fullMeshRequest: FullMeshRequest)( + private def loadFullMeshFromRemoteNeuroglancerMeshFile(meshFileKey: MeshFileKey, fullMeshRequest: FullMeshRequest)( implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { @@ -201,7 +201,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, } yield stlOutput private def readNeuroglancerPrecomputedMeshChunkAsStl( - meshFileKey: MeshfileKey, + meshFileKey: MeshFileKey, chunkInfo: MeshChunk, transform: Array[Array[Double]], segmentId: Option[Long], @@ -224,7 +224,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, dracoBytes: Array[Byte], vertexQuantizationBits: Int = 0)(implicit ec: ExecutionContext): Fox[Array[Byte]] = for { - scale <- tryo(Vec3Double(transform(0)(0), transform(1)(1), transform(2)(2))).toFox ?~> "could not extract scale from meshfile transform attribute" + scale <- tryo(Vec3Double(transform(0)(0), transform(1)(1), transform(2)(2))).toFox ?~> "could not extract scale from mesh file transform attribute" stlEncodedChunk <- tryo( dracoToStlConverter.dracoToStl(dracoBytes, chunkInfo.position.x, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala index c5e76ca966b..97b700303ba 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala @@ -19,7 +19,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance private lazy val meshFileCache = new Hdf5FileCache(30) - def mappingNameForMeshFile(meshFileKey: MeshfileKey): Box[Option[String]] = tryo { + def mappingNameForMeshFile(meshFileKey: MeshFileKey): Box[Option[String]] = tryo { meshFileCache .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => cachedMeshFile.mappingName @@ -30,14 +30,14 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance } } - private def readMeshfileMetadata(meshFileKey: MeshfileKey): Box[(String, Double, Array[Array[Double]])] = + private def readMeshFileMetadata(meshFileKey: MeshFileKey): Box[(String, Double, Array[Array[Double]])] = 
meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => val lodScaleMultiplier = cachedMeshFile.float64Reader.getAttr("/", "lod_scale_multiplier") val transform = cachedMeshFile.float64Reader.getMatrixAttr("/", "transform") (cachedMeshFile.meshFormat, lodScaleMultiplier, transform) } - private def listMeshChunksForSegmentsNested(meshFileKey: MeshfileKey, + private def listMeshChunksForSegmentsNested(meshFileKey: MeshFileKey, segmentIds: Seq[Long], lodScaleMultiplier: Double, transform: Array[Array[Double]]): List[List[MeshLodInfo]] = @@ -99,7 +99,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance transform: Array[Array[Double]], lod: Int): Array[Array[Double]] = transform - def versionForMeshFile(meshFileKey: MeshfileKey): Long = + def versionForMeshFile(meshFileKey: MeshFileKey): Long = meshFileCache .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => cachedMeshFile.artifactSchemaVersion @@ -107,16 +107,16 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance .toOption .getOrElse(0) - def readMeshChunk(meshFileKey: MeshfileKey, + def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest]): Box[(Array[Byte], String)] = for { resultBox <- meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => - readMeshChunkFromCachedMeshfile(cachedMeshFile, meshChunkDataRequests) + readMeshChunkFromCachedMeshFile(cachedMeshFile, meshChunkDataRequests) } (output, encoding) <- resultBox } yield (output, encoding) - private def readMeshChunkFromCachedMeshfile( + private def readMeshChunkFromCachedMeshFile( cachedMeshFile: CachedHdf5File, meshChunkDataRequests: Seq[MeshChunkDataRequest]): Box[(Array[Byte], String)] = { val meshFormat = cachedMeshFile.meshFormat @@ -135,11 +135,11 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance Full((dataSorted.flatMap(d => d._1).toArray, meshFormat)) } - def listMeshChunksForMultipleSegments(meshFileKey: MeshfileKey, segmentIds: Seq[Long])( + def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = for { - (meshFormat, lodScaleMultiplier, transform) <- readMeshfileMetadata(meshFileKey).toFox + (meshFormat, lodScaleMultiplier, transform) <- readMeshFileMetadata(meshFileKey).toFox meshChunksForUnmappedSegments: List[List[MeshLodInfo]] = listMeshChunksForSegmentsNested(meshFileKey, segmentIds, lodScaleMultiplier, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index e11a43bd996..a712f532556 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -52,17 +52,17 @@ object MeshChunkDataRequestList { implicit val jsonFormat: OFormat[MeshChunkDataRequestList] = Json.format[MeshChunkDataRequestList] } -case class MeshfileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) +case class MeshFileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) // Sent to wk frontend -case class MeshfileInfo( +case class MeshFileInfo( name: String, mappingName: Option[String], formatVersion: Long ) -object MeshfileInfo { - implicit val 
jsonFormat: OFormat[MeshfileInfo] = Json.format[MeshfileInfo] +object MeshFileInfo { + implicit val jsonFormat: OFormat[MeshFileInfo] = Json.format[MeshFileInfo] } class MeshFileService @Inject()(config: DataStoreConfig, @@ -77,16 +77,16 @@ class MeshFileService @Inject()(config: DataStoreConfig, private val meshesDir = "meshes" private val meshFileKeyCache - : AlfuCache[(DataSourceId, String, String), MeshfileKey] = AlfuCache() // dataSourceId, layerName, mappingName → MeshFileKey + : AlfuCache[(DataSourceId, String, String), MeshFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → MeshFileKey def lookUpMeshFile(dataSourceId: DataSourceId, dataLayer: DataLayer, meshFileName: String)( - implicit ec: ExecutionContext): Fox[MeshfileKey] = + implicit ec: ExecutionContext): Fox[MeshFileKey] = meshFileKeyCache.getOrLoad((dataSourceId, dataLayer.name, meshFileName), _ => lookUpMeshFileImpl(dataSourceId, dataLayer, meshFileName).toFox) private def lookUpMeshFileImpl(dataSourceId: DataSourceId, dataLayer: DataLayer, - meshFileName: String): Box[MeshfileKey] = { + meshFileName: String): Box[MeshFileKey] = { val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { case Some(attachments) => attachments.meshes.find(_.name == meshFileName) case None => None @@ -99,7 +99,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatsetDir, dataLayer.name)) }) } yield - MeshfileKey( + MeshFileKey( dataSourceId, dataLayer.name, registeredAttachmentNormalized.getOrElse( @@ -114,7 +114,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, def listMeshFiles(dataSourceId: DataSourceId, dataLayer: DataLayer)(implicit ec: ExecutionContext, tc: TokenContext, - m: MessagesProvider): Fox[Seq[MeshfileInfo]] = { + m: MessagesProvider): Fox[Seq[MeshFileInfo]] = { val attachedMeshFileNames = dataLayer.attachments.map(_.meshes).getOrElse(Seq.empty).map(_.name).toSet val layerDir = @@ -139,14 +139,14 @@ class MeshFileService @Inject()(config: DataStoreConfig, formatVersion <- versionForMeshFile(meshFileKey) ?~> Messages("mesh.file.readVersion.failed", meshFileName) mappingName <- mappingNameForMeshFile(meshFileKey) ?~> Messages("mesh.file.readMappingName.failed", meshFileName) - } yield MeshfileInfo(meshFileName, mappingName, formatVersion) + } yield MeshFileInfo(meshFileName, mappingName, formatVersion) } // Only return successes, we don’t want a malformed file breaking the list request. 
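// (Each list element is a Box; assuming the usual net.liftweb Box semantics, flatten keeps the
// Full values and drops Empty/Failure entries, so one unreadable mesh file does not fail the listing.)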
.map(_.flatten)) } // Same as above but this variant constructs the meshFilePath itself and converts null to None - def mappingNameForMeshFile(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, + def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Option[String]] = meshFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => @@ -157,7 +157,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, Fox.successful(None) } - private def versionForMeshFile(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = + private def versionForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = meshFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrMeshFileService.versionForMeshFile(meshFileKey) @@ -167,7 +167,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, Fox.successful(NeuroglancerMesh.meshInfoVersion) } - def listMeshChunksForSegmentsMerged(meshFileKey: MeshfileKey, segmentIds: Seq[Long])( + def listMeshChunksForSegmentsMerged(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = @@ -180,7 +180,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) } - def readMeshChunk(meshFileKey: MeshfileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest], + def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest], )(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Array[Byte], String)] = meshFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrMeshFileService.readMeshChunk(meshFileKey, meshChunkDataRequests) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala index 6bb64a978b0..256e23d0149 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala @@ -29,13 +29,13 @@ trait MeshMappingHelper extends FoxImplicits { )(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = (targetMappingName, editableMappingTracingId) match { case (None, None) => - // No mapping selected, assume id matches meshfile + // No mapping selected, assume id matches meshFile Fox.successful(List(agglomerateId)) case (Some(mappingName), None) if mappingNameForMeshFile.contains(mappingName) => - // Mapping selected, but meshfile has the same mapping name in its metadata, assume id matches meshfile + // Mapping selected, but meshFile has the same mapping name in its metadata, assume id matches meshFile Fox.successful(List(agglomerateId)) case (Some(mappingName), None) => - // Mapping selected, but meshfile does not have matching mapping name in its metadata, + // Mapping selected, but meshFile does not have matching mapping name in its metadata, // assume agglomerate id, fetch oversegmentation segment ids for it for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala index f3b29536c06..629791cb33d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/NeuroglancerPrecomputedMeshFileService.scala @@ -28,11 +28,11 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(dataVaultService: DataVau extends FoxImplicits with NeuroglancerMeshHelper { - private lazy val meshInfoCache = AlfuCache[MeshfileKey, NeuroglancerMesh](100) + private lazy val meshInfoCache = AlfuCache[MeshFileKey, NeuroglancerMesh](100) - private def loadRemoteMeshInfo(meshfileKey: MeshfileKey)(implicit tc: TokenContext): Fox[NeuroglancerMesh] = + private def loadRemoteMeshInfo(meshFileKey: MeshFileKey)(implicit tc: TokenContext): Fox[NeuroglancerMesh] = for { - vaultPath <- dataVaultService.getVaultPath(meshfileKey.attachment) + vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) meshInfoPath = vaultPath / NeuroglancerMesh.FILENAME_INFO meshInfo <- meshInfoPath.parseAsJson[NeuroglancerPrecomputedMeshInfo] ?~> "Failed to read mesh info" _ <- Fox.fromBool(meshInfo.transform.length == 12) ?~> "Invalid mesh info: transform has to be of length 12" @@ -72,7 +72,7 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(dataVaultService: DataVau ) } - def listMeshChunksForMultipleSegments(meshFileKey: MeshfileKey, segmentId: Seq[Long])( + def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentId: Seq[Long])( implicit tc: TokenContext): Fox[WebknossosSegmentInfo] = for { vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) @@ -101,13 +101,13 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(dataVaultService: DataVau segmentId) } yield meshSegmentInfo - def readMeshChunk(meshfileKey: MeshfileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest])( + def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest])( implicit tc: TokenContext): Fox[(Array[Byte], String)] = for { - vaultPath <- dataVaultService.getVaultPath(meshfileKey.attachment) + vaultPath <- dataVaultService.getVaultPath(meshFileKey.attachment) segmentId <- meshChunkDataRequests.head.segmentId.toFox ?~> "Segment id parameter is required" _ <- Fox.fromBool(meshChunkDataRequests.flatMap(_.segmentId).distinct.length == 1) ?~> "All requests must have the same segment id" - mesh <- meshInfoCache.getOrLoad(meshfileKey, loadRemoteMeshInfo) + mesh <- meshInfoCache.getOrLoad(meshFileKey, loadRemoteMeshInfo) minishardInfo = mesh.shardingSpecification.getMinishardInfo(segmentId) shardUrl = mesh.shardingSpecification.getPathForShard(vaultPath, minishardInfo._1) chunks <- Fox.serialCombined(meshChunkDataRequests.toList)(request => @@ -115,9 +115,9 @@ class NeuroglancerPrecomputedMeshFileService @Inject()(dataVaultService: DataVau output = chunks.flatten.toArray } yield (output, NeuroglancerMesh.meshEncoding) - def getVertexQuantizationBits(meshfileKey: MeshfileKey)(implicit tc: TokenContext): Fox[Int] = + def getVertexQuantizationBits(meshFileKey: MeshFileKey)(implicit tc: TokenContext): Fox[Int] = for { - meshInfo <- meshInfoCache.getOrLoad(meshfileKey, loadRemoteMeshInfo) + meshInfo <- meshInfoCache.getOrLoad(meshFileKey, loadRemoteMeshInfo) } yield 
meshInfo.meshInfo.vertex_quantization_bits def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 9e0c0591239..6d58f7c0c09 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -17,7 +17,7 @@ import ucar.ma2.{Array => MultiArray} import javax.inject.Inject import scala.concurrent.ExecutionContext -case class MeshfileAttributes( +case class MeshFileAttributes( formatVersion: Long, meshFormat: String, // AKA encoding (e.g. "draco") lodScaleMultiplier: Double, @@ -29,26 +29,26 @@ case class MeshfileAttributes( lazy val applyHashFunction: Long => Long = getHashFunction(hashFunction) } -object MeshfileAttributes { +object MeshFileAttributes { val FILENAME_ZARR_JSON = "zarr.json" - implicit object MeshfileAttributesZarr3GroupHeaderReads extends Reads[MeshfileAttributes] { - override def reads(json: JsValue): JsResult[MeshfileAttributes] = { + implicit object MeshFileAttributesZarr3GroupHeaderReads extends Reads[MeshFileAttributes] { + override def reads(json: JsValue): JsResult[MeshFileAttributes] = { val keyAttributes = "attributes" val keyVx = "voxelytics" val keyFormatVersion = "artifact_schema_version" val keyArtifactAttrs = "artifact_attributes" - val meshfileAttrs = json \ keyAttributes \ keyVx \ keyArtifactAttrs + val meshFileAttrs = json \ keyAttributes \ keyVx \ keyArtifactAttrs for { formatVersion <- (json \ keyAttributes \ keyVx \ keyFormatVersion).validate[Long] - meshFormat <- (meshfileAttrs \ "mesh_format").validate[String] - lodScaleMultiplier <- (meshfileAttrs \ "lod_scale_multiplier").validate[Double] - transform <- (meshfileAttrs \ "transform").validate[Array[Array[Double]]] - hashFunction <- (meshfileAttrs \ "hash_function").validate[String] - nBuckets <- (meshfileAttrs \ "n_buckets").validate[Int] - mappingName <- (meshfileAttrs \ "mapping_name").validateOpt[String] + meshFormat <- (meshFileAttrs \ "mesh_format").validate[String] + lodScaleMultiplier <- (meshFileAttrs \ "lod_scale_multiplier").validate[Double] + transform <- (meshFileAttrs \ "transform").validate[Array[Array[Double]]] + hashFunction <- (meshFileAttrs \ "hash_function").validate[String] + nBuckets <- (meshFileAttrs \ "n_buckets").validate[Int] + mappingName <- (meshFileAttrs \ "mapping_name").validateOpt[String] } yield - MeshfileAttributes( + MeshFileAttributes( formatVersion, meshFormat, lodScaleMultiplier, @@ -69,47 +69,47 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa private val keyBuckets = "buckets" private val keyNeuroglancer = "neuroglancer" - private lazy val openArraysCache = AlfuCache[(MeshfileKey, String), DatasetArray]() - private lazy val attributesCache = AlfuCache[MeshfileKey, MeshfileAttributes]() + private lazy val openArraysCache = AlfuCache[(MeshFileKey, String), DatasetArray]() + private lazy val attributesCache = AlfuCache[MeshFileKey, MeshFileAttributes]() - private def readMeshFileAttributesImpl(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[MeshfileAttributes] = + private def readMeshFileAttributesImpl(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): 
Fox[MeshFileAttributes] = for { groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) - groupHeaderBytes <- (groupVaultPath / MeshfileAttributes.FILENAME_ZARR_JSON).readBytes() - meshfileAttributes <- JsonHelper - .parseAs[MeshfileAttributes](groupHeaderBytes) - .toFox ?~> "Could not parse meshfile attributes from zarr group file" - } yield meshfileAttributes + groupHeaderBytes <- (groupVaultPath / MeshFileAttributes.FILENAME_ZARR_JSON).readBytes() + meshFileAttributes <- JsonHelper + .parseAs[MeshFileAttributes](groupHeaderBytes) + .toFox ?~> "Could not parse meshFile attributes from zarr group file" + } yield meshFileAttributes - private def readMeshfileAttributes(meshfileKey: MeshfileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[MeshfileAttributes] = - attributesCache.getOrLoad(meshfileKey, key => readMeshFileAttributesImpl(key)) + private def readMeshFileAttributes(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[MeshFileAttributes] = + attributesCache.getOrLoad(meshFileKey, key => readMeshFileAttributesImpl(key)) - def readMeshfileMetadata(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, + def readMeshFileMetadata(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(String, Double, Array[Array[Double]])] = for { - meshfileAttributes <- readMeshfileAttributes(meshFileKey) - } yield (meshfileAttributes.meshFormat, meshfileAttributes.lodScaleMultiplier, meshfileAttributes.transform) + meshFileAttributes <- readMeshFileAttributes(meshFileKey) + } yield (meshFileAttributes.meshFormat, meshFileAttributes.lodScaleMultiplier, meshFileAttributes.transform) - def versionForMeshFile(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = + def versionForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { - meshfileAttributes <- readMeshfileAttributes(meshFileKey) - } yield meshfileAttributes.formatVersion + meshFileAttributes <- readMeshFileAttributes(meshFileKey) + } yield meshFileAttributes.formatVersion - def mappingNameForMeshFile(meshFileKey: MeshfileKey)(implicit ec: ExecutionContext, + def mappingNameForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Option[String]] = for { - meshfileAttributes <- readMeshfileAttributes(meshFileKey) - } yield meshfileAttributes.mappingName + meshFileAttributes <- readMeshFileAttributes(meshFileKey) + } yield meshFileAttributes.mappingName - def listMeshChunksForSegment(meshFileKey: MeshfileKey, segmentId: Long, meshfileAttributes: MeshfileAttributes)( + def listMeshChunksForSegment(meshFileKey: MeshFileKey, segmentId: Long, meshFileAttributes: MeshFileAttributes)( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[MeshLodInfo]] = for { (neuroglancerSegmentManifestStart, neuroglancerSegmentManifestEnd) <- getNeuroglancerSegmentManifestOffsets( meshFileKey, - meshfileAttributes, + meshFileAttributes, segmentId) neuroglancerArray <- openZarrArray(meshFileKey, keyNeuroglancer) manifestBytes <- neuroglancerArray.readAsMultiArray( @@ -118,16 +118,16 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa segmentManifest <- tryo(NeuroglancerSegmentManifest.fromBytes(manifestBytes.getStorage.asInstanceOf[Array[Byte]])).toFox } yield enrichSegmentInfo(segmentManifest, - meshfileAttributes.lodScaleMultiplier, - meshfileAttributes.transform, + 
meshFileAttributes.lodScaleMultiplier, + meshFileAttributes.transform, neuroglancerSegmentManifestStart, segmentId) private def getNeuroglancerSegmentManifestOffsets( - meshFileKey: MeshfileKey, - meshfileAttributes: MeshfileAttributes, + meshFileKey: MeshFileKey, + meshFileAttributes: MeshFileAttributes, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Long, Long)] = { - val bucketIndex = meshfileAttributes.applyHashFunction(segmentId) % meshfileAttributes.nBuckets + val bucketIndex = meshFileAttributes.applyHashFunction(segmentId) % meshFileAttributes.nBuckets for { bucketOffsetsArray <- openZarrArray(meshFileKey, keyBucketOffsets) bucketRange <- bucketOffsetsArray.readAsMultiArray(offset = bucketIndex, shape = 2) @@ -146,11 +146,11 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa private def findLocalOffsetInBucket(bucket: MultiArray, segmentId: Long): Option[Int] = (0 until bucket.getShape()(0)).find(idx => bucket.getLong(bucket.getIndex.set(Array(idx, 0))) == segmentId) - private def openZarrArray(meshFileKey: MeshfileKey, zarrArrayName: String)(implicit ec: ExecutionContext, + private def openZarrArray(meshFileKey: MeshFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = openArraysCache.getOrLoad((meshFileKey, zarrArrayName), _ => openZarrArrayImpl(meshFileKey, zarrArrayName)) - private def openZarrArrayImpl(meshFileKey: MeshfileKey, zarrArrayName: String)(implicit ec: ExecutionContext, + private def openZarrArrayImpl(meshFileKey: MeshFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = for { groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) @@ -175,38 +175,38 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa transform: Array[Array[Double]], lod: Int): Array[Array[Double]] = transform - def listMeshChunksForMultipleSegments(meshFileKey: MeshfileKey, segmentIds: Seq[Long])( + def listMeshChunksForMultipleSegments(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext, m: MessagesProvider): Fox[WebknossosSegmentInfo] = for { - meshfileAttributes <- readMeshfileAttributes(meshFileKey) + meshFileAttributes <- readMeshFileAttributes(meshFileKey) meshChunksForUnmappedSegments: List[List[MeshLodInfo]] <- listMeshChunksForSegmentsNested(meshFileKey, segmentIds, - meshfileAttributes) + meshFileAttributes) _ <- Fox.fromBool(meshChunksForUnmappedSegments.nonEmpty) ?~> "zero chunks" ?~> Messages( "mesh.file.listChunks.failed", segmentIds.mkString(","), meshFileKey.attachment.name) wkChunkInfos <- WebknossosSegmentInfo - .fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, meshfileAttributes.meshFormat) + .fromMeshInfosAndMetadata(meshChunksForUnmappedSegments, meshFileAttributes.meshFormat) .toFox } yield wkChunkInfos - private def listMeshChunksForSegmentsNested(meshFileKey: MeshfileKey, + private def listMeshChunksForSegmentsNested(meshFileKey: MeshFileKey, segmentIds: Seq[Long], - meshfileAttributes: MeshfileAttributes)( + meshFileAttributes: MeshFileAttributes)( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[List[MeshLodInfo]]] = Fox.serialCombined(segmentIds) { segmentId => - listMeshChunksForSegment(meshFileKey, segmentId, meshfileAttributes) + listMeshChunksForSegment(meshFileKey, segmentId, meshFileAttributes) } - def readMeshChunk(meshFileKey: MeshfileKey, meshChunkDataRequests: 
Seq[MeshChunkDataRequest])( + def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest])( implicit ec: ExecutionContext, tc: TokenContext): Fox[(Array[Byte], String)] = for { - meshfileAttributes <- readMeshfileAttributes(meshFileKey) + meshFileAttributes <- readMeshFileAttributes(meshFileKey) // Sort the requests by byte offset to optimize for spinning disk access requestsReordered = meshChunkDataRequests.zipWithIndex @@ -222,7 +222,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa } dataSorted = data.sortBy(d => d._2) dataSortedFlat = dataSorted.flatMap(d => d._1).toArray - } yield (dataSortedFlat, meshfileAttributes.meshFormat) + } yield (dataSortedFlat, meshFileAttributes.meshFormat) def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { attributesCache.clear { meshFileKey => diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala index 945ffd06d4b..79530a557b4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala @@ -36,12 +36,12 @@ class CachedHdf5File(reader: IHDF5Reader) lazy val stringReader: IHDF5StringReader = reader.string() lazy val float64Reader: IHDF5DoubleReader = reader.float64() - // For Meshfile + // For MeshFile lazy val nBuckets: Long = uint64Reader.getAttr("/", "n_buckets") lazy val meshFormat: String = stringReader.getAttr("/", "mesh_format") lazy val mappingName: String = stringReader.getAttr("/", "mapping_name") - // For Meshfile and SegmentIndexFile + // For MeshFile and SegmentIndexFile lazy val hashFunction: Long => Long = getHashFunction(stringReader.getAttr("/", "hash_function")) lazy val artifactSchemaVersion: Long = int64Reader.getAttr("/", "artifact_schema_version") diff --git a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/tracings/volume/TSFullMeshService.scala b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/tracings/volume/TSFullMeshService.scala index 317df386a24..ef390efcdcb 100644 --- a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/tracings/volume/TSFullMeshService.scala +++ b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/tracings/volume/TSFullMeshService.scala @@ -42,11 +42,11 @@ class TSFullMeshService @Inject()(volumeTracingService: VolumeTracingService, for { tracing <- annotationService.findVolume(annotationId, tracingId) ?~> "tracing.notFound" data <- if (fullMeshRequest.meshFileName.isDefined) - loadFullMeshFromMeshfile(annotationId, tracingId, tracing, fullMeshRequest) + loadFullMeshFromMeshFile(annotationId, tracingId, tracing, fullMeshRequest) else loadFullMeshFromAdHoc(annotationId, tracingId, tracing, fullMeshRequest) } yield data - private def loadFullMeshFromMeshfile( + private def loadFullMeshFromMeshFile( annotationId: String, tracingId: String, tracing: VolumeTracing, From 16ae1344fd3118cc497df77a54130f7ca3d5d385 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 17 Jun 2025 15:11:06 +0200 Subject: [PATCH 052/100] fix class injection --- .../datastore/services/ZarrAgglomerateService.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala index dd9fad44e06..8ce7ef2ece2 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -24,7 +24,7 @@ import scala.concurrent.ExecutionContext class ZarrAgglomerateService @Inject()(config: DataStoreConfig, dataVaultService: DataVaultService, - sharedChunkContentsCache: AlfuCache[String, MultiArray]) + chunkCacheService: ChunkCacheService) extends DataConverter with LazyLogging { @@ -65,7 +65,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, None, None, None, - sharedChunkContentsCache) + chunkCacheService.sharedChunkContentsCache) } yield zarrArray def applyAgglomerate(agglomerateFileKey: AgglomerateFileKey, elementClass: ElementClass.Value)( From 80778587928c52da5e6cd6c655388ba66f5efbaa Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 18 Jun 2025 11:59:53 +0200 Subject: [PATCH 053/100] Adapt full mesh service; introduce credentials for attachments --- .../WKRemoteWorkerController.scala | 8 ++- .../explore/PrecomputedExplorer.scala | 11 +-- .../datasource/DatasetLayerAttachments.scala | 9 ++- .../services/AgglomerateService.scala | 3 +- .../services/DataSourceService.scala | 2 + .../services/ZarrAgglomerateService.scala | 11 ++- .../services/mesh/DSFullMeshService.scala | 68 ++++--------------- .../services/mesh/MeshFileService.scala | 9 ++- .../services/mesh/ZarrMeshFileService.scala | 9 +-- .../RemoteSourceDescriptorService.scala | 35 ++++++++-- 10 files changed, 84 insertions(+), 81 deletions(-) diff --git a/app/controllers/WKRemoteWorkerController.scala b/app/controllers/WKRemoteWorkerController.scala index 174aa6864c1..7c0f849c645 100644 --- a/app/controllers/WKRemoteWorkerController.scala +++ b/app/controllers/WKRemoteWorkerController.scala @@ -79,16 +79,18 @@ class WKRemoteWorkerController @Inject()(jobDAO: JobDAO, for { _ <- workerDAO.findOneByKey(key) ?~> "job.worker.notFound" jobBeforeChange <- jobDAO.findOne(id)(GlobalAccessContext) - _ <- jobDAO.updateStatus(id, request.body) + _ <- jobDAO.updateStatus(id, request.body) ?~> "job.updateStatus.failed" jobAfterChange <- jobDAO.findOne(id)(GlobalAccessContext) ?~> "job.notFound" _ = jobService.trackStatusChange(jobBeforeChange, jobAfterChange) _ <- jobService.cleanUpIfFailed(jobAfterChange) ?~> "job.cleanup.failed" _ <- Fox.runIf(request.body.state == JobState.SUCCESS) { - creditTransactionService.completeTransactionOfJob(jobAfterChange._id)(GlobalAccessContext) + creditTransactionService + .completeTransactionOfJob(jobAfterChange._id)(GlobalAccessContext) ?~> "job.creditTransaction.failed" } _ <- Fox.runIf( jobAfterChange.state != request.body.state && (request.body.state == JobState.FAILURE || request.body.state == JobState.CANCELLED)) { - creditTransactionService.refundTransactionForJob(jobAfterChange._id)(GlobalAccessContext) + creditTransactionService + .refundTransactionForJob(jobAfterChange._id)(GlobalAccessContext) ?~> "job.creditTransaction.refund.failed" } } yield Ok } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala index 7558e21bbf3..c36754f45cc 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala @@ -51,7 +51,7 @@ class PrecomputedExplorer(implicit val ec: ExecutionContext) extends RemoteLayer voxelSize <- Vec3Double.fromArray(smallestResolution).toFox mags: List[MagLocator] <- Fox.serialCombined(precomputedHeader.scales)( getMagFromScale(_, smallestResolution, remotePath, credentialId).toFox) - meshAttachments <- exploreMeshesForLayer(remotePath / precomputedHeader.meshPath) + meshAttachments <- exploreMeshesForLayer(remotePath / precomputedHeader.meshPath, credentialId) attachmentsGrouped = if (meshAttachments.nonEmpty) Some(DatasetLayerAttachments(meshes = meshAttachments)) else None layer = if (precomputedHeader.describesSegmentationLayer) { @@ -90,13 +90,16 @@ class PrecomputedExplorer(implicit val ec: ExecutionContext) extends RemoteLayer } yield MagLocator(mag, Some(path.toString), None, Some(axisOrder), channelIndex = None, credentialId) } - private def exploreMeshesForLayer(meshPath: VaultPath)(implicit tc: TokenContext): Fox[Seq[LayerAttachment]] = + private def exploreMeshesForLayer(meshPath: VaultPath, credentialId: Option[String])( + implicit tc: TokenContext): Fox[Seq[LayerAttachment]] = (for { meshInfo <- (meshPath / NeuroglancerMesh.FILENAME_INFO) .parseAsJson[NeuroglancerPrecomputedMeshInfo] ?~> "Failed to read mesh info" _ <- Fox.fromBool(meshInfo.transform.length == 12) ?~> "Invalid mesh info: transform has to be of length 12" } yield Seq( - LayerAttachment(NeuroglancerMesh.meshName, meshPath.toUri, LayerAttachmentDataformat.neuroglancerPrecomputed))) - .orElse(Fox.successful(Seq.empty)) + LayerAttachment(NeuroglancerMesh.meshName, + meshPath.toUri, + LayerAttachmentDataformat.neuroglancerPrecomputed, + credentialId))).orElse(Fox.successful(Seq.empty)) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala index 5673becb400..41b2f3c3e8a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala @@ -15,7 +15,9 @@ case class DatasetLayerAttachments( segmentIndex: Option[LayerAttachment] = None, connectomes: Seq[LayerAttachment] = Seq.empty, cumsum: Option[LayerAttachment] = None -) +) { + def allAttachments: Seq[LayerAttachment] = meshes ++ agglomerates ++ segmentIndex ++ connectomes ++ cumsum +} object DatasetLayerAttachments { implicit val jsonFormat: Format[DatasetLayerAttachments] = @@ -32,7 +34,10 @@ object LayerAttachmentType extends ExtendedEnumeration { val mesh, agglomerate, segmentIndex, connectome, cumsum = Value } -case class LayerAttachment(name: String, path: URI, dataFormat: LayerAttachmentDataformat.LayerAttachmentDataformat) +case class LayerAttachment(name: String, + path: URI, + dataFormat: LayerAttachmentDataformat.LayerAttachmentDataformat, + credentialId: Option[String] = None) object LayerAttachment { implicit val jsonFormat: Format[LayerAttachment] = Json.format[LayerAttachment] diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 431d33af4bd..1849e2cd7fb 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -22,7 +22,6 @@ import net.liftweb.common.Box import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils -import java.net.URI import java.nio.file.Paths import javax.inject.Inject import scala.concurrent.ExecutionContext @@ -104,7 +103,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, registeredAttachmentNormalized.getOrElse( LayerAttachment( mappingName, - new URI(dataBaseDir.resolve(dataLayer.name).resolve(agglomerateDir).toString), + dataBaseDir.resolve(dataLayer.name).resolve(agglomerateDir).toUri, LayerAttachmentDataformat.hdf5 ) ) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index f7df1834983..d14c74d38b9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -366,6 +366,8 @@ class DataSourceService @Inject()( dataLayer <- dataLayerOpt _ = dataLayer.mags.foreach(mag => remoteSourceDescriptorService.removeVaultFromCache(dataBaseDir, dataSource.id, dataLayer.name, mag)) + _ = dataLayer.attachments.foreach(_.allAttachments.foreach(attachment => + remoteSourceDescriptorService.removeVaultFromCache(attachment))) } yield dataLayer.mags.length } yield removedEntriesList.sum } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala index 8ce7ef2ece2..939d95a70e4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -12,7 +12,12 @@ import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NativeBucketScanner, NodeDefaults, SkeletonTracingDefaults} import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass} -import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, DataVaultService, RemoteSourceDescriptor} +import com.scalableminds.webknossos.datastore.storage.{ + AgglomerateFileKey, + DataVaultService, + RemoteSourceDescriptor, + RemoteSourceDescriptorService +} import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo import ucar.ma2.{Array => MultiArray} @@ -23,7 +28,7 @@ import scala.collection.compat.immutable.ArraySeq import scala.concurrent.ExecutionContext class ZarrAgglomerateService @Inject()(config: DataStoreConfig, - dataVaultService: DataVaultService, + remoteSourceDescriptorService: RemoteSourceDescriptorService, chunkCacheService: ChunkCacheService) extends DataConverter with LazyLogging { @@ -57,7 +62,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, private def openZarrArray(agglomerateFileKey: AgglomerateFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = for { - groupVaultPath <- 
dataVaultService.getVaultPath(RemoteSourceDescriptor(agglomerateFileKey.attachment.path, None)) + groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(agglomerateFileKey.attachment) segmentToAgglomeratePath = groupVaultPath / zarrArrayName zarrArray <- Zarr3Array.open(segmentToAgglomeratePath, DataSourceId("dummy", "unused"), diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index 2c9a5f497e8..991a783c99a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -26,9 +26,7 @@ case class FullMeshRequest( editableMappingTracingId: Option[String], mag: Option[Vec3Int], // required for ad-hoc meshing seedPosition: Option[Vec3Int], // required for ad-hoc meshing - additionalCoordinates: Option[Seq[AdditionalCoordinate]], - meshFilePath: Option[String], // required for remote neuroglancer precomputed mesh files - meshFileType: Option[String] + additionalCoordinates: Option[Seq[AdditionalCoordinate]] ) object FullMeshRequest { @@ -120,7 +118,6 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, } yield allVertices } - // TODO make sure this also works for the remote neuroglancer variant. if so, delete other implementation private def loadFullMeshFromMeshFile(organizationId: String, datasetDirectoryName: String, dataLayerName: String, @@ -144,42 +141,10 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, mappingNameForMeshFile, omitMissing = false ) + vertexQuantizationBits <- meshFileService.getVertexQuantizationBits(meshFileKey) chunkInfos: WebknossosSegmentInfo <- meshFileService.listMeshChunksForSegmentsMerged(meshFileKey, segmentIds) - allChunkRanges: List[MeshChunk] = chunkInfos.lods.head.chunks - transform = chunkInfos.lods.head.transform - stlEncodedChunks: Seq[Array[Byte]] <- Fox.serialCombined(allChunkRanges) { chunkRange: MeshChunk => - readMeshChunkAsStl(meshFileKey, chunkRange, transform) - } - stlOutput = combineEncodedChunksToStl(stlEncodedChunks) - _ = logMeshingDuration(before, "meshFile", stlOutput.length) - } yield stlOutput - - private def readMeshChunkAsStl(meshFileKey: MeshFileKey, chunkInfo: MeshChunk, transform: Array[Array[Double]])( - implicit ec: ExecutionContext, - tc: TokenContext): Fox[Array[Byte]] = - for { - (dracoMeshChunkBytes, encoding) <- meshFileService.readMeshChunk( - meshFileKey, - List(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, None)) - ) ?~> "mesh.file.loadChunk.failed" - _ <- Fox.fromBool(encoding == "draco") ?~> s"mesh file encoding is $encoding, only draco is supported" - stlEncodedChunk <- getStlEncodedChunkFromDraco(chunkInfo, transform, dracoMeshChunkBytes) - } yield stlEncodedChunk - - /* - // TODO delete if above works also for neuroglancer - private def loadFullMeshFromRemoteNeuroglancerMeshFile(meshFileKey: MeshFileKey, fullMeshRequest: FullMeshRequest)( - implicit ec: ExecutionContext, - tc: TokenContext): Fox[Array[Byte]] = - for { - chunkInfos: WebknossosSegmentInfo <- neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments( - meshFileKey, - List(fullMeshRequest.segmentId) - ) - _ <- Fox.fromBool(fullMeshRequest.mappingName.isEmpty) ?~> "Mapping is not supported for remote neuroglancer mesh files" selectedLod = 
fullMeshRequest.lod.getOrElse(0) allChunkRanges: List[MeshChunk] = chunkInfos.lods(selectedLod).chunks - meshFileName <- fullMeshRequest.meshFileName.toFox ?~> "mesh file name needed" // Right now only the scale is used, so we only need to supply these values lodTransform = chunkInfos.lods(selectedLod).transform transform = Array( @@ -187,42 +152,33 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, Array(0, lodTransform(1)(1), 0), Array(0, 0, lodTransform(2)(2)) ) - vertexQuantizationBits <- neuroglancerPrecomputedMeshService.getVertexQuantizationBits(meshFileKey) stlEncodedChunks: Seq[Array[Byte]] <- Fox.serialCombined(allChunkRanges) { chunkRange: MeshChunk => - readNeuroglancerPrecomputedMeshChunkAsStl( - meshFileKey, - chunkRange, - transform, - Some(fullMeshRequest.segmentId), - vertexQuantizationBits - ) + readMeshChunkAsStl(fullMeshRequest.segmentId, meshFileKey, chunkRange, transform, vertexQuantizationBits) } stlOutput = combineEncodedChunksToStl(stlEncodedChunks) + _ = logMeshingDuration(before, "meshFile", stlOutput.length) } yield stlOutput - private def readNeuroglancerPrecomputedMeshChunkAsStl( - meshFileKey: MeshFileKey, - chunkInfo: MeshChunk, - transform: Array[Array[Double]], - segmentId: Option[Long], - vertexQuantizationBits: Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = + private def readMeshChunkAsStl( + segmentId: Long, // only used in neuroglancerPrecomputed case + meshFileKey: MeshFileKey, + chunkInfo: MeshChunk, + transform: Array[Array[Double]], + vertexQuantizationBits: Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { (dracoMeshChunkBytes, encoding) <- meshFileService.readMeshChunk( meshFileKey, - Seq(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, segmentId)) + List(MeshChunkDataRequest(chunkInfo.byteOffset, chunkInfo.byteSize, Some(segmentId))) ) ?~> "mesh.file.loadChunk.failed" _ <- Fox.fromBool(encoding == "draco") ?~> s"mesh file encoding is $encoding, only draco is supported" stlEncodedChunk <- getStlEncodedChunkFromDraco(chunkInfo, transform, dracoMeshChunkBytes, vertexQuantizationBits) } yield stlEncodedChunk - - */ - private def getStlEncodedChunkFromDraco( chunkInfo: MeshChunk, transform: Array[Array[Double]], dracoBytes: Array[Byte], - vertexQuantizationBits: Int = 0)(implicit ec: ExecutionContext): Fox[Array[Byte]] = + vertexQuantizationBits: Int)(implicit ec: ExecutionContext): Fox[Array[Byte]] = for { scale <- tryo(Vec3Double(transform(0)(0), transform(1)(1), transform(2)(2))).toFox ?~> "could not extract scale from mesh file transform attribute" stlEncodedChunk <- tryo( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index a712f532556..0b89435ac78 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -105,7 +105,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, registeredAttachmentNormalized.getOrElse( LayerAttachment( meshFileName, - new URI(dataBaseDir.resolve(dataLayer.name).resolve(meshesDir).toString), + dataBaseDir.resolve(dataLayer.name).resolve(meshesDir).toUri, LayerAttachmentDataformat.hdf5 ) ) @@ -167,6 +167,13 @@ class MeshFileService @Inject()(config: DataStoreConfig, 
Fox.successful(NeuroglancerMesh.meshInfoVersion) } + def getVertexQuantizationBits(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Int] = + meshFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.neuroglancerPrecomputed => + neuroglancerPrecomputedMeshService.getVertexQuantizationBits(meshFileKey) + case _ => Fox.successful(0) + } + def listMeshChunksForSegmentsMerged(meshFileKey: MeshFileKey, segmentIds: Seq[Long])( implicit ec: ExecutionContext, tc: TokenContext, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 6d58f7c0c09..e3b1599015e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -8,7 +8,7 @@ import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, Hdf5HashedArrayUtils} -import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptor} +import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptorService} import net.liftweb.common.Box.tryo import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{JsResult, JsValue, Reads} @@ -61,7 +61,8 @@ object MeshFileAttributes { } } -class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVaultService: DataVaultService) +class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, + remoteSourceDescriptorService: RemoteSourceDescriptorService) extends FoxImplicits with NeuroglancerMeshHelper { @@ -75,7 +76,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa private def readMeshFileAttributesImpl(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[MeshFileAttributes] = for { - groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) + groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(meshFileKey.attachment) groupHeaderBytes <- (groupVaultPath / MeshFileAttributes.FILENAME_ZARR_JSON).readBytes() meshFileAttributes <- JsonHelper .parseAs[MeshFileAttributes](groupHeaderBytes) @@ -153,7 +154,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, dataVa private def openZarrArrayImpl(meshFileKey: MeshFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = for { - groupVaultPath <- dataVaultService.getVaultPath(RemoteSourceDescriptor(meshFileKey.attachment.path, None)) + groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(meshFileKey.attachment) zarrArray <- Zarr3Array.open(groupVaultPath / zarrArrayName, DataSourceId("dummy", "unused"), "layer", diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala index 84dda12083c..f4fc2141f08 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala @@ -4,7 +4,7 @@ import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.dataformats.MagLocator import com.scalableminds.webknossos.datastore.datavault.VaultPath -import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId +import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, LayerAttachment} import com.scalableminds.webknossos.datastore.services.DSRemoteWebknossosClient import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box @@ -33,8 +33,23 @@ class RemoteSourceDescriptorService @Inject()(dSRemoteWebknossosClient: DSRemote def removeVaultFromCache(baseDir: Path, datasetId: DataSourceId, layerName: String, magLocator: MagLocator)( implicit ec: ExecutionContext): Fox[Unit] = for { - remoteSource <- remoteSourceDescriptorFor(baseDir, datasetId, layerName, magLocator) - _ = dataVaultService.removeVaultFromCache(remoteSource) + remoteSourceDescriptor <- remoteSourceDescriptorFor(baseDir, datasetId, layerName, magLocator) + _ = dataVaultService.removeVaultFromCache(remoteSourceDescriptor) + } yield () + + // Note that attachment paths are already resolved with baseDir in local case so we don’t need to do it here. + def vaultPathFor(attachment: LayerAttachment)(implicit ec: ExecutionContext): Fox[VaultPath] = + for { + credentialBox <- credentialFor(attachment).shiftBox + remoteSourceDescriptor = RemoteSourceDescriptor(attachment.path, credentialBox.toOption) + vaultPath <- dataVaultService.getVaultPath(remoteSourceDescriptor) + } yield vaultPath + + def removeVaultFromCache(attachment: LayerAttachment)(implicit ec: ExecutionContext): Fox[Unit] = + for { + credentialBox <- credentialFor(attachment).shiftBox + remoteSourceDescriptor = RemoteSourceDescriptor(attachment.path, credentialBox.toOption) + _ = dataVaultService.removeVaultFromCache(remoteSourceDescriptor) } yield () private def remoteSourceDescriptorFor( @@ -111,8 +126,8 @@ class RemoteSourceDescriptorService @Inject()(dSRemoteWebknossosClient: DSRemote res } - private def findGlobalCredentialFor(magLocator: MagLocator)(implicit ec: ExecutionContext) = - magLocator.path match { + private def findGlobalCredentialFor(pathOpt: Option[String])(implicit ec: ExecutionContext) = + pathOpt match { case Some(magPath) => globalCredentials.find(c => magPath.startsWith(c.name)).toFox case None => Fox.empty } @@ -124,7 +139,15 @@ class RemoteSourceDescriptorService @Inject()(dSRemoteWebknossosClient: DSRemote case None => magLocator.credentials match { case Some(credential) => Fox.successful(credential) - case None => findGlobalCredentialFor(magLocator) + case None => findGlobalCredentialFor(magLocator.path) } } + + private def credentialFor(attachment: LayerAttachment)(implicit ec: ExecutionContext): Fox[DataVaultCredential] = + attachment.credentialId match { + case Some(credentialId) => + dSRemoteWebknossosClient.getCredential(credentialId) + case None => + findGlobalCredentialFor(Some(attachment.path.toString)) + } } From 7d676bf1ec4ac8e97f69018c739b4c1f46578d7c Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 18 Jun 2025 14:09:00 +0200 Subject: [PATCH 054/100] fix updating job status for jobs with no credit transactions --- 
.../CreditTransactionService.scala | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/app/models/organization/CreditTransactionService.scala b/app/models/organization/CreditTransactionService.scala index ee125dbcac8..7becd81e2ea 100644 --- a/app/models/organization/CreditTransactionService.scala +++ b/app/models/organization/CreditTransactionService.scala @@ -4,6 +4,7 @@ import com.scalableminds.util.accesscontext.DBAccessContext import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.typesafe.scalalogging.LazyLogging +import net.liftweb.common.{Failure, Full, Empty} import play.api.libs.json.{JsObject, Json} import javax.inject.Inject @@ -42,15 +43,30 @@ class CreditTransactionService @Inject()(creditTransactionDAO: CreditTransaction def completeTransactionOfJob(jobId: ObjectId)(implicit ctx: DBAccessContext): Fox[Unit] = for { - transaction <- creditTransactionDAO.findTransactionForJob(jobId) - _ <- organizationService.assertOrganizationHasPaidPlan(transaction._organization) - _ <- creditTransactionDAO.commitTransaction(transaction._id) + transactionBox <- creditTransactionDAO.findTransactionForJob(jobId).shiftBox + _ <- transactionBox match { + case Full(transaction) => + for { + _ <- organizationService.assertOrganizationHasPaidPlan(transaction._organization) + _ <- creditTransactionDAO.commitTransaction(transaction._id) + } yield () + case Empty => Fox.successful(()) // Assume transaction-less Job + case f: Failure => f.toFox + } + } yield () def refundTransactionForJob(jobId: ObjectId)(implicit ctx: DBAccessContext): Fox[Unit] = for { - transaction <- creditTransactionDAO.findTransactionForJob(jobId) - _ <- refundTransaction(transaction) + transactionBox <- creditTransactionDAO.findTransactionForJob(jobId).shiftBox + _ <- transactionBox match { + case Full(transaction) => + for { + _ <- refundTransaction(transaction) + } yield () + case Empty => Fox.successful(()) // Assume transaction-less Job + case f: Failure => f.toFox + } } yield () private def refundTransaction(creditTransaction: CreditTransaction)(implicit ctx: DBAccessContext): Fox[Unit] = From 76cd9d656150bbe294f257c300a3021d591ce5dd Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 18 Jun 2025 14:27:56 +0200 Subject: [PATCH 055/100] fix adhocMag selection in create animation modal --- .../viewer/view/action-bar/create_animation_modal.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/javascripts/viewer/view/action-bar/create_animation_modal.tsx b/frontend/javascripts/viewer/view/action-bar/create_animation_modal.tsx index 66fd9312feb..53461f39c2a 100644 --- a/frontend/javascripts/viewer/view/action-bar/create_animation_modal.tsx +++ b/frontend/javascripts/viewer/view/action-bar/create_animation_modal.tsx @@ -258,7 +258,7 @@ function CreateAnimationModal(props: Props) { const adhocMagIndex = getMagInfo(layer.resolutions).getClosestExistingIndex( preferredQualityForMeshAdHocComputation, ); - const adhocMag = layer.resolutions[adhocMagIndex]; + const adhocMag = getMagInfo(layer.resolutions).getMagByIndex(adhocMagIndex); return Object.values(meshInfos) .filter((meshInfo: MeshInformation) => meshInfo.isVisible) From 09567ebc7c8f9d720caf13fa59a63598fdefde09 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 18 Jun 2025 14:38:42 +0200 Subject: [PATCH 056/100] make typechecker happy --- .../viewer/view/action-bar/create_animation_modal.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/frontend/javascripts/viewer/view/action-bar/create_animation_modal.tsx b/frontend/javascripts/viewer/view/action-bar/create_animation_modal.tsx index 53461f39c2a..759aa501221 100644 --- a/frontend/javascripts/viewer/view/action-bar/create_animation_modal.tsx +++ b/frontend/javascripts/viewer/view/action-bar/create_animation_modal.tsx @@ -258,7 +258,7 @@ function CreateAnimationModal(props: Props) { const adhocMagIndex = getMagInfo(layer.resolutions).getClosestExistingIndex( preferredQualityForMeshAdHocComputation, ); - const adhocMag = getMagInfo(layer.resolutions).getMagByIndex(adhocMagIndex); + const adhocMag = getMagInfo(layer.resolutions).getMagByIndexOrThrow(adhocMagIndex); return Object.values(meshInfos) .filter((meshInfo: MeshInformation) => meshInfo.isVisible) From e170ebe19e5af83de25a4348c9fbe02ac44dd9ba Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 18 Jun 2025 15:03:02 +0200 Subject: [PATCH 057/100] pr feedback; add services as singletons for proper cache use --- conf/messages | 1 + .../datastore/DataStoreModule.scala | 3 ++ .../controllers/DataSourceController.scala | 2 +- .../datastore/datareaders/DatasetArray.scala | 10 ++-- .../datareaders/zarr3/Zarr3Array.scala | 2 +- .../services/AgglomerateService.scala | 53 ++++++++++++------- .../services/ZarrAgglomerateService.scala | 2 +- 7 files changed, 46 insertions(+), 27 deletions(-) diff --git a/conf/messages b/conf/messages index b7aab81dbd6..60bbba21dbc 100644 --- a/conf/messages +++ b/conf/messages @@ -149,6 +149,7 @@ zarr.invalidFirstChunkCoord=First Channel must be 0 zarr.chunkNotFound=Could not find the requested chunk zarr.notEnoughCoordinates=Invalid number of chunk coordinates. Expected to get at least 3 dimensions and channel 0. zarr.invalidAdditionalCoordinates=Invalid additional coordinates for this data layer. +zarr.readShardIndex.failed=Failed to read shard information for zarr data. This may indicate missing data. 
nml.file.uploadSuccess=Successfully uploaded file nml.file.notFound=Could not extract NML file diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index 2c5f1c63239..76472ba638c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -26,6 +26,9 @@ class DataStoreModule extends AbstractModule { bind(classOf[DataVaultService]).asEagerSingleton() bind(classOf[DSRemoteWebknossosClient]).asEagerSingleton() bind(classOf[BinaryDataServiceHolder]).asEagerSingleton() + bind(classOf[ZarrAgglomerateService]).asEagerSingleton() + bind(classOf[Hdf5AgglomerateService]).asEagerSingleton() + bind(classOf[AgglomerateService]).asEagerSingleton() bind(classOf[MappingService]).asEagerSingleton() bind(classOf[AdHocMeshServiceHolder]).asEagerSingleton() bind(classOf[ApplicationHealthService]).asEagerSingleton() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 342b53e5836..50bd985e2cd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -265,7 +265,7 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateList = agglomerateService.exploreAgglomerates(organizationId, datasetDirectoryName, dataLayer) + agglomerateList = agglomerateService.listAgglomerates(dataSource.id, dataLayer) } yield Ok(Json.toJson(agglomerateList)) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index 2a3a33b2b17..9bb779dc2a9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -71,7 +71,7 @@ class DatasetArray(vaultPath: VaultPath, additionalCoordinatesOpt: Option[Seq[AdditionalCoordinate]], shouldReadUint24: Boolean)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Byte]] = for { - (offsetArray, shapeArray) <- tryo(constructShapeAndOffsetArrays( + (offsetArray, shapeArray) <- tryo(constructOffsetAndShapeArrays( offsetXYZ, shapeXYZ, additionalCoordinatesOpt, @@ -79,7 +79,7 @@ class DatasetArray(vaultPath: VaultPath, bytes <- readBytes(offsetArray, shapeArray) } yield bytes - private def constructShapeAndOffsetArrays(offsetXYZ: Vec3Int, + private def constructOffsetAndShapeArrays(offsetXYZ: Vec3Int, shapeXYZ: Vec3Int, additionalCoordinatesOpt: Option[Seq[AdditionalCoordinate]], shouldReadUint24: Boolean): (Array[Int], Array[Int]) = { @@ -168,7 +168,7 @@ class DatasetArray(vaultPath: VaultPath, } else { val targetBuffer = MultiArrayUtils.createDataBuffer(header.resolvedDataType, shape) val targetMultiArray = MultiArrayUtils.createArrayWithGivenStorage(targetBuffer, shape.reverse) - val copiedFuture = Fox.combined(chunkIndices.map { chunkIndex: Array[Int] => + val copiedFox = 
Fox.combined(chunkIndices.map { chunkIndex: Array[Int] => for { sourceChunk: MultiArray <- getSourceChunkDataWithCache(fullAxisOrder.permuteIndicesWkToArray(chunkIndex)) sourceChunkInWkFOrder: MultiArray = MultiArrayUtils @@ -181,7 +181,7 @@ class DatasetArray(vaultPath: VaultPath, } yield () }) for { - _ <- copiedFuture + _ <- copiedFox } yield targetMultiArray } } @@ -294,7 +294,7 @@ class DatasetArray(vaultPath: VaultPath, util.Arrays.equals(bufferShape, chunkShape) private def isZeroOffset(offset: Array[Int]): Boolean = - util.Arrays.equals(offset, new Array[Int](offset.length)) + offset.forall(_ == 0) private def computeOffsetInChunk(chunkIndex: Array[Int], globalOffset: Array[Int]): Array[Int] = chunkIndex.indices.map { dim => diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala index bef05eaa774..4fe00316dc6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala @@ -120,7 +120,7 @@ class Zarr3Array(vaultPath: VaultPath, private def readAndParseShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[(Long, Long)]] = for { - shardIndexRaw <- readShardIndex(shardPath) ?~> "readShardIndex.failed" + shardIndexRaw <- readShardIndex(shardPath) ?~> "zarr.readShardIndex.failed" parsed = parseShardIndex(shardIndexRaw) } yield parsed diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 6fc7a73a7c7..4831cb2b705 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -22,7 +22,6 @@ import net.liftweb.common.Box import net.liftweb.common.Box.tryo import org.apache.commons.io.FilenameUtils -import java.net.URI import java.nio.file.Paths import scala.concurrent.ExecutionContext import scala.concurrent.duration.DurationInt @@ -34,20 +33,21 @@ class AgglomerateService(config: DataStoreConfig, extends LazyLogging with FoxImplicits { private val agglomerateDir = "agglomerates" - private val agglomerateFileExtension = "hdf5" + private val hdf5AgglomerateFileExtension = "hdf5" private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) - private val agglomerateKeyCache + private val agglomerateFileKeyCache : AlfuCache[(DataSourceId, String, String), AgglomerateFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → AgglomerateFileKey - def exploreAgglomerates(organizationId: String, datasetDirectoryName: String, dataLayer: DataLayer): Set[String] = { + def listAgglomerates(dataSourceId: DataSourceId, dataLayer: DataLayer): Set[String] = { val attachedAgglomerates = dataLayer.attachments.map(_.agglomerates).getOrElse(Seq.empty).map(_.name).toSet - val layerDir = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayer.name) + val layerDir = + dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName).resolve(dataLayer.name) val exploredAgglomerates = PathUtils .listFiles(layerDir.resolve(agglomerateDir), silent = true, - 
PathUtils.fileExtensionFilter(agglomerateFileExtension)) + PathUtils.fileExtensionFilter(hdf5AgglomerateFileExtension)) .map { paths => paths.map(path => FilenameUtils.removeExtension(path.getFileName.toString)) } @@ -59,7 +59,7 @@ class AgglomerateService(config: DataStoreConfig, } def clearCaches(dataSourceId: DataSourceId, layerName: Option[String]): Int = { - agglomerateKeyCache.clear { + agglomerateFileKeyCache.clear { case (keyDataSourceId, keyLayerName, _) => dataSourceId == keyDataSourceId && layerName.forall(_ == keyLayerName) } @@ -77,8 +77,8 @@ class AgglomerateService(config: DataStoreConfig, def lookUpAgglomerateFile(dataSourceId: DataSourceId, dataLayer: DataLayer, mappingName: String)( implicit ec: ExecutionContext): Fox[AgglomerateFileKey] = - agglomerateKeyCache.getOrLoad((dataSourceId, dataLayer.name, mappingName), - _ => lookUpAgglomerateFileImpl(dataSourceId, dataLayer, mappingName).toFox) + agglomerateFileKeyCache.getOrLoad((dataSourceId, dataLayer.name, mappingName), + _ => lookUpAgglomerateFileImpl(dataSourceId, dataLayer, mappingName).toFox) private def lookUpAgglomerateFileImpl(dataSourceId: DataSourceId, dataLayer: DataLayer, @@ -87,12 +87,12 @@ class AgglomerateService(config: DataStoreConfig, case Some(attachments) => attachments.agglomerates.find(_.name == mappingName) case None => None } - val localDatsetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + val localDatasetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) for { registeredAttachmentNormalized <- tryo(registeredAttachment.map { attachment => attachment.copy( path = - remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatsetDir, dataLayer.name)) + remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatasetDir, dataLayer.name)) }) } yield AgglomerateFileKey( @@ -101,7 +101,7 @@ class AgglomerateService(config: DataStoreConfig, registeredAttachmentNormalized.getOrElse( LayerAttachment( mappingName, - new URI(dataBaseDir.resolve(dataLayer.name).resolve(agglomerateDir).toString), + dataBaseDir.resolve(dataLayer.name).resolve(agglomerateDir).toUri, LayerAttachmentDataformat.hdf5 ) ) @@ -117,7 +117,9 @@ class AgglomerateService(config: DataStoreConfig, data <- agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.applyAgglomerate(agglomerateFileKey, elementClass)(data) - case _ => hdf5AgglomerateService.applyAgglomerate(agglomerateFileKey, request)(data).toFox + case LayerAttachmentDataformat.hdf5 => + hdf5AgglomerateService.applyAgglomerate(agglomerateFileKey, request)(data).toFox + case _ => unsupportedDataFormat(agglomerateFileKey) } } yield data @@ -128,7 +130,9 @@ class AgglomerateService(config: DataStoreConfig, skeleton <- agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.generateSkeleton(agglomerateFileKey, agglomerateId) - case _ => hdf5AgglomerateService.generateSkeleton(agglomerateFileKey, agglomerateId).toFox + case LayerAttachmentDataformat.hdf5 => + hdf5AgglomerateService.generateSkeleton(agglomerateFileKey, agglomerateId).toFox + case _ => unsupportedDataFormat(agglomerateFileKey) } _ = if (Instant.since(before) > (100 milliseconds)) { Instant.logSince( @@ -145,7 +149,8 @@ class AgglomerateService(config: DataStoreConfig, tc: TokenContext): Fox[Long] = agglomerateFileKey.attachment.dataFormat match { case 
LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.largestAgglomerateId(agglomerateFileKey) - case _ => hdf5AgglomerateService.largestAgglomerateId(agglomerateFileKey).toFox + case LayerAttachmentDataformat.hdf5 => hdf5AgglomerateService.largestAgglomerateId(agglomerateFileKey).toFox + case _ => unsupportedDataFormat(agglomerateFileKey) } def segmentIdsForAgglomerateId(agglomerateFileKey: AgglomerateFileKey, @@ -153,7 +158,9 @@ class AgglomerateService(config: DataStoreConfig, agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId) - case _ => hdf5AgglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId).toFox + case LayerAttachmentDataformat.hdf5 => + hdf5AgglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId).toFox + case _ => unsupportedDataFormat(agglomerateFileKey) } def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long])( @@ -162,7 +169,9 @@ class AgglomerateService(config: DataStoreConfig, agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileKey, segmentIds) - case _ => hdf5AgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileKey, segmentIds).toFox + case LayerAttachmentDataformat.hdf5 => + hdf5AgglomerateService.agglomerateIdsForSegmentIds(agglomerateFileKey, segmentIds).toFox + case _ => unsupportedDataFormat(agglomerateFileKey) } def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long)(implicit ec: ExecutionContext, @@ -170,7 +179,9 @@ class AgglomerateService(config: DataStoreConfig, agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.positionForSegmentId(agglomerateFileKey, segmentId) - case _ => hdf5AgglomerateService.positionForSegmentId(agglomerateFileKey, segmentId).toFox + case LayerAttachmentDataformat.hdf5 => + hdf5AgglomerateService.positionForSegmentId(agglomerateFileKey, segmentId).toFox + case _ => unsupportedDataFormat(agglomerateFileKey) } def generateAgglomerateGraph(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long)( @@ -179,8 +190,12 @@ class AgglomerateService(config: DataStoreConfig, agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.generateAgglomerateGraph(agglomerateFileKey, agglomerateId) - case _ => + case LayerAttachmentDataformat.hdf5 => hdf5AgglomerateService.generateAgglomerateGraph(agglomerateFileKey, agglomerateId).toFox + case _ => unsupportedDataFormat(agglomerateFileKey) } + private def unsupportedDataFormat(agglomerateFileKey: AgglomerateFileKey)(implicit ec: ExecutionContext) = + Fox.failure( + s"Trying to load agglomerate file with unsupported data format ${agglomerateFileKey.attachment.dataFormat}") } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala index 868c1ca70d6..8c3ebe93b3d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala @@ -266,7 +266,7 @@ class ZarrAgglomerateService(config: DataStoreConfig, segmentId, agglomerateToSegments) 
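// The positions array stores one row per segment with three coordinate columns, so a single
// segment position is read with offset (segmentIndex, 0) and shape (1, 3) rather than (3, 1).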
agglomerateToPositions <- openZarrArrayCached(agglomerateFileKey, keyAgglomerateToPositions) - position <- agglomerateToPositions.readAsMultiArray(offset = Array(segmentIndex, 0), shape = Array(3, 1)) + position <- agglomerateToPositions.readAsMultiArray(offset = Array(segmentIndex, 0), shape = Array(1, 3)) } yield Vec3Int(position.getInt(0), position.getInt(1), position.getInt(2)) private def binarySearchForSegment( From 1360184b21f4f4fead6c15d4224f4599e9fc0b3f Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 18 Jun 2025 15:13:20 +0200 Subject: [PATCH 058/100] address coderabbit review suggestions --- .../webknossos/datastore/datareaders/DatasetArray.scala | 2 +- .../webknossos/datastore/services/AgglomerateService.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index 9bb779dc2a9..bccdf6449df 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -191,7 +191,7 @@ class DatasetArray(vaultPath: VaultPath, def readAsMultiArray(offset: Array[Long], shape: Array[Int])(implicit ec: ExecutionContext, tc: TokenContext): Fox[MultiArray] = - if (shape.product == 0) { + if (shape.contains(0)) { Fox.successful(MultiArrayUtils.createEmpty(rank)) } else { val totalOffset: Array[Long] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }.padTo(offset.length, 0) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 4831cb2b705..da53804b61d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -101,7 +101,7 @@ class AgglomerateService(config: DataStoreConfig, registeredAttachmentNormalized.getOrElse( LayerAttachment( mappingName, - dataBaseDir.resolve(dataLayer.name).resolve(agglomerateDir).toUri, + localDatasetDir.resolve(dataLayer.name).resolve(agglomerateDir).toUri, LayerAttachmentDataformat.hdf5 ) ) From 72d5c5bf09323b5534bab5293fe134c8d9ee1378 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 18 Jun 2025 15:14:01 +0200 Subject: [PATCH 059/100] typo --- .../webknossos/datastore/services/AgglomerateService.scala | 2 +- .../datastore/services/mesh/MeshFileService.scala | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index 9b247cb2bb3..673fd6dde51 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -103,7 +103,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, registeredAttachmentNormalized.getOrElse( LayerAttachment( mappingName, - dataBaseDir.resolve(dataLayer.name).resolve(agglomerateDir).toUri, + localDatasetDir.resolve(dataLayer.name).resolve(agglomerateDir).toUri, LayerAttachmentDataformat.hdf5 ) ) 
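Across these patches the AgglomerateService settles into one shape: resolve an AgglomerateFileKey for the requested mapping, falling back to a conventional local HDF5 attachment under <dataset>/<layer>/agglomerates when none is registered, and then dispatch every operation on the attachment's data format, failing explicitly for anything unsupported. The following is a minimal sketch of that dispatch pattern, not the actual implementation: it uses simplified stand-in types and Either where the real code uses AgglomerateFileKey, Fox and the injected zarr/hdf5 services.

sealed trait AttachmentFormat
case object Zarr3 extends AttachmentFormat
case object Hdf5 extends AttachmentFormat
case object OtherFormat extends AttachmentFormat

// Stand-ins for the zarr3- and hdf5-backed agglomerate service implementations.
class ZarrBackend { def largestAgglomerateId(mapping: String): Either[String, Long] = Right(0L) }
class Hdf5Backend { def largestAgglomerateId(mapping: String): Either[String, Long] = Right(0L) }

class AgglomerateDispatcher(zarr: ZarrBackend, hdf5: Hdf5Backend) {
  // Each public operation only picks a backend; the per-format logic lives in the backends.
  def largestAgglomerateId(mapping: String, format: AttachmentFormat): Either[String, Long] =
    format match {
      case Zarr3       => zarr.largestAgglomerateId(mapping)
      case Hdf5        => hdf5.largestAgglomerateId(mapping)
      case unsupported => Left(s"Trying to load agglomerate file with unsupported data format $unsupported")
    }
}

object AgglomerateDispatcherExample {
  def main(args: Array[String]): Unit = {
    val dispatcher = new AgglomerateDispatcher(new ZarrBackend, new Hdf5Backend)
    // Prints Left(...unsupported data format OtherFormat); supported formats delegate to a backend.
    println(dispatcher.largestAgglomerateId("someMapping", OtherFormat))
  }
}

The real service additionally wraps each branch in Fox and chains error messages onto it, but the branching itself is the same as above.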
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index 0b89435ac78..508abbc2a5e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -19,7 +19,6 @@ import org.apache.commons.io.FilenameUtils import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{Json, OFormat} -import java.net.URI import java.nio.file.Paths import javax.inject.Inject import scala.concurrent.ExecutionContext @@ -91,12 +90,12 @@ class MeshFileService @Inject()(config: DataStoreConfig, case Some(attachments) => attachments.meshes.find(_.name == meshFileName) case None => None } - val localDatsetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + val localDatasetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) for { registeredAttachmentNormalized <- tryo(registeredAttachment.map { attachment => attachment.copy( path = - remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatsetDir, dataLayer.name)) + remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatasetDir, dataLayer.name)) }) } yield MeshFileKey( @@ -105,7 +104,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, registeredAttachmentNormalized.getOrElse( LayerAttachment( meshFileName, - dataBaseDir.resolve(dataLayer.name).resolve(meshesDir).toUri, + localDatasetDir.resolve(dataLayer.name).resolve(meshesDir).toUri, LayerAttachmentDataformat.hdf5 ) ) From 0c961ebc98b0b65ac52825021ce6cc1f4d4a030b Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 19 Jun 2025 08:29:16 +0200 Subject: [PATCH 060/100] Fix dtype bug, remove singleton instantiations again --- .../webknossos/datastore/DataStoreModule.scala | 3 --- .../datastore/datareaders/DatasetArray.scala | 2 +- .../datastore/datareaders/MultiArrayUtils.scala | 14 +++++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index 76472ba638c..2c5f1c63239 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -26,9 +26,6 @@ class DataStoreModule extends AbstractModule { bind(classOf[DataVaultService]).asEagerSingleton() bind(classOf[DSRemoteWebknossosClient]).asEagerSingleton() bind(classOf[BinaryDataServiceHolder]).asEagerSingleton() - bind(classOf[ZarrAgglomerateService]).asEagerSingleton() - bind(classOf[Hdf5AgglomerateService]).asEagerSingleton() - bind(classOf[AgglomerateService]).asEagerSingleton() bind(classOf[MappingService]).asEagerSingleton() bind(classOf[AdHocMeshServiceHolder]).asEagerSingleton() bind(classOf[ApplicationHealthService]).asEagerSingleton() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index bccdf6449df..1a2b37e1ef8 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -192,7 +192,7 @@ class DatasetArray(vaultPath: VaultPath, def readAsMultiArray(offset: Array[Long], shape: Array[Int])(implicit ec: ExecutionContext, tc: TokenContext): Fox[MultiArray] = if (shape.contains(0)) { - Fox.successful(MultiArrayUtils.createEmpty(rank)) + Fox.successful(MultiArrayUtils.createEmpty(header.resolvedDataType, rank)) } else { val totalOffset: Array[Long] = offset.zip(header.voxelOffset).map { case (o, v) => o - v }.padTo(offset.length, 0) val chunkIndices = ChunkUtils.computeChunkIndices(datasetShape, chunkShape, shape, totalOffset) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala index a5820990e28..d845a6d7270 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/MultiArrayUtils.scala @@ -45,9 +45,17 @@ object MultiArrayUtils extends LazyLogging { } } - def createEmpty(rank: Int): MultiArray = - // The data type of the empty MultiArray doesn’t matter, since we’re not accessing any actual elements. - MultiArray.factory(MADataType.INT, Array.fill(rank)(0)) + def createEmpty(dataType: ArrayDataType, rank: Int): MultiArray = { + val datyTypeMA = dataType match { + case ArrayDataType.i1 | ArrayDataType.u1 => MADataType.BYTE + case ArrayDataType.i2 | ArrayDataType.u2 => MADataType.SHORT + case ArrayDataType.i4 | ArrayDataType.u4 => MADataType.INT + case ArrayDataType.i8 | ArrayDataType.u8 => MADataType.LONG + case ArrayDataType.f4 => MADataType.FLOAT + case ArrayDataType.f8 => MADataType.DOUBLE + } + MultiArray.factory(datyTypeMA, Array.fill(rank)(0)) + } def toLongArray(multiArray: MultiArray): Box[Array[Long]] = multiArray.getDataType match { From 365ec5dd8805d1d7a32f9a8dc11a4893c298696d Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 19 Jun 2025 08:31:37 +0200 Subject: [PATCH 061/100] rename lookup function as suggested in pr review --- .../datastore/controllers/DataSourceController.scala | 10 +++++----- .../datastore/services/AgglomerateService.scala | 4 ++-- .../datastore/services/SegmentIndexFileService.scala | 2 +- .../datastore/services/mesh/MeshMappingHelper.scala | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 50bd985e2cd..c794a7ff253 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -284,7 +284,7 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFileKey(dataSource.id, dataLayer, mappingName) skeleton <- agglomerateService .generateSkeleton(agglomerateFileKey, agglomerateId) ?~> "agglomerateSkeleton.failed" } yield Ok(skeleton.toByteArray).as(protobufMimeType) @@ -305,7 +305,7 @@ 
class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFileKey(dataSource.id, dataLayer, mappingName) agglomerateGraph <- agglomerateService .generateAgglomerateGraph(agglomerateFileKey, agglomerateId) ?~> "agglomerateGraph.failed" } yield Ok(agglomerateGraph.toByteArray).as(protobufMimeType) @@ -326,7 +326,7 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFileKey(dataSource.id, dataLayer, mappingName) position <- agglomerateService .positionForSegmentId(agglomerateFileKey, segmentId) ?~> "getSegmentPositionFromAgglomerateFile.failed" } yield Ok(Json.toJson(position)) @@ -346,7 +346,7 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFileKey(dataSource.id, dataLayer, mappingName) largestAgglomerateId: Long <- agglomerateService.largestAgglomerateId(agglomerateFileKey) } yield Ok(Json.toJson(largestAgglomerateId)) } @@ -365,7 +365,7 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFileKey(dataSource.id, dataLayer, mappingName) agglomerateIds: Seq[Long] <- agglomerateService.agglomerateIdsForSegmentIds( agglomerateFileKey, request.body.items diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index da53804b61d..4238e4816a4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -75,7 +75,7 @@ class AgglomerateService(config: DataStoreConfig, clearedHdf5Count + clearedZarrCount } - def lookUpAgglomerateFile(dataSourceId: DataSourceId, dataLayer: DataLayer, mappingName: String)( + def lookUpAgglomerateFileKey(dataSourceId: DataSourceId, dataLayer: DataLayer, mappingName: String)( implicit ec: ExecutionContext): Fox[AgglomerateFileKey] = agglomerateFileKeyCache.getOrLoad((dataSourceId, dataLayer.name, mappingName), _ => lookUpAgglomerateFileImpl(dataSourceId, dataLayer, mappingName).toFox) @@ -113,7 +113,7 @@ class AgglomerateService(config: DataStoreConfig, for { mappingName <- request.settings.appliedAgglomerate.toFox elementClass = request.dataLayer.elementClass - agglomerateFileKey <- lookUpAgglomerateFile(request.dataSourceIdOrVolumeDummy, request.dataLayer, mappingName) + agglomerateFileKey <- lookUpAgglomerateFileKey(request.dataSourceIdOrVolumeDummy, request.dataLayer, 
mappingName) data <- agglomerateFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrAgglomerateService.applyAgglomerate(agglomerateFileKey, elementClass)(data) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala index fde4e56a8b8..52c9d19c6b8 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala @@ -227,7 +227,7 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, datasetDirectoryName, dataLayerName) agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSource.id, dataLayer, mappingName) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFileKey(dataSource.id, dataLayer, mappingName) largestAgglomerateId <- agglomerateService.largestAgglomerateId(agglomerateFileKey) segmentIds <- if (segmentOrAgglomerateId <= largestAgglomerateId) { agglomerateService.segmentIdsForAgglomerateId( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala index 6bb64a978b0..fee4a7209ab 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshMappingHelper.scala @@ -39,7 +39,7 @@ trait MeshMappingHelper extends FoxImplicits { // assume agglomerate id, fetch oversegmentation segment ids for it for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFileKey(dataSourceId, dataLayer, mappingName) segmentIdsBox <- agglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId).shiftBox segmentIds <- segmentIdsBox match { case Full(segmentIds) => Fox.successful(segmentIds) @@ -59,7 +59,7 @@ trait MeshMappingHelper extends FoxImplicits { else // the agglomerate id is not present in the editable mapping. Fetch its info from the base mapping. 
for { agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileKey <- agglomerateService.lookUpAgglomerateFile(dataSourceId, dataLayer, mappingName) + agglomerateFileKey <- agglomerateService.lookUpAgglomerateFileKey(dataSourceId, dataLayer, mappingName) localSegmentIds <- agglomerateService.segmentIdsForAgglomerateId(agglomerateFileKey, agglomerateId) } yield localSegmentIds } yield segmentIds From be77185d189dfcd702d4d8755bd94a843ebcae7a Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 19 Jun 2025 10:52:14 +0200 Subject: [PATCH 062/100] add ucar dependency resolver --- project/DependencyResolvers.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/project/DependencyResolvers.scala b/project/DependencyResolvers.scala index c072dc60268..d40113f1949 100644 --- a/project/DependencyResolvers.scala +++ b/project/DependencyResolvers.scala @@ -3,6 +3,7 @@ import sbt._ object DependencyResolvers { val atlassian = "Atlassian Releases" at "https://maven.atlassian.com/public/" val sciJava = "SciJava Public" at "https://maven.scijava.org/content/repositories/public/" + val ucar = "UCAR" at "https://artifacts.unidata.ucar.edu/content/repositories/unidata-releases/" val senbox = "Senbox (for Zarr)" at "https://nexus.senbox.net/nexus/content/groups/public/" val dependencyResolvers: Seq[MavenRepository] = @@ -11,6 +12,7 @@ object DependencyResolvers { Seq( Resolver.typesafeRepo("releases"), sciJava, + ucar, atlassian, senbox ) From 5920905a9a7f089bdf5dd92a761f253b3c2a2a2a Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 19 Jun 2025 11:01:35 +0200 Subject: [PATCH 063/100] remove sciJava resolver --- project/DependencyResolvers.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/DependencyResolvers.scala b/project/DependencyResolvers.scala index d40113f1949..6cc57cf3278 100644 --- a/project/DependencyResolvers.scala +++ b/project/DependencyResolvers.scala @@ -11,7 +11,7 @@ object DependencyResolvers { Resolver.sonatypeOssRepos("snapshots") ++ Seq( Resolver.typesafeRepo("releases"), - sciJava, + // sciJava, ucar, atlassian, senbox From 753c25b53fc1e6fec8a525d2b5f13032cd3e25f8 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 19 Jun 2025 11:04:56 +0200 Subject: [PATCH 064/100] Revert "remove sciJava resolver" This reverts commit 5920905a9a7f089bdf5dd92a761f253b3c2a2a2a. --- project/DependencyResolvers.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/DependencyResolvers.scala b/project/DependencyResolvers.scala index 6cc57cf3278..d40113f1949 100644 --- a/project/DependencyResolvers.scala +++ b/project/DependencyResolvers.scala @@ -11,7 +11,7 @@ object DependencyResolvers { Resolver.sonatypeOssRepos("snapshots") ++ Seq( Resolver.typesafeRepo("releases"), - // sciJava, + sciJava, ucar, atlassian, senbox From 11d26118ee51a990638dfa897b81a6e120ef4570 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 19 Jun 2025 11:05:02 +0200 Subject: [PATCH 065/100] Revert "add ucar dependency resolver" This reverts commit be77185d189dfcd702d4d8755bd94a843ebcae7a. 
--- project/DependencyResolvers.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/project/DependencyResolvers.scala b/project/DependencyResolvers.scala index d40113f1949..c072dc60268 100644 --- a/project/DependencyResolvers.scala +++ b/project/DependencyResolvers.scala @@ -3,7 +3,6 @@ import sbt._ object DependencyResolvers { val atlassian = "Atlassian Releases" at "https://maven.atlassian.com/public/" val sciJava = "SciJava Public" at "https://maven.scijava.org/content/repositories/public/" - val ucar = "UCAR" at "https://artifacts.unidata.ucar.edu/content/repositories/unidata-releases/" val senbox = "Senbox (for Zarr)" at "https://nexus.senbox.net/nexus/content/groups/public/" val dependencyResolvers: Seq[MavenRepository] = @@ -12,7 +11,6 @@ object DependencyResolvers { Seq( Resolver.typesafeRepo("releases"), sciJava, - ucar, atlassian, senbox ) From 0fddb3deb38cde7a474fc3c6b5c3f129f5376511 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 23 Jun 2025 09:42:19 +0200 Subject: [PATCH 066/100] unify function names --- .../datastore/controllers/DSMeshController.scala | 4 ++-- .../datastore/services/mesh/DSFullMeshService.scala | 2 +- .../datastore/services/mesh/MeshFileService.scala | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala index fb2eeba0081..272c7cabacb 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSMeshController.scala @@ -61,7 +61,7 @@ class DSMeshController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - meshFileKey <- meshFileService.lookUpMeshFile(dataSource.id, dataLayer, request.body.meshFileName) + meshFileKey <- meshFileService.lookUpMeshFileKey(dataSource.id, dataLayer, request.body.meshFileName) mappingNameForMeshFile <- meshFileService.mappingNameForMeshFile(meshFileKey) segmentIds: Seq[Long] <- segmentIdsForAgglomerateIdIfNeeded( dataSource.id, @@ -87,7 +87,7 @@ class DSMeshController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - meshFileKey <- meshFileService.lookUpMeshFile(dataSource.id, dataLayer, request.body.meshFileName) + meshFileKey <- meshFileService.lookUpMeshFileKey(dataSource.id, dataLayer, request.body.meshFileName) (data, encoding) <- meshFileService.readMeshChunk(meshFileKey, request.body.requests) ?~> "mesh.file.loadChunk.failed" } yield { if (encoding.contains("gzip")) { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index 991a783c99a..bba11a88e97 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -130,7 +130,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, datasetDirectoryName, dataLayerName) meshFileName <- fullMeshRequest.meshFileName.toFox ?~> "meshFileName.required" - meshFileKey <- 
meshFileService.lookUpMeshFile(dataSource.id, dataLayer, meshFileName) + meshFileKey <- meshFileService.lookUpMeshFileKey(dataSource.id, dataLayer, meshFileName) mappingNameForMeshFile <- meshFileService.mappingNameForMeshFile(meshFileKey) segmentIds <- segmentIdsForAgglomerateIdIfNeeded( dataSource.id, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index 508abbc2a5e..8308e3c1899 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -78,7 +78,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, private val meshFileKeyCache : AlfuCache[(DataSourceId, String, String), MeshFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → MeshFileKey - def lookUpMeshFile(dataSourceId: DataSourceId, dataLayer: DataLayer, meshFileName: String)( + def lookUpMeshFileKey(dataSourceId: DataSourceId, dataLayer: DataLayer, meshFileName: String)( implicit ec: ExecutionContext): Fox[MeshFileKey] = meshFileKeyCache.getOrLoad((dataSourceId, dataLayer.name, meshFileName), _ => lookUpMeshFileImpl(dataSourceId, dataLayer, meshFileName).toFox) @@ -133,8 +133,9 @@ class MeshFileService @Inject()(config: DataStoreConfig, Fox .serialSequence(allMeshFileNames.toSeq) { meshFileName => for { - meshFileKey <- lookUpMeshFile(dataSourceId, dataLayer, meshFileName) ?~> Messages("mesh.file.lookup.failed", - meshFileName) + meshFileKey <- lookUpMeshFileKey(dataSourceId, dataLayer, meshFileName) ?~> Messages( + "mesh.file.lookup.failed", + meshFileName) formatVersion <- versionForMeshFile(meshFileKey) ?~> Messages("mesh.file.readVersion.failed", meshFileName) mappingName <- mappingNameForMeshFile(meshFileKey) ?~> Messages("mesh.file.readMappingName.failed", meshFileName) From c5f12f5ce15356cd1876b8d9c6efad04167e45eb Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 24 Jun 2025 11:01:29 +0200 Subject: [PATCH 067/100] WIP: Read Zarr Segment Index Files --- .../controllers/DataSourceController.scala | 14 +++----------- .../SegmentIndexFileService.scala | 7 ++++++- 2 files changed, 9 insertions(+), 12 deletions(-) rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/{ => segmentindex}/SegmentIndexFileService.scala (98%) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index ae8a0fab746..5c63d58f674 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -5,21 +5,13 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong -import com.scalableminds.webknossos.datastore.explore.{ - ExploreRemoteDatasetRequest, - ExploreRemoteDatasetResponse, - ExploreRemoteLayerService -} -import com.scalableminds.webknossos.datastore.helpers.{ - GetMultipleSegmentIndexParameters, - GetSegmentIndexParameters, - SegmentIndexData, - SegmentStatisticsParameters -} +import 
com.scalableminds.webknossos.datastore.explore.{ExploreRemoteDatasetRequest, ExploreRemoteDatasetResponse, ExploreRemoteLayerService} +import com.scalableminds.webknossos.datastore.helpers.{GetMultipleSegmentIndexParameters, GetSegmentIndexParameters, SegmentIndexData, SegmentStatisticsParameters} import com.scalableminds.webknossos.datastore.models.datasource.inbox.InboxDataSource import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSource, DataSourceId, GenericDataSource} import com.scalableminds.webknossos.datastore.services._ import com.scalableminds.webknossos.datastore.services.mesh.{MeshFileService, MeshMappingHelper} +import com.scalableminds.webknossos.datastore.services.segmentindex.SegmentIndexFileService import com.scalableminds.webknossos.datastore.services.uploading._ import com.scalableminds.webknossos.datastore.storage.DataVaultService import net.liftweb.common.Box.tryo diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala similarity index 98% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala index 52c9d19c6b8..ccf9ec2d080 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala @@ -1,4 +1,4 @@ -package com.scalableminds.webknossos.datastore.services +package com.scalableminds.webknossos.datastore.services.segmentindex import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.geometry.{BoundingBox, Vec3Int} @@ -14,6 +14,11 @@ import com.scalableminds.webknossos.datastore.models.requests.{ DataServiceRequestSettings } import com.scalableminds.webknossos.datastore.models.{AdditionalCoordinate, VoxelPosition} +import com.scalableminds.webknossos.datastore.services.{ + BinaryDataServiceHolder, + DataSourceRepository, + Hdf5HashedArrayUtils +} import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} import net.liftweb.common.Box.tryo import net.liftweb.common.{Box, Full} From b29406113b6bc21e2dcebffa8dcd9078b4dedb06 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 24 Jun 2025 12:04:30 +0200 Subject: [PATCH 068/100] introduce abstraction for attached segment index files --- .../controllers/DataSourceController.scala | 75 +++-- .../datasource/DatasetLayerAttachments.scala | 11 +- .../services/AgglomerateService.scala | 6 +- .../services/Hdf5AgglomerateService.scala | 9 +- .../services/mesh/MeshFileService.scala | 6 +- .../Hdf5SegmentIndexFileService.scala | 64 ++++ .../SegmentIndexFileService.scala | 280 ++++++++---------- .../ZarrSegmentIndexFileService.scala | 13 + .../datastore/storage/Hdf5FileCache.scala | 12 +- 9 files changed, 283 insertions(+), 193 deletions(-) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 5c63d58f674..3de7263e959 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -5,8 +5,17 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong -import com.scalableminds.webknossos.datastore.explore.{ExploreRemoteDatasetRequest, ExploreRemoteDatasetResponse, ExploreRemoteLayerService} -import com.scalableminds.webknossos.datastore.helpers.{GetMultipleSegmentIndexParameters, GetSegmentIndexParameters, SegmentIndexData, SegmentStatisticsParameters} +import com.scalableminds.webknossos.datastore.explore.{ + ExploreRemoteDatasetRequest, + ExploreRemoteDatasetResponse, + ExploreRemoteLayerService +} +import com.scalableminds.webknossos.datastore.helpers.{ + GetMultipleSegmentIndexParameters, + GetSegmentIndexParameters, + SegmentIndexData, + SegmentStatisticsParameters +} import com.scalableminds.webknossos.datastore.models.datasource.inbox.InboxDataSource import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSource, DataSourceId, GenericDataSource} import com.scalableminds.webknossos.datastore.services._ @@ -27,7 +36,7 @@ import java.io.File import java.net.URI import scala.collection.mutable.ListBuffer import scala.concurrent.duration._ -import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.ExecutionContext class DataSourceController @Inject()( dataSourceRepository: DataSourceRepository, @@ -36,6 +45,7 @@ class DataSourceController @Inject()( val binaryDataServiceHolder: BinaryDataServiceHolder, connectomeFileService: ConnectomeFileService, segmentIndexFileService: SegmentIndexFileService, + agglomerateService: AgglomerateService, storageUsageService: DSUsedStorageService, datasetErrorLoggingService: DSDatasetErrorLoggingService, exploreRemoteLayerService: ExploreRemoteLayerService, @@ -253,7 +263,6 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) @@ -272,7 +281,6 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) @@ -293,7 +301,6 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, 
dataLayerName) @@ -314,7 +321,6 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) @@ -334,7 +340,6 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) @@ -353,7 +358,6 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) @@ -589,9 +593,12 @@ class DataSourceController @Inject()( Action.async { implicit request => accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { - val segmentIndexFileOpt = - segmentIndexFileService.getSegmentIndexFile(organizationId, datasetDirectoryName, dataLayerName).toOption - Future.successful(Ok(Json.toJson(segmentIndexFileOpt.isDefined))) + for { + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + segmentIndexFileKeyBox <- segmentIndexFileService.lookUpSegmentIndexFileKey(dataSource.id, dataLayer).shiftBox + } yield Ok(Json.toJson(segmentIndexFileKeyBox.isDefined)) } } @@ -610,6 +617,7 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) + segmentIndexFileKey <- segmentIndexFileService.lookUpSegmentIndexFileKey(dataSource.id, dataLayer) segmentIds <- segmentIdsForAgglomerateIdIfNeeded( dataSource.id, dataLayer, @@ -619,9 +627,9 @@ class DataSourceController @Inject()( mappingNameForMeshFile = None, omitMissing = false ) - fileMag <- segmentIndexFileService.readFileMag(organizationId, datasetDirectoryName, dataLayerName) + fileMag <- segmentIndexFileService.readFileMag(segmentIndexFileKey) topLeftsNested: Seq[Array[Vec3Int]] <- Fox.serialCombined(segmentIds)(sId => - segmentIndexFileService.readSegmentIndex(organizationId, datasetDirectoryName, dataLayerName, sId)) + segmentIndexFileService.readSegmentIndex(segmentIndexFileKey, sId)) topLefts: Array[Vec3Int] = topLeftsNested.toArray.flatten bucketPositions = segmentIndexFileService.topLeftsToDistinctBucketPositions(topLefts, request.body.mag, @@ -649,6 +657,7 @@ class DataSourceController @Inject()( (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) + segmentIndexFileKey <- segmentIndexFileService.lookUpSegmentIndexFileKey(dataSource.id, dataLayer) segmentIdsAndBucketPositions <- Fox.serialCombined(request.body.segmentIds) { segmentOrAgglomerateId => for { segmentIds <- segmentIdsForAgglomerateIdIfNeeded( @@ 
-660,9 +669,9 @@ class DataSourceController @Inject()( mappingNameForMeshFile = None, omitMissing = true // assume agglomerate ids not present in the mapping belong to user-brushed segments ) - fileMag <- segmentIndexFileService.readFileMag(organizationId, datasetDirectoryName, dataLayerName) + fileMag <- segmentIndexFileService.readFileMag(segmentIndexFileKey) topLeftsNested: Seq[Array[Vec3Int]] <- Fox.serialCombined(segmentIds)(sId => - segmentIndexFileService.readSegmentIndex(organizationId, datasetDirectoryName, dataLayerName, sId)) + segmentIndexFileService.readSegmentIndex(segmentIndexFileKey, sId)) topLefts: Array[Vec3Int] = topLeftsNested.toArray.flatten bucketPositions = segmentIndexFileService.topLeftsToDistinctBucketPositions(topLefts, request.body.mag, @@ -680,15 +689,20 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - _ <- segmentIndexFileService.assertSegmentIndexFileExists(organizationId, datasetDirectoryName, dataLayerName) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + segmentIndexFileKey <- segmentIndexFileService.lookUpSegmentIndexFileKey(dataSource.id, dataLayer) + agglomerateFileKeyOpt <- Fox.runOptional(request.body.mappingName)( + agglomerateService.lookUpAgglomerateFileKey(dataSource.id, dataLayer, _)) volumes <- Fox.serialCombined(request.body.segmentIds) { segmentId => segmentIndexFileService.getSegmentVolume( - organizationId, - datasetDirectoryName, - dataLayerName, + dataSource.id, + dataLayer, + segmentIndexFileKey, + agglomerateFileKeyOpt, segmentId, - request.body.mag, - request.body.mappingName + request.body.mag ) } } yield Ok(Json.toJson(volumes)) @@ -702,14 +716,19 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - _ <- segmentIndexFileService.assertSegmentIndexFileExists(organizationId, datasetDirectoryName, dataLayerName) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + segmentIndexFileKey <- segmentIndexFileService.lookUpSegmentIndexFileKey(dataSource.id, dataLayer) + agglomerateFileKeyOpt <- Fox.runOptional(request.body.mappingName)( + agglomerateService.lookUpAgglomerateFileKey(dataSource.id, dataLayer, _)) boxes <- Fox.serialCombined(request.body.segmentIds) { segmentId => - segmentIndexFileService.getSegmentBoundingBox(organizationId, - datasetDirectoryName, - dataLayerName, + segmentIndexFileService.getSegmentBoundingBox(dataSource.id, + dataLayer, + segmentIndexFileKey, + agglomerateFileKeyOpt, segmentId, - request.body.mag, - request.body.mappingName) + request.body.mag) } } yield Ok(Json.toJson(boxes)) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala index 41b2f3c3e8a..f883be4a96e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala @@ -37,7 +37,16 @@ object LayerAttachmentType extends ExtendedEnumeration { case class 
LayerAttachment(name: String, path: URI, dataFormat: LayerAttachmentDataformat.LayerAttachmentDataformat, - credentialId: Option[String] = None) + credentialId: Option[String] = None) { + // Warning: throws! Use inside of tryo + def localPath: Path = { + if (path.getScheme.nonEmpty && path.getScheme != "file") { + throw new Exception( + "Trying to open non-local hdf5 file. Hdf5 files are only supported on the datastore-local file system") + } + Path.of(path) + } +} object LayerAttachment { implicit val jsonFormat: Format[LayerAttachment] = Json.format[LayerAttachment] diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index d8397830b31..b0cdbe259ed 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -33,7 +33,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, remoteSourceDescriptorService: RemoteSourceDescriptorService) extends LazyLogging with FoxImplicits { - private val agglomerateDir = "agglomerates" + private val localAgglomeratesDir = "agglomerates" private val hdf5AgglomerateFileExtension = "hdf5" private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) @@ -46,7 +46,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, val layerDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName).resolve(dataLayer.name) val scannedAgglomerateFileNames = PathUtils - .listFiles(layerDir.resolve(agglomerateDir), + .listFiles(layerDir.resolve(localAgglomeratesDir), silent = true, PathUtils.fileExtensionFilter(hdf5AgglomerateFileExtension)) .map { paths => @@ -103,7 +103,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig, registeredAttachmentNormalized.getOrElse( LayerAttachment( mappingName, - localDatasetDir.resolve(dataLayer.name).resolve(agglomerateDir).toUri, + localDatasetDir.resolve(dataLayer.name).resolve(localAgglomeratesDir).toUri, LayerAttachmentDataformat.hdf5 ) ) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala index 4cb636c8472..dac8ac33dbd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala @@ -43,13 +43,8 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv def clearCache(predicate: AgglomerateFileKey => Boolean): Int = agglomerateFileCache.clear(predicate) - private def openHdf5(agglomerateFileKey: AgglomerateFileKey): IHDF5Reader = { - if (agglomerateFileKey.attachment.path.getScheme.nonEmpty && agglomerateFileKey.attachment.path.getScheme != "file") { - throw new Exception( - "Trying to open non-local hdf5 agglomerate file. 
Hdf5 agglomerate files are only supported on the datastore-local file system") - } - HDF5FactoryProvider.get.openForReading(Path.of(agglomerateFileKey.attachment.path).toFile) - } + private def openHdf5(agglomerateFileKey: AgglomerateFileKey): IHDF5Reader = + HDF5FactoryProvider.get.openForReading(agglomerateFileKey.attachment.localPath.toFile) def largestAgglomerateId(agglomerateFileKey: AgglomerateFileKey): Box[Long] = tryo { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index 8308e3c1899..b9de26d4136 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -73,7 +73,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, with Hdf5HashedArrayUtils { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) - private val meshesDir = "meshes" + private val localMeshesDir = "meshes" private val meshFileKeyCache : AlfuCache[(DataSourceId, String, String), MeshFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → MeshFileKey @@ -104,7 +104,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, registeredAttachmentNormalized.getOrElse( LayerAttachment( meshFileName, - localDatasetDir.resolve(dataLayer.name).resolve(meshesDir).toUri, + localDatasetDir.resolve(dataLayer.name).resolve(localMeshesDir).toUri, LayerAttachmentDataformat.hdf5 ) ) @@ -119,7 +119,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, val layerDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName).resolve(dataLayer.name) val scannedMeshFileNames = PathUtils - .listFiles(layerDir.resolve(meshesDir), silent = true, PathUtils.fileExtensionFilter(hdf5FileExtension)) + .listFiles(layerDir.resolve(localMeshesDir), silent = true, PathUtils.fileExtensionFilter(hdf5FileExtension)) .map { paths => paths.map(path => FilenameUtils.removeExtension(path.getFileName.toString)) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala new file mode 100644 index 00000000000..32a2d32991d --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala @@ -0,0 +1,64 @@ +package com.scalableminds.webknossos.datastore.services.segmentindex + +import com.scalableminds.util.geometry.Vec3Int +import com.scalableminds.util.tools.{Fox, FoxImplicits} +import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} +import net.liftweb.common.Box.tryo + +import javax.inject.Inject +import scala.concurrent.ExecutionContext + +class Hdf5SegmentIndexFileService @Inject()() extends FoxImplicits { + private lazy val fileHandleCache = new Hdf5FileCache(100) + + def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long)( + implicit ec: ExecutionContext): Fox[Array[Vec3Int]] = + for { + segmentIndex <- fileHandleCache.getCachedHdf5File(segmentIndexFileKey.attachment)(CachedHdf5File.fromPath).toFox + nBuckets = segmentIndex.uint64Reader.getAttr("/", "n_hash_buckets") + + bucketIndex = segmentIndex.hashFunction(segmentId) % nBuckets + 
bucketOffsets = segmentIndex.uint64Reader.readArrayBlockWithOffset("hash_bucket_offsets", 2, bucketIndex) + bucketStart = bucketOffsets(0) + bucketEnd = bucketOffsets(1) + + hashBucketExists = bucketEnd - bucketStart != 0 + topLeftsOpt <- Fox.runIf(hashBucketExists)(readTopLefts(segmentIndex, bucketStart, bucketEnd, segmentId)) + topLefts = topLeftsOpt.flatten + } yield + topLefts match { + case Some(topLefts) => topLefts.flatMap(topLeft => Vec3Int.fromArray(topLeft.map(_.toInt))) + case None => Array.empty + } + + def readFileMag(segmentIndexFileKey: SegmentIndexFileKey)(implicit ec: ExecutionContext): Fox[Vec3Int] = + for { + segmentIndex <- fileHandleCache.getCachedHdf5File(segmentIndexFileKey.attachment)(CachedHdf5File.fromPath).toFox + mag <- Vec3Int.fromArray(segmentIndex.uint64Reader.getArrayAttr("/", "mag").map(_.toInt)).toFox + } yield mag + + private def readTopLefts(segmentIndex: CachedHdf5File, bucketStart: Long, bucketEnd: Long, segmentId: Long)( + implicit ec: ExecutionContext): Fox[Option[Array[Array[Short]]]] = + for { + _ <- Fox.successful(()) + buckets = segmentIndex.uint64Reader.readMatrixBlockWithOffset("hash_buckets", + (bucketEnd - bucketStart + 1).toInt, + 3, + bucketStart, + 0) + bucketLocalOffset = buckets.map(_(0)).indexOf(segmentId) + topLeftOpts <- Fox.runIf(bucketLocalOffset >= 0)(for { + _ <- Fox.successful(()) + topLeftStart = buckets(bucketLocalOffset)(1) + topLeftEnd = buckets(bucketLocalOffset)(2) + bucketEntriesDtype <- tryo(segmentIndex.stringReader.getAttr("/", "dtype_bucket_entries")).toFox + _ <- Fox + .fromBool(bucketEntriesDtype == "uint16") ?~> "value for dtype_bucket_entries in segment index file is not supported, only uint16 is supported" + topLefts = segmentIndex.uint16Reader.readMatrixBlockWithOffset("top_lefts", + (topLeftEnd - topLeftStart).toInt, + 3, + topLeftStart, + 0) + } yield topLefts) + } yield topLeftOpts +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala index ccf9ec2d080..379f5db007d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala @@ -1,13 +1,20 @@ package com.scalableminds.webknossos.datastore.services.segmentindex import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.{BoundingBox, Vec3Int} import com.scalableminds.util.io.PathUtils import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NativeBucketScanner, SegmentStatistics} -import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, ElementClass} +import com.scalableminds.webknossos.datastore.models.datasource.{ + DataLayer, + DataSourceId, + ElementClass, + LayerAttachment, + LayerAttachmentDataformat +} import com.scalableminds.webknossos.datastore.models.requests.{ Cuboid, DataServiceDataRequest, @@ -15,101 +22,96 @@ import com.scalableminds.webknossos.datastore.models.requests.{ } import com.scalableminds.webknossos.datastore.models.{AdditionalCoordinate, VoxelPosition} import 
com.scalableminds.webknossos.datastore.services.{ + AgglomerateService, BinaryDataServiceHolder, - DataSourceRepository, Hdf5HashedArrayUtils } -import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} +import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, RemoteSourceDescriptorService} import net.liftweb.common.Box.tryo -import net.liftweb.common.{Box, Full} -import play.api.i18n.MessagesProvider +import net.liftweb.common.Box import java.nio.file.{Path, Paths} import javax.inject.Inject import scala.concurrent.ExecutionContext +case class SegmentIndexFileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) + class SegmentIndexFileService @Inject()(config: DataStoreConfig, - binaryDataServiceHolder: BinaryDataServiceHolder, - dataSourceRepository: DataSourceRepository)(implicit ec: ExecutionContext) + hdf5SegmentIndexFileService: Hdf5SegmentIndexFileService, + zarrSegmentIndexFileService: ZarrSegmentIndexFileService, + remoteSourceDescriptorService: RemoteSourceDescriptorService, + agglomerateService: AgglomerateService, + binaryDataServiceHolder: BinaryDataServiceHolder)(implicit ec: ExecutionContext) extends FoxImplicits with Hdf5HashedArrayUtils with SegmentStatistics { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) - private val segmentIndexDir = "segmentIndex" - - private lazy val fileHandleCache = new Hdf5FileCache(10) + private val localSegmentIndexDir = "segmentIndex" protected lazy val bucketScanner = new NativeBucketScanner() - def getSegmentIndexFile(organizationId: String, datasetDirectoryName: String, dataLayerName: String): Box[Path] = + private val segmentIndexFileKeyCache + : AlfuCache[(DataSourceId, String), SegmentIndexFileKey] = AlfuCache() // dataSourceId, layerName → SegmentIndexFileKey + + def lookUpSegmentIndexFileKey(dataSourceId: DataSourceId, dataLayer: DataLayer)( + implicit ec: ExecutionContext): Fox[SegmentIndexFileKey] = + segmentIndexFileKeyCache.getOrLoad((dataSourceId, dataLayer.name), + _ => lookUpSegmentIndexFileKeyImpl(dataSourceId, dataLayer)) + + private def lookUpSegmentIndexFileKeyImpl(dataSourceId: DataSourceId, + dataLayer: DataLayer): Fox[SegmentIndexFileKey] = { + val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments.flatMap(_.segmentIndex) + val localDatasetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + val localAttachment: Option[LayerAttachment] = findLocalSegmentIndexFile(localDatasetDir, dataLayer).toOption + for { + registeredAttachmentNormalized <- tryo(registeredAttachment.map { attachment => + attachment.copy( + path = + remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatasetDir, dataLayer.name)) + }).toFox + selectedAttachment <- registeredAttachmentNormalized.orElse(localAttachment).toFox ?~> "segmentIndexFile.notFound" + } yield + SegmentIndexFileKey( + dataSourceId, + dataLayer.name, + selectedAttachment + ) + } + + private def findLocalSegmentIndexFile(localDatasetDir: Path, dataLayer: DataLayer): Box[LayerAttachment] = { + val segmentIndexDir = localDatasetDir.resolve(dataLayer.name).resolve(this.localSegmentIndexDir) for { - _ <- Full("") - layerDir = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayerName) - segmentIndexDir = layerDir.resolve(this.segmentIndexDir) files <- PathUtils.listFiles(segmentIndexDir, silent = true, PathUtils.fileExtensionFilter(hdf5FileExtension)) file <- files.headOption - } 
yield file + } yield + LayerAttachment( + file.getFileName.toString, + file.toUri, + LayerAttachmentDataformat.hdf5 + ) + } /** * Read the segment index file and return the bucket positions for the given segment id. * The bucket positions are the top left corners of the buckets that contain the segment in the file mag. */ - def readSegmentIndex(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - segmentId: Long): Fox[Array[Vec3Int]] = - for { - segmentIndexPath <- getSegmentIndexFile(organizationId, datasetDirectoryName, dataLayerName).toFox - segmentIndex = fileHandleCache.getCachedHdf5File(segmentIndexPath)(CachedHdf5File.fromPath) - nBuckets = segmentIndex.uint64Reader.getAttr("/", "n_hash_buckets") - - bucketIndex = segmentIndex.hashFunction(segmentId) % nBuckets - bucketOffsets = segmentIndex.uint64Reader.readArrayBlockWithOffset("hash_bucket_offsets", 2, bucketIndex) - bucketStart = bucketOffsets(0) - bucketEnd = bucketOffsets(1) - - hashBucketExists = bucketEnd - bucketStart != 0 - topLeftsOpt <- Fox.runIf(hashBucketExists)(readTopLefts(segmentIndex, bucketStart, bucketEnd, segmentId)) - topLefts = topLeftsOpt.flatten - } yield - topLefts match { - case Some(topLefts) => topLefts.flatMap(topLeft => Vec3Int.fromArray(topLeft.map(_.toInt))) - case None => Array.empty - } + def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long): Fox[Array[Vec3Int]] = + segmentIndexFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrSegmentIndexFileService.readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long) + case LayerAttachmentDataformat.hdf5 => + hdf5SegmentIndexFileService.readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long) + case _ => unsupportedDataFormat(segmentIndexFileKey) + } - def readFileMag(organizationId: String, datasetDirectoryName: String, dataLayerName: String): Fox[Vec3Int] = - for { - segmentIndexPath <- getSegmentIndexFile(organizationId, datasetDirectoryName, dataLayerName).toFox - segmentIndex = fileHandleCache.getCachedHdf5File(segmentIndexPath)(CachedHdf5File.fromPath) - mag <- Vec3Int.fromArray(segmentIndex.uint64Reader.getArrayAttr("/", "mag").map(_.toInt)).toFox - } yield mag - - private def readTopLefts(segmentIndex: CachedHdf5File, - bucketStart: Long, - bucketEnd: Long, - segmentId: Long): Fox[Option[Array[Array[Short]]]] = - for { - _ <- Fox.successful(()) - buckets = segmentIndex.uint64Reader.readMatrixBlockWithOffset("hash_buckets", - (bucketEnd - bucketStart + 1).toInt, - 3, - bucketStart, - 0) - bucketLocalOffset = buckets.map(_(0)).indexOf(segmentId) - topLeftOpts <- Fox.runIf(bucketLocalOffset >= 0)(for { - _ <- Fox.successful(()) - topLeftStart = buckets(bucketLocalOffset)(1) - topLeftEnd = buckets(bucketLocalOffset)(2) - bucketEntriesDtype <- tryo(segmentIndex.stringReader.getAttr("/", "dtype_bucket_entries")).toFox - _ <- Fox - .fromBool(bucketEntriesDtype == "uint16") ?~> "value for dtype_bucket_entries in segment index file is not supported, only uint16 is supported" - topLefts = segmentIndex.uint16Reader.readMatrixBlockWithOffset("top_lefts", - (topLeftEnd - topLeftStart).toInt, - 3, - topLeftStart, - 0) - } yield topLefts) - } yield topLeftOpts + def readFileMag(segmentIndexFileKey: SegmentIndexFileKey): Fox[Vec3Int] = + segmentIndexFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrSegmentIndexFileService.readFileMag(segmentIndexFileKey: SegmentIndexFileKey) + case 
LayerAttachmentDataformat.hdf5 => + hdf5SegmentIndexFileService.readFileMag(segmentIndexFileKey: SegmentIndexFileKey) + case _ => unsupportedDataFormat(segmentIndexFileKey) + } def topLeftsToDistinctBucketPositions(topLefts: Array[Vec3Int], targetMag: Vec3Int, @@ -120,119 +122,94 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, .map(_ / Vec3Int.full(DataLayer.bucketLength)) // map positions to cube indices .distinct - def getSegmentVolume(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, + def getSegmentVolume(dataSourceId: DataSourceId, + dataLayer: DataLayer, + segmentIndexFileKey: SegmentIndexFileKey, + agglomerateFileKeyOpt: Option[AgglomerateFileKey], segmentId: Long, - mag: Vec3Int, - mappingName: Option[String])(implicit m: MessagesProvider, tc: TokenContext): Fox[Long] = + mag: Vec3Int)(implicit tc: TokenContext): Fox[Long] = calculateSegmentVolume( segmentId, mag, None, // see #7556 - getBucketPositions(organizationId, datasetDirectoryName, dataLayerName, mappingName), - getDataForBucketPositions(organizationId, datasetDirectoryName, dataLayerName, mappingName) + getBucketPositions(segmentIndexFileKey, agglomerateFileKeyOpt), + getDataForBucketPositions(dataSourceId, dataLayer, agglomerateFileKeyOpt) ) - def getSegmentBoundingBox( - organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - segmentId: Long, - mag: Vec3Int, - mappingName: Option[String])(implicit m: MessagesProvider, tc: TokenContext): Fox[BoundingBox] = + def getSegmentBoundingBox(dataSourceId: DataSourceId, + dataLayer: DataLayer, + segmentIndexFileKey: SegmentIndexFileKey, + agglomerateFileKeyOpt: Option[AgglomerateFileKey], + segmentId: Long, + mag: Vec3Int)(implicit tc: TokenContext): Fox[BoundingBox] = calculateSegmentBoundingBox( segmentId, mag, None, // see #7556 - getBucketPositions(organizationId, datasetDirectoryName, dataLayerName, mappingName), - getDataForBucketPositions(organizationId, datasetDirectoryName, dataLayerName, mappingName) + getBucketPositions(segmentIndexFileKey, agglomerateFileKeyOpt), + getDataForBucketPositions(dataSourceId, dataLayer, agglomerateFileKeyOpt) ) - def assertSegmentIndexFileExists(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String): Fox[Path] = - getSegmentIndexFile(organizationId, datasetDirectoryName, dataLayerName).toFox ?~> "segmentIndexFile.notFound" - - private def getDataForBucketPositions(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - mappingName: Option[String])( + private def getDataForBucketPositions(dataSourceId: DataSourceId, + dataLayer: DataLayer, + agglomerateFileKeyOpt: Option[AgglomerateFileKey])( bucketPositions: Seq[Vec3Int], mag: Vec3Int, additionalCoordinates: Option[Seq[AdditionalCoordinate]])( - implicit m: MessagesProvider, - tc: TokenContext): Fox[(Seq[Box[Array[Byte]]], ElementClass.Value)] = + implicit tc: TokenContext): Fox[(Seq[Box[Array[Byte]]], ElementClass.Value)] = { + // Additional coordinates parameter ignored, see #7556 + val mag1BucketPositions = bucketPositions.map(_ * mag) + val bucketRequests = mag1BucketPositions.map( + mag1BucketPosition => + DataServiceDataRequest( + dataSourceId = Some(dataSourceId), + dataLayer = dataLayer, + cuboid = Cuboid( + VoxelPosition(mag1BucketPosition.x * DataLayer.bucketLength, + mag1BucketPosition.y * DataLayer.bucketLength, + mag1BucketPosition.z * DataLayer.bucketLength, + mag), + DataLayer.bucketLength, + DataLayer.bucketLength, + DataLayer.bucketLength + ), 
+ settings = DataServiceRequestSettings(halfByte = false, + appliedAgglomerate = agglomerateFileKeyOpt.map(_.attachment.name), + version = None, + additionalCoordinates = None), + )) for { - // Additional coordinates parameter ignored, see #7556 - (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, - datasetDirectoryName, - dataLayerName) - mag1BucketPositions = bucketPositions.map(_ * mag) - bucketRequests = mag1BucketPositions.map( - mag1BucketPosition => - DataServiceDataRequest( - dataSourceId = Some(dataSource.id), - dataLayer = dataLayer, - cuboid = Cuboid( - VoxelPosition(mag1BucketPosition.x * DataLayer.bucketLength, - mag1BucketPosition.y * DataLayer.bucketLength, - mag1BucketPosition.z * DataLayer.bucketLength, - mag), - DataLayer.bucketLength, - DataLayer.bucketLength, - DataLayer.bucketLength - ), - settings = DataServiceRequestSettings(halfByte = false, - appliedAgglomerate = mappingName, - version = None, - additionalCoordinates = None), - )) bucketData <- binaryDataServiceHolder.binaryDataService.handleMultipleBucketRequests(bucketRequests) } yield (bucketData, dataLayer.elementClass) - private def getBucketPositions(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - mappingName: Option[String])(segmentOrAgglomerateId: Long, mag: Vec3Int)( - implicit tc: TokenContext, - m: MessagesProvider): Fox[Set[Vec3IntProto]] = + } + + private def getBucketPositions( + segmentIndexFileKey: SegmentIndexFileKey, + agglomerateFileKeyOpt: Option[AgglomerateFileKey])(segmentOrAgglomerateId: Long, mag: Vec3Int)( + implicit tc: TokenContext): Fox[Set[Vec3IntProto]] = for { - segmentIds <- getSegmentIdsForAgglomerateIdIfNeeded(organizationId, - datasetDirectoryName, - dataLayerName, - segmentOrAgglomerateId, - mappingName) + segmentIds <- getSegmentIdsForAgglomerateIdIfNeeded(agglomerateFileKeyOpt, segmentOrAgglomerateId) positionsPerSegment <- Fox.serialCombined(segmentIds)(segmentId => - getBucketPositions(organizationId, datasetDirectoryName, dataLayerName, segmentId, mag)) + getBucketPositions(segmentIndexFileKey, segmentId, mag)) positionsCollected = positionsPerSegment.flatten.toSet.map(vec3IntToProto) } yield positionsCollected - private def getBucketPositions(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, + private def getBucketPositions(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long, mag: Vec3Int): Fox[Array[Vec3Int]] = for { - fileMag <- readFileMag(organizationId, datasetDirectoryName, dataLayerName) - bucketPositionsInFileMag <- readSegmentIndex(organizationId, datasetDirectoryName, dataLayerName, segmentId) + fileMag <- readFileMag(segmentIndexFileKey) + bucketPositionsInFileMag <- readSegmentIndex(segmentIndexFileKey, segmentId) bucketPositions = bucketPositionsInFileMag.map(_ / (mag / fileMag)) } yield bucketPositions private def getSegmentIdsForAgglomerateIdIfNeeded( - organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - segmentOrAgglomerateId: Long, - mappingNameOpt: Option[String])(implicit tc: TokenContext, m: MessagesProvider): Fox[Seq[Long]] = + agglomerateFileKeyOpt: Option[AgglomerateFileKey], + segmentOrAgglomerateId: Long)(implicit tc: TokenContext): Fox[Seq[Long]] = // Editable mappings cannot happen here since those requests go to the tracingstore - mappingNameOpt match { - case Some(mappingName) => + agglomerateFileKeyOpt match { + case Some(agglomerateFileKey) => for { - (dataSource, dataLayer) <- 
dataSourceRepository.getDataSourceAndDataLayer(organizationId, - datasetDirectoryName, - dataLayerName) - agglomerateService <- binaryDataServiceHolder.binaryDataService.agglomerateServiceOpt.toFox - agglomerateFileKey <- agglomerateService.lookUpAgglomerateFileKey(dataSource.id, dataLayer, mappingName) largestAgglomerateId <- agglomerateService.largestAgglomerateId(agglomerateFileKey) segmentIds <- if (segmentOrAgglomerateId <= largestAgglomerateId) { agglomerateService.segmentIdsForAgglomerateId( @@ -245,4 +222,7 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, case None => Fox.successful(List(segmentOrAgglomerateId)) } + private def unsupportedDataFormat(segmentIndexFileKey: SegmentIndexFileKey)(implicit ec: ExecutionContext) = + Fox.failure( + s"Trying to load segment index file with unsupported data format ${segmentIndexFileKey.attachment.dataFormat}") } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala new file mode 100644 index 00000000000..55a67578954 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala @@ -0,0 +1,13 @@ +package com.scalableminds.webknossos.datastore.services.segmentindex + +import com.scalableminds.util.geometry.Vec3Int +import com.scalableminds.util.tools.Fox + +import javax.inject.Inject + +class ZarrSegmentIndexFileService @Inject()() { + + def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long): Fox[Array[Vec3Int]] = ??? + + def readFileMag(segmentIndexFileKey: SegmentIndexFileKey): Fox[Vec3Int] = ??? 
+} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala index 79530a557b4..50760c2872d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala @@ -15,6 +15,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.LayerAttachment import net.liftweb.common.{Box, Failure, Full} import com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils import com.typesafe.scalalogging.LazyLogging +import net.liftweb.common.Box.tryo import java.nio.file.Path import scala.util.Using @@ -58,6 +59,11 @@ class Hdf5FileCache(val maxEntries: Int) extends LRUConcurrentCache[String, Cach override def onElementRemoval(key: String, value: CachedHdf5File): Unit = value.scheduleForRemoval() + def getCachedHdf5File(attachment: LayerAttachment)(loadFn: Path => CachedHdf5File): Box[CachedHdf5File] = + for { + localPath <- tryo(attachment.localPath) + } yield getCachedHdf5File(localPath)(loadFn) + def getCachedHdf5File(filePath: Path)(loadFn: Path => CachedHdf5File): CachedHdf5File = { val fileKey = filePath.toString @@ -95,5 +101,9 @@ class Hdf5FileCache(val maxEntries: Int) extends LRUConcurrentCache[String, Cach } yield boxedResult def withCachedHdf5[T](attachment: LayerAttachment)(block: CachedHdf5File => T): Box[T] = - withCachedHdf5(Path.of(attachment.path))(block) + for { + localAttachmentPath <- tryo(attachment.localPath) + result <- withCachedHdf5(localAttachmentPath)(block) + } yield result + } From d47b2408d408ab252e7a37d10aff6fbfd076c4af Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 24 Jun 2025 13:49:48 +0200 Subject: [PATCH 069/100] implement arrSegmentIndexFileService --- ...Utils.scala => ArrayArtifactHashing.scala} | 5 +- .../services/mesh/MeshFileService.scala | 7 +- .../services/mesh/ZarrMeshFileService.scala | 4 +- .../SegmentIndexFileService.scala | 44 +++-- .../ZarrSegmentIndexFileService.scala | 176 +++++++++++++++++- .../datastore/storage/Hdf5FileCache.scala | 4 +- 6 files changed, 207 insertions(+), 33 deletions(-) rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/{Hdf5HashedArrayUtils.scala => ArrayArtifactHashing.scala} (85%) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5HashedArrayUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ArrayArtifactHashing.scala similarity index 85% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5HashedArrayUtils.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ArrayArtifactHashing.scala index 75f0c4f8f13..44ba35317ed 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5HashedArrayUtils.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ArrayArtifactHashing.scala @@ -4,9 +4,7 @@ import org.apache.commons.codec.digest.MurmurHash3 import java.nio.ByteBuffer -trait Hdf5HashedArrayUtils { - - val hdf5FileExtension = "hdf5" +trait ArrayArtifactHashing { def getHashFunction(name: String): Long => Long = name match { case "identity" => identity @@ -14,4 +12,5 @@ trait Hdf5HashedArrayUtils { x: Long => Math.abs(MurmurHash3.hash128x64(ByteBuffer.allocate(8).putLong(x).array())(1)) } + } diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index b9de26d4136..89973898b58 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -11,7 +11,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ LayerAttachment, LayerAttachmentDataformat } -import com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils +import com.scalableminds.webknossos.datastore.services.ArrayArtifactHashing import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import net.liftweb.common.Box.tryo import net.liftweb.common.Box @@ -70,10 +70,11 @@ class MeshFileService @Inject()(config: DataStoreConfig, neuroglancerPrecomputedMeshService: NeuroglancerPrecomputedMeshFileService, remoteSourceDescriptorService: RemoteSourceDescriptorService) extends FoxImplicits - with Hdf5HashedArrayUtils { + with ArrayArtifactHashing { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private val localMeshesDir = "meshes" + private val hdf5MeshFileExtension = "hdf5" private val meshFileKeyCache : AlfuCache[(DataSourceId, String, String), MeshFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → MeshFileKey @@ -119,7 +120,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, val layerDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName).resolve(dataLayer.name) val scannedMeshFileNames = PathUtils - .listFiles(layerDir.resolve(localMeshesDir), silent = true, PathUtils.fileExtensionFilter(hdf5FileExtension)) + .listFiles(layerDir.resolve(localMeshesDir), silent = true, PathUtils.fileExtensionFilter(hdf5MeshFileExtension)) .map { paths => paths.map(path => FilenameUtils.removeExtension(path.getFileName.toString)) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 401acaa2511..116d838d55d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -7,7 +7,7 @@ import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId -import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, Hdf5HashedArrayUtils} +import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, ArrayArtifactHashing} import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import net.liftweb.common.Box.tryo import play.api.i18n.{Messages, MessagesProvider} @@ -25,7 +25,7 @@ case class MeshFileAttributes( hashFunction: String, nBuckets: Int, mappingName: Option[String] -) extends Hdf5HashedArrayUtils { +) extends ArrayArtifactHashing { lazy val applyHashFunction: Long => Long = getHashFunction(hashFunction) } diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala index 379f5db007d..068bff35ff2 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala @@ -24,7 +24,7 @@ import com.scalableminds.webknossos.datastore.models.{AdditionalCoordinate, Voxe import com.scalableminds.webknossos.datastore.services.{ AgglomerateService, BinaryDataServiceHolder, - Hdf5HashedArrayUtils + ArrayArtifactHashing } import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, RemoteSourceDescriptorService} import net.liftweb.common.Box.tryo @@ -41,12 +41,13 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, zarrSegmentIndexFileService: ZarrSegmentIndexFileService, remoteSourceDescriptorService: RemoteSourceDescriptorService, agglomerateService: AgglomerateService, - binaryDataServiceHolder: BinaryDataServiceHolder)(implicit ec: ExecutionContext) + binaryDataServiceHolder: BinaryDataServiceHolder) extends FoxImplicits - with Hdf5HashedArrayUtils + with ArrayArtifactHashing with SegmentStatistics { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) private val localSegmentIndexDir = "segmentIndex" + private val hdf5SegmentIndexFileExtension = "hdf5" protected lazy val bucketScanner = new NativeBucketScanner() @@ -58,8 +59,8 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, segmentIndexFileKeyCache.getOrLoad((dataSourceId, dataLayer.name), _ => lookUpSegmentIndexFileKeyImpl(dataSourceId, dataLayer)) - private def lookUpSegmentIndexFileKeyImpl(dataSourceId: DataSourceId, - dataLayer: DataLayer): Fox[SegmentIndexFileKey] = { + private def lookUpSegmentIndexFileKeyImpl(dataSourceId: DataSourceId, dataLayer: DataLayer)( + implicit ec: ExecutionContext): Fox[SegmentIndexFileKey] = { val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments.flatMap(_.segmentIndex) val localDatasetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) val localAttachment: Option[LayerAttachment] = findLocalSegmentIndexFile(localDatasetDir, dataLayer).toOption @@ -81,7 +82,9 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, private def findLocalSegmentIndexFile(localDatasetDir: Path, dataLayer: DataLayer): Box[LayerAttachment] = { val segmentIndexDir = localDatasetDir.resolve(dataLayer.name).resolve(this.localSegmentIndexDir) for { - files <- PathUtils.listFiles(segmentIndexDir, silent = true, PathUtils.fileExtensionFilter(hdf5FileExtension)) + files <- PathUtils.listFiles(segmentIndexDir, + silent = true, + PathUtils.fileExtensionFilter(hdf5SegmentIndexFileExtension)) file <- files.headOption } yield LayerAttachment( @@ -95,7 +98,8 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, * Read the segment index file and return the bucket positions for the given segment id. * The bucket positions are the top left corners of the buckets that contain the segment in the file mag. 
*/ - def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long): Fox[Array[Vec3Int]] = + def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, + segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Vec3Int]] = segmentIndexFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrSegmentIndexFileService.readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long) @@ -104,7 +108,8 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, case _ => unsupportedDataFormat(segmentIndexFileKey) } - def readFileMag(segmentIndexFileKey: SegmentIndexFileKey): Fox[Vec3Int] = + def readFileMag(segmentIndexFileKey: SegmentIndexFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Vec3Int] = segmentIndexFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrSegmentIndexFileService.readFileMag(segmentIndexFileKey: SegmentIndexFileKey) @@ -127,7 +132,7 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, segmentIndexFileKey: SegmentIndexFileKey, agglomerateFileKeyOpt: Option[AgglomerateFileKey], segmentId: Long, - mag: Vec3Int)(implicit tc: TokenContext): Fox[Long] = + mag: Vec3Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = calculateSegmentVolume( segmentId, mag, @@ -141,7 +146,7 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, segmentIndexFileKey: SegmentIndexFileKey, agglomerateFileKeyOpt: Option[AgglomerateFileKey], segmentId: Long, - mag: Vec3Int)(implicit tc: TokenContext): Fox[BoundingBox] = + mag: Vec3Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[BoundingBox] = calculateSegmentBoundingBox( segmentId, mag, @@ -156,7 +161,8 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, bucketPositions: Seq[Vec3Int], mag: Vec3Int, additionalCoordinates: Option[Seq[AdditionalCoordinate]])( - implicit tc: TokenContext): Fox[(Seq[Box[Array[Byte]]], ElementClass.Value)] = { + implicit ec: ExecutionContext, + tc: TokenContext): Fox[(Seq[Box[Array[Byte]]], ElementClass.Value)] = { // Additional coordinates parameter ignored, see #7556 val mag1BucketPositions = bucketPositions.map(_ * mag) val bucketRequests = mag1BucketPositions.map( @@ -183,10 +189,10 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, } yield (bucketData, dataLayer.elementClass) } - private def getBucketPositions( - segmentIndexFileKey: SegmentIndexFileKey, - agglomerateFileKeyOpt: Option[AgglomerateFileKey])(segmentOrAgglomerateId: Long, mag: Vec3Int)( - implicit tc: TokenContext): Fox[Set[Vec3IntProto]] = + private def getBucketPositions(segmentIndexFileKey: SegmentIndexFileKey, + agglomerateFileKeyOpt: Option[AgglomerateFileKey])( + segmentOrAgglomerateId: Long, + mag: Vec3Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Set[Vec3IntProto]] = for { segmentIds <- getSegmentIdsForAgglomerateIdIfNeeded(agglomerateFileKeyOpt, segmentOrAgglomerateId) positionsPerSegment <- Fox.serialCombined(segmentIds)(segmentId => @@ -194,9 +200,9 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, positionsCollected = positionsPerSegment.flatten.toSet.map(vec3IntToProto) } yield positionsCollected - private def getBucketPositions(segmentIndexFileKey: SegmentIndexFileKey, - segmentId: Long, - mag: Vec3Int): Fox[Array[Vec3Int]] = + private def getBucketPositions(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long, mag: Vec3Int)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[Array[Vec3Int]] 
= for { fileMag <- readFileMag(segmentIndexFileKey) bucketPositionsInFileMag <- readSegmentIndex(segmentIndexFileKey, segmentId) @@ -205,7 +211,7 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, private def getSegmentIdsForAgglomerateIdIfNeeded( agglomerateFileKeyOpt: Option[AgglomerateFileKey], - segmentOrAgglomerateId: Long)(implicit tc: TokenContext): Fox[Seq[Long]] = + segmentOrAgglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = // Editable mappings cannot happen here since those requests go to the tracingstore agglomerateFileKeyOpt match { case Some(agglomerateFileKey) => diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala index 55a67578954..29b5c5c2ce3 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala @@ -1,13 +1,181 @@ package com.scalableminds.webknossos.datastore.services.segmentindex +import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int -import com.scalableminds.util.tools.Fox +import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} +import com.scalableminds.webknossos.datastore.datareaders.DatasetArray +import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array +import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId +import com.scalableminds.webknossos.datastore.services.{ArrayArtifactHashing, ChunkCacheService} +import ucar.ma2.{Array => MultiArray} +import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService +import net.liftweb.common.Box.tryo +import play.api.libs.json.{JsResult, JsValue, Reads} import javax.inject.Inject +import scala.concurrent.ExecutionContext -class ZarrSegmentIndexFileService @Inject()() { +case class SegmentIndexFileAttributes( + formatVersion: Long, + mag: Vec3Int, + nHashBuckets: Long, + hashFunction: String, + dtypeBucketEntries: String, +) extends ArrayArtifactHashing { + lazy val applyHashFunction: Long => Long = getHashFunction(hashFunction) +} + +object SegmentIndexFileAttributes { + val FILENAME_ZARR_JSON = "zarr.json" + + implicit object SegmentIndexFileAttributesZarr3GroupHeaderReads extends Reads[SegmentIndexFileAttributes] { + override def reads(json: JsValue): JsResult[SegmentIndexFileAttributes] = { + val keyAttributes = "attributes" + val keyVx = "voxelytics" + val keyFormatVersion = "artifact_schema_version" + val keyArtifactAttrs = "artifact_attributes" + val segmentIndexFileAttrs = json \ keyAttributes \ keyVx \ keyArtifactAttrs + for { + formatVersion <- (json \ keyAttributes \ keyVx \ keyFormatVersion).validate[Long] + mag <- (segmentIndexFileAttrs \ "mag").validate[Vec3Int] + nHashBuckets <- (segmentIndexFileAttrs \ "n_hash_buckets").validate[Long] + hashFunction <- (segmentIndexFileAttrs \ "hash_function").validate[String] + dtypeBucketEntries <- (segmentIndexFileAttrs \ "dtype_bucket_entries").validate[String] + } yield + SegmentIndexFileAttributes( + formatVersion, + mag, + nHashBuckets, + hashFunction, + dtypeBucketEntries + ) + } + } +} + +class ZarrSegmentIndexFileService 
@Inject()(remoteSourceDescriptorService: RemoteSourceDescriptorService, + chunkCacheService: ChunkCacheService) + extends FoxImplicits { + + private val keyHashBucketOffsets = "hash_bucket_offsets" + private val keyHashBuckets = "hash_buckets" + private val keyTopLefts = "top_lefts" + + private lazy val openArraysCache = AlfuCache[(SegmentIndexFileKey, String), DatasetArray]() + private lazy val attributesCache = AlfuCache[SegmentIndexFileKey, SegmentIndexFileAttributes]() + + private def readSegmentIndexFileAttributes(segmentIndexFileKey: SegmentIndexFileKey)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[SegmentIndexFileAttributes] = + attributesCache.getOrLoad(segmentIndexFileKey, key => readSegmentIndexFileAttributesImpl(key)) + + private def readSegmentIndexFileAttributesImpl(segmentIndexFileKey: SegmentIndexFileKey)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[SegmentIndexFileAttributes] = + for { + groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(segmentIndexFileKey.attachment) + groupHeaderBytes <- (groupVaultPath / SegmentIndexFileAttributes.FILENAME_ZARR_JSON).readBytes() + segmentIndexFileAttributes <- JsonHelper + .parseAs[SegmentIndexFileAttributes](groupHeaderBytes) + .toFox ?~> "Could not parse segment index file attributes from zarr group file" + } yield segmentIndexFileAttributes + + def readFileMag(segmentIndexFileKey: SegmentIndexFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[Vec3Int] = + for { + attributes <- readSegmentIndexFileAttributes(segmentIndexFileKey) + } yield attributes.mag + + def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, + segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Vec3Int]] = + for { + attributes <- readSegmentIndexFileAttributes(segmentIndexFileKey) + hashBucketOffsetsArray <- openZarrArray(segmentIndexFileKey, keyHashBucketOffsets) + bucketIndex = attributes.applyHashFunction(segmentId) % attributes.nHashBuckets + bucketRange <- hashBucketOffsetsArray.readAsMultiArray(offset = bucketIndex, shape = 2) + bucketStart <- tryo(bucketRange.getLong(0)).toFox + bucketEnd <- tryo(bucketRange.getLong(1)).toFox + hashBucketExists = bucketEnd - bucketStart != 0 + topLeftsOpt <- Fox.runIf(hashBucketExists)(readTopLefts(segmentIndexFileKey, bucketStart, bucketEnd, segmentId)) + } yield topLeftsOpt.getOrElse(Array.empty) + + private def readTopLefts(segmentIndexFileKey: SegmentIndexFileKey, + bucketStart: Long, + bucketEnd: Long, + segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Vec3Int]] = + for { + attributes <- readSegmentIndexFileAttributes(segmentIndexFileKey) + hashBucketsArray <- openZarrArray(segmentIndexFileKey, keyHashBuckets) + topLeftsArray <- openZarrArray(segmentIndexFileKey, keyTopLefts) + bucket <- hashBucketsArray.readAsMultiArray(offset = Array(bucketStart, 0), + shape = Array((bucketEnd - bucketStart + 1).toInt, 3)) + bucketLocalOffset <- findLocalOffsetInBucket(bucket, segmentId).toFox ?~> s"SegmentId $segmentId not in bucket list" + topLeftOpts <- Fox.runIf(bucketLocalOffset >= 0)(for { + topLeftStart <- tryo(bucket.getLong(bucket.getIndex.set(Array(bucketLocalOffset, 1)))).toFox + topLeftEnd <- tryo(bucket.getLong(bucket.getIndex.set(Array(bucketLocalOffset, 2)))).toFox + topLeftCount = (topLeftEnd - topLeftStart).toInt + _ <- Fox + .fromBool(attributes.dtypeBucketEntries == "uint16") ?~> "value for dtype_bucket_entries in segment index file is not supported, only uint16 is supported" + topLeftsMA <- 
topLeftsArray.readAsMultiArray(offset = Array(topLeftStart, 0), shape = Array(topLeftCount, 3)) + topLefts <- tryo((0 until topLeftCount).map { idx => + Vec3Int( + topLeftsMA.getInt(topLeftsMA.getIndex.set(Array(idx, 0))), + topLeftsMA.getInt(topLeftsMA.getIndex.set(Array(idx, 1))), + topLeftsMA.getInt(topLeftsMA.getIndex.set(Array(idx, 2))) + ) + }.toArray).toFox + } yield topLefts) + } yield topLeftOpts.getOrElse(Array.empty) + + private def findLocalOffsetInBucket(bucket: MultiArray, segmentId: Long): Option[Int] = + (0 until bucket.getShape()(0)).find(idx => bucket.getLong(bucket.getIndex.set(Array(idx, 0))) == segmentId) + + /* + + private def readTopLefts(segmentIndex: CachedHdf5File, bucketStart: Long, bucketEnd: Long, segmentId: Long)( + implicit ec: ExecutionContext): Fox[Option[Array[Array[Short]]]] = + for { + _ <- Fox.successful(()) + buckets = segmentIndex.uint64Reader.readMatrixBlockWithOffset("hash_buckets", + (bucketEnd - bucketStart + 1).toInt, + 3, + bucketStart, + 0) + bucketLocalOffset = buckets.map(_(0)).indexOf(segmentId) + topLeftOpts <- Fox.runIf(bucketLocalOffset >= 0)(for { + _ <- Fox.successful(()) + topLeftStart = buckets(bucketLocalOffset)(1) + topLeftEnd = buckets(bucketLocalOffset)(2) + bucketEntriesDtype <- tryo(segmentIndex.stringReader.getAttr("/", "dtype_bucket_entries")).toFox + _ <- Fox + .fromBool(bucketEntriesDtype == "uint16") ?~> "value for dtype_bucket_entries in segment index file is not supported, only uint16 is supported" + topLefts = segmentIndex.uint16Reader.readMatrixBlockWithOffset("top_lefts", + (topLeftEnd - topLeftStart).toInt, + 3, + topLeftStart, + 0) + } yield topLefts) + } yield topLeftOpts + */ + + private def openZarrArray(segmentIndexFileKey: SegmentIndexFileKey, + zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = + openArraysCache.getOrLoad((segmentIndexFileKey, zarrArrayName), + _ => openZarrArrayImpl(segmentIndexFileKey, zarrArrayName)) - def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long): Fox[Array[Vec3Int]] = ??? + private def openZarrArrayImpl(segmentIndexFileKey: SegmentIndexFileKey, zarrArrayName: String)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[DatasetArray] = + for { + groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(segmentIndexFileKey.attachment) + zarrArray <- Zarr3Array.open(groupVaultPath / zarrArrayName, + DataSourceId("dummy", "unused"), + "layer", + None, + None, + None, + chunkCacheService.sharedChunkContentsCache) + } yield zarrArray - def readFileMag(segmentIndexFileKey: SegmentIndexFileKey): Fox[Vec3Int] = ??? 
} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala index 50760c2872d..4ce66f62f91 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala @@ -13,7 +13,7 @@ import com.scalableminds.util.cache.LRUConcurrentCache import com.scalableminds.webknossos.datastore.dataformats.SafeCachable import com.scalableminds.webknossos.datastore.models.datasource.LayerAttachment import net.liftweb.common.{Box, Failure, Full} -import com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils +import com.scalableminds.webknossos.datastore.services.ArrayArtifactHashing import com.typesafe.scalalogging.LazyLogging import net.liftweb.common.Box.tryo @@ -23,7 +23,7 @@ import scala.util.Using class CachedHdf5File(reader: IHDF5Reader) extends SafeCachable with AutoCloseable - with Hdf5HashedArrayUtils + with ArrayArtifactHashing with LazyLogging { override protected def onFinalize(): Unit = reader.close() From b1f58318c05031fce2e055dfc6a5d30dc6e5ab84 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 24 Jun 2025 15:16:45 +0200 Subject: [PATCH 070/100] Correctly read segment index file as mag1 segment positions --- .../controllers/DataSourceController.scala | 14 +++---- .../datastore/helpers/SegmentStatistics.scala | 6 ++- .../Hdf5SegmentIndexFileService.scala | 6 --- .../SegmentIndexFileService.scala | 42 ++++++++----------- .../ZarrSegmentIndexFileService.scala | 42 +++---------------- .../TSRemoteDatastoreClient.scala | 19 --------- 6 files changed, 31 insertions(+), 98 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 3de7263e959..f73a69b76ba 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -627,13 +627,11 @@ class DataSourceController @Inject()( mappingNameForMeshFile = None, omitMissing = false ) - fileMag <- segmentIndexFileService.readFileMag(segmentIndexFileKey) topLeftsNested: Seq[Array[Vec3Int]] <- Fox.serialCombined(segmentIds)(sId => segmentIndexFileService.readSegmentIndex(segmentIndexFileKey, sId)) topLefts: Array[Vec3Int] = topLeftsNested.toArray.flatten - bucketPositions = segmentIndexFileService.topLeftsToDistinctBucketPositions(topLefts, - request.body.mag, - fileMag) + bucketPositions = segmentIndexFileService.topLeftsToDistinctTargetMagBucketPositions(topLefts, + request.body.mag) bucketPositionsForCubeSize = bucketPositions .map(_.scale(DataLayer.bucketLength)) // bucket positions raw are indices of 32³ buckets .map(_ / request.body.cubeSize) @@ -645,7 +643,7 @@ class DataSourceController @Inject()( /** * Query the segment index file for multiple segments - * @return List of bucketPositions as indices of 32³ buckets + * @return List of bucketPositions as indices of 32³ buckets (in target mag) */ def querySegmentIndex(organizationId: String, datasetDirectoryName: String, @@ -669,13 +667,11 @@ class DataSourceController @Inject()( mappingNameForMeshFile = None, omitMissing = true // assume agglomerate ids not present in the mapping 
belong to user-brushed segments ) - fileMag <- segmentIndexFileService.readFileMag(segmentIndexFileKey) topLeftsNested: Seq[Array[Vec3Int]] <- Fox.serialCombined(segmentIds)(sId => segmentIndexFileService.readSegmentIndex(segmentIndexFileKey, sId)) topLefts: Array[Vec3Int] = topLeftsNested.toArray.flatten - bucketPositions = segmentIndexFileService.topLeftsToDistinctBucketPositions(topLefts, - request.body.mag, - fileMag) + bucketPositions = segmentIndexFileService.topLeftsToDistinctTargetMagBucketPositions(topLefts, + request.body.mag) } yield SegmentIndexData(segmentOrAgglomerateId, bucketPositions.toSeq) } } yield Ok(Json.toJson(segmentIdsAndBucketPositions)) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/SegmentStatistics.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/SegmentStatistics.scala index 8a2d1dce1fc..bdd67dc41c6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/SegmentStatistics.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/SegmentStatistics.scala @@ -35,8 +35,10 @@ trait SegmentStatistics extends ProtoGeometryImplicits with FoxImplicits { implicit ec: ExecutionContext): Fox[Long] = for { bucketPositionsProtos: Set[Vec3IntProto] <- getBucketPositions(segmentId, mag) - bucketPositionsInMag = bucketPositionsProtos.map(vec3IntFromProto) - (bucketBoxes, elementClass) <- getDataForBucketPositions(bucketPositionsInMag.toSeq, mag, additionalCoordinates) + bucketPositionsInRequestedMag = bucketPositionsProtos.map(vec3IntFromProto) + (bucketBoxes, elementClass) <- getDataForBucketPositions(bucketPositionsInRequestedMag.toSeq, + mag, + additionalCoordinates) counts <- Fox.serialCombined(bucketBoxes.toList) { case Full(bucketBytes) => tryo( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala index 32a2d32991d..5de88db36d7 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala @@ -31,12 +31,6 @@ class Hdf5SegmentIndexFileService @Inject()() extends FoxImplicits { case None => Array.empty } - def readFileMag(segmentIndexFileKey: SegmentIndexFileKey)(implicit ec: ExecutionContext): Fox[Vec3Int] = - for { - segmentIndex <- fileHandleCache.getCachedHdf5File(segmentIndexFileKey.attachment)(CachedHdf5File.fromPath).toFox - mag <- Vec3Int.fromArray(segmentIndex.uint64Reader.getArrayAttr("/", "mag").map(_.toInt)).toFox - } yield mag - private def readTopLefts(segmentIndex: CachedHdf5File, bucketStart: Long, bucketEnd: Long, segmentId: Long)( implicit ec: ExecutionContext): Fox[Option[Array[Array[Short]]]] = for { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala index 068bff35ff2..b5b935a8083 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala @@ -97,6 +97,7 @@ class 
SegmentIndexFileService @Inject()(config: DataStoreConfig, /** * Read the segment index file and return the bucket positions for the given segment id. * The bucket positions are the top left corners of the buckets that contain the segment in the file mag. + * The bucket positions are in mag1 coordinates though! */ def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Vec3Int]] = @@ -108,22 +109,10 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, case _ => unsupportedDataFormat(segmentIndexFileKey) } - def readFileMag(segmentIndexFileKey: SegmentIndexFileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Vec3Int] = - segmentIndexFileKey.attachment.dataFormat match { - case LayerAttachmentDataformat.zarr3 => - zarrSegmentIndexFileService.readFileMag(segmentIndexFileKey: SegmentIndexFileKey) - case LayerAttachmentDataformat.hdf5 => - hdf5SegmentIndexFileService.readFileMag(segmentIndexFileKey: SegmentIndexFileKey) - case _ => unsupportedDataFormat(segmentIndexFileKey) - } - - def topLeftsToDistinctBucketPositions(topLefts: Array[Vec3Int], - targetMag: Vec3Int, - fileMag: Vec3Int): Array[Vec3Int] = + def topLeftsToDistinctTargetMagBucketPositions(topLefts: Array[Vec3Int], targetMag: Vec3Int): Array[Vec3Int] = topLefts .map(_.scale(DataLayer.bucketLength)) // map indices to positions - .map(_ / (targetMag / fileMag)) + .map(_ / targetMag) .map(_ / Vec3Int.full(DataLayer.bucketLength)) // map positions to cube indices .distinct @@ -158,13 +147,15 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, private def getDataForBucketPositions(dataSourceId: DataSourceId, dataLayer: DataLayer, agglomerateFileKeyOpt: Option[AgglomerateFileKey])( - bucketPositions: Seq[Vec3Int], - mag: Vec3Int, + bucketPositionsInRequestedMag: Seq[Vec3Int], + requestedMag: Vec3Int, additionalCoordinates: Option[Seq[AdditionalCoordinate]])( implicit ec: ExecutionContext, tc: TokenContext): Fox[(Seq[Box[Array[Byte]]], ElementClass.Value)] = { // Additional coordinates parameter ignored, see #7556 - val mag1BucketPositions = bucketPositions.map(_ * mag) + + val mag1BucketPositions = bucketPositionsInRequestedMag.map(_ * requestedMag) + val bucketRequests = mag1BucketPositions.map( mag1BucketPosition => DataServiceDataRequest( @@ -174,7 +165,7 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, VoxelPosition(mag1BucketPosition.x * DataLayer.bucketLength, mag1BucketPosition.y * DataLayer.bucketLength, mag1BucketPosition.z * DataLayer.bucketLength, - mag), + requestedMag), DataLayer.bucketLength, DataLayer.bucketLength, DataLayer.bucketLength @@ -189,25 +180,26 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, } yield (bucketData, dataLayer.elementClass) } + // Reads bucket positions from segment index file. 
Returns target-mag bucket positions + // (even though the file stores mag1 bucket positions) private def getBucketPositions(segmentIndexFileKey: SegmentIndexFileKey, agglomerateFileKeyOpt: Option[AgglomerateFileKey])( segmentOrAgglomerateId: Long, - mag: Vec3Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Set[Vec3IntProto]] = + requestedMag: Vec3Int)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Set[Vec3IntProto]] = for { segmentIds <- getSegmentIdsForAgglomerateIdIfNeeded(agglomerateFileKeyOpt, segmentOrAgglomerateId) positionsPerSegment <- Fox.serialCombined(segmentIds)(segmentId => - getBucketPositions(segmentIndexFileKey, segmentId, mag)) + getBucketPositions(segmentIndexFileKey, segmentId, requestedMag)) positionsCollected = positionsPerSegment.flatten.toSet.map(vec3IntToProto) } yield positionsCollected - private def getBucketPositions(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long, mag: Vec3Int)( + private def getBucketPositions(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long, requestedMag: Vec3Int)( implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Vec3Int]] = for { - fileMag <- readFileMag(segmentIndexFileKey) - bucketPositionsInFileMag <- readSegmentIndex(segmentIndexFileKey, segmentId) - bucketPositions = bucketPositionsInFileMag.map(_ / (mag / fileMag)) - } yield bucketPositions + mag1BucketPositions <- readSegmentIndex(segmentIndexFileKey, segmentId) + bucketPositionsInRequestedMag = mag1BucketPositions.map(_ / requestedMag) + } yield bucketPositionsInRequestedMag private def getSegmentIdsForAgglomerateIdIfNeeded( agglomerateFileKeyOpt: Option[AgglomerateFileKey], diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala index 29b5c5c2ce3..c42e9676a92 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala @@ -58,6 +58,8 @@ class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: Remot chunkCacheService: ChunkCacheService) extends FoxImplicits { + // TODO clear caches + private val keyHashBucketOffsets = "hash_bucket_offsets" private val keyHashBuckets = "hash_buckets" private val keyTopLefts = "top_lefts" @@ -81,12 +83,6 @@ class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: Remot .toFox ?~> "Could not parse meshFile attributes from zarr group file" } yield segmentIndexFileAttributes - def readFileMag(segmentIndexFileKey: SegmentIndexFileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[Vec3Int] = - for { - attributes <- readSegmentIndexFileAttributes(segmentIndexFileKey) - } yield attributes.mag - def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[Vec3Int]] = for { @@ -120,9 +116,9 @@ class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: Remot topLeftsMA <- topLeftsArray.readAsMultiArray(offset = Array(topLeftStart, 0), shape = Array(topLeftCount, 3)) topLefts <- tryo((0 until topLeftCount).map { idx => Vec3Int( - topLeftsMA.getInt(topLeftsMA.getIndex.set(Array(idx, 0))), - topLeftsMA.getInt(topLeftsMA.getIndex.set(Array(idx, 1))), - 
topLeftsMA.getInt(topLeftsMA.getIndex.set(Array(idx, 2))) + topLeftsMA.getShort(topLeftsMA.getIndex.set(Array(idx, 0))), + topLeftsMA.getShort(topLeftsMA.getIndex.set(Array(idx, 1))), + topLeftsMA.getShort(topLeftsMA.getIndex.set(Array(idx, 2))) ) }.toArray).toFox } yield topLefts) @@ -131,34 +127,6 @@ class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: Remot private def findLocalOffsetInBucket(bucket: MultiArray, segmentId: Long): Option[Int] = (0 until bucket.getShape()(0)).find(idx => bucket.getLong(bucket.getIndex.set(Array(idx, 0))) == segmentId) - /* - - private def readTopLefts(segmentIndex: CachedHdf5File, bucketStart: Long, bucketEnd: Long, segmentId: Long)( - implicit ec: ExecutionContext): Fox[Option[Array[Array[Short]]]] = - for { - _ <- Fox.successful(()) - buckets = segmentIndex.uint64Reader.readMatrixBlockWithOffset("hash_buckets", - (bucketEnd - bucketStart + 1).toInt, - 3, - bucketStart, - 0) - bucketLocalOffset = buckets.map(_(0)).indexOf(segmentId) - topLeftOpts <- Fox.runIf(bucketLocalOffset >= 0)(for { - _ <- Fox.successful(()) - topLeftStart = buckets(bucketLocalOffset)(1) - topLeftEnd = buckets(bucketLocalOffset)(2) - bucketEntriesDtype <- tryo(segmentIndex.stringReader.getAttr("/", "dtype_bucket_entries")).toFox - _ <- Fox - .fromBool(bucketEntriesDtype == "uint16") ?~> "value for dtype_bucket_entries in segment index file is not supported, only uint16 is supported" - topLefts = segmentIndex.uint16Reader.readMatrixBlockWithOffset("top_lefts", - (topLeftEnd - topLeftStart).toInt, - 3, - topLeftStart, - 0) - } yield topLefts) - } yield topLeftOpts - */ - private def openZarrArray(segmentIndexFileKey: SegmentIndexFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = openArraysCache.getOrLoad((segmentIndexFileKey, zarrArrayName), diff --git a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/TSRemoteDatastoreClient.scala b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/TSRemoteDatastoreClient.scala index 20eee18c328..087c44c0412 100644 --- a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/TSRemoteDatastoreClient.scala +++ b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/TSRemoteDatastoreClient.scala @@ -116,25 +116,6 @@ class TSRemoteDatastoreClient @Inject()( hasIndexFile <- rpc(s"$remoteLayerUri/hasSegmentIndex").withTokenFromContext.silent.getWithJsonResponse[Boolean] } yield hasIndexFile - def querySegmentIndex(remoteFallbackLayer: RemoteFallbackLayer, - segmentId: Long, - mag: Vec3Int, - mappingName: Option[String], // should be the baseMappingName in case of editable mappings - editableMappingTracingId: Option[String])(implicit tc: TokenContext): Fox[Seq[Vec3Int]] = - for { - remoteLayerUri <- getRemoteLayerUri(remoteFallbackLayer) - positions <- rpc(s"$remoteLayerUri/segmentIndex/$segmentId").withTokenFromContext.silent - .postJsonWithJsonResponse[GetSegmentIndexParameters, Seq[Vec3Int]](GetSegmentIndexParameters( - mag, - cubeSize = Vec3Int.ones, // Don't use the cubeSize parameter here (since we want to calculate indices later anyway) - additionalCoordinates = None, - mappingName = mappingName, - editableMappingTracingId = editableMappingTracingId - )) - - indices = positions.map(_.scale(1f / DataLayer.bucketLength)) // Route returns positions to use the same interface as tracing store, we want indices - } yield indices - def querySegmentIndexForMultipleSegments( remoteFallbackLayer: RemoteFallbackLayer, 
segmentIds: Seq[Long], From d34fe29c4f8dfdb03b8f2237febfcea98ba75184 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 24 Jun 2025 15:29:09 +0200 Subject: [PATCH 071/100] unused imports --- .../webknossos/tracingstore/TSRemoteDatastoreClient.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/TSRemoteDatastoreClient.scala b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/TSRemoteDatastoreClient.scala index 087c44c0412..73d7cdd36fc 100644 --- a/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/TSRemoteDatastoreClient.scala +++ b/webknossos-tracingstore/app/com/scalableminds/webknossos/tracingstore/TSRemoteDatastoreClient.scala @@ -11,12 +11,10 @@ import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{ GetMultipleSegmentIndexParameters, - GetSegmentIndexParameters, MissingBucketHeaders, ProtoGeometryImplicits, SegmentIndexData } -import com.scalableminds.webknossos.datastore.models.datasource.DataLayer import com.scalableminds.webknossos.datastore.models.{VoxelSize, WebknossosDataRequest} import com.scalableminds.webknossos.datastore.models.datasource.inbox.InboxDataSource import com.scalableminds.webknossos.datastore.rpc.RPC From 0eb73d2a119650d3b32ee2bf45656e868e9ceda5 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 25 Jun 2025 09:40:03 +0200 Subject: [PATCH 072/100] implement pr feedback --- conf/messages | 5 +++- .../explore/PrecomputedExplorer.scala | 26 +++++++++++-------- .../services/mesh/DSFullMeshService.scala | 2 +- .../services/mesh/MeshFileService.scala | 22 ++++++++++------ 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/conf/messages b/conf/messages index ebea2c69404..873aeec8038 100644 --- a/conf/messages +++ b/conf/messages @@ -268,6 +268,7 @@ mesh.file.readEncoding.failed=Failed to read encoding from mesh file mesh.file.lookup.failed=Failed to look up mesh file “{0}” mesh.file.readVersion.failed=Failed to read format version from file “{0}” mesh.file.readMappingName.failed=Failed to read mapping name from mesh file “{0}” +mesh.meshFileName.required=Trying to load mesh from mesh file, but mesh file name was not supplied. task.create.noTasks=Zero tasks were requested task.create.failed=Failed to create Task @@ -350,7 +351,9 @@ job.trainModel.notAllowed.organization = Training AI models is only allowed for job.runInference.notAllowed.organization = Running inference is only allowed for datasets of your own organization. job.paidJob.notAllowed.noPaidPlan = You are not allowed to run this job because your organization does not have a paid plan. job.notEnoughCredits = Your organization does not have enough WEBKNOSSOS credits to run this job. -creditTransaction.notPaidPlan = Your organization does not have a paid plan. +job.updateStatus.failed = Failed to update long-running job’s status +job.creditTransaction.failed = Failed to perform credit transaction +job.creditTransaction.refund.failed = Failed to perform credit transaction refund voxelytics.disabled = Voxelytics workflow reporting and logging are not enabled for this WEBKNOSSOS instance. 
voxelytics.runNotFound = Workflow runs not found diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala index c36754f45cc..7bcd3d7f0f4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/explore/PrecomputedExplorer.scala @@ -91,15 +91,19 @@ class PrecomputedExplorer(implicit val ec: ExecutionContext) extends RemoteLayer } private def exploreMeshesForLayer(meshPath: VaultPath, credentialId: Option[String])( - implicit tc: TokenContext): Fox[Seq[LayerAttachment]] = - (for { - meshInfo <- (meshPath / NeuroglancerMesh.FILENAME_INFO) - .parseAsJson[NeuroglancerPrecomputedMeshInfo] ?~> "Failed to read mesh info" - _ <- Fox.fromBool(meshInfo.transform.length == 12) ?~> "Invalid mesh info: transform has to be of length 12" - } yield - Seq( - LayerAttachment(NeuroglancerMesh.meshName, - meshPath.toUri, - LayerAttachmentDataformat.neuroglancerPrecomputed, - credentialId))).orElse(Fox.successful(Seq.empty)) + implicit tc: TokenContext): Fox[Seq[LayerAttachment]] = { + val exploredMeshesFox = + for { + meshInfo <- (meshPath / NeuroglancerMesh.FILENAME_INFO) + .parseAsJson[NeuroglancerPrecomputedMeshInfo] ?~> "Failed to read mesh info" + _ <- Fox.fromBool(meshInfo.transform.length == 12) ?~> "Invalid mesh info: transform has to be of length 12" + } yield + Seq( + LayerAttachment(NeuroglancerMesh.meshName, + meshPath.toUri, + LayerAttachmentDataformat.neuroglancerPrecomputed, + credentialId)) + // If mesh exploration at this path failed, continue but return no meshes. + exploredMeshesFox.orElse(Fox.successful(Seq.empty)) + } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index bba11a88e97..df9fcf2cf7f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -129,7 +129,7 @@ class DSFullMeshService @Inject()(dataSourceRepository: DataSourceRepository, (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) - meshFileName <- fullMeshRequest.meshFileName.toFox ?~> "meshFileName.required" + meshFileName <- fullMeshRequest.meshFileName.toFox ?~> "mesh.meshFileName.required" meshFileKey <- meshFileService.lookUpMeshFileKey(dataSource.id, dataLayer, meshFileName) mappingNameForMeshFile <- meshFileService.mappingNameForMeshFile(meshFileKey) segmentIds <- segmentIdsForAgglomerateIdIfNeeded( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index 8308e3c1899..bb3e55ba4cb 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -12,7 +12,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ LayerAttachmentDataformat } import 
com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils -import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService +import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, RemoteSourceDescriptorService} import net.liftweb.common.Box.tryo import net.liftweb.common.Box import org.apache.commons.io.FilenameUtils @@ -97,17 +97,17 @@ class MeshFileService @Inject()(config: DataStoreConfig, path = remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatasetDir, dataLayer.name)) }) + localFallbackAttachment = LayerAttachment( + meshFileName, + localDatasetDir.resolve(dataLayer.name).resolve(meshesDir).toUri, + LayerAttachmentDataformat.hdf5 + ) + selectedAttachment = registeredAttachmentNormalized.getOrElse(localFallbackAttachment) } yield MeshFileKey( dataSourceId, dataLayer.name, - registeredAttachmentNormalized.getOrElse( - LayerAttachment( - meshFileName, - localDatasetDir.resolve(dataLayer.name).resolve(meshesDir).toUri, - LayerAttachmentDataformat.hdf5 - ) - ) + selectedAttachment ) } @@ -155,6 +155,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, hdf5MeshFileService.mappingNameForMeshFile(meshFileKey).toFox case LayerAttachmentDataformat.neuroglancerPrecomputed => Fox.successful(None) + case _ => unsupportedDataFormat(meshFileKey) } private def versionForMeshFile(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = @@ -165,6 +166,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, Fox.successful(hdf5MeshFileService.versionForMeshFile(meshFileKey)) case LayerAttachmentDataformat.neuroglancerPrecomputed => Fox.successful(NeuroglancerMesh.meshInfoVersion) + case _ => unsupportedDataFormat(meshFileKey) } def getVertexQuantizationBits(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Int] = @@ -185,6 +187,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, hdf5MeshFileService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) case LayerAttachmentDataformat.neuroglancerPrecomputed => neuroglancerPrecomputedMeshService.listMeshChunksForMultipleSegments(meshFileKey, segmentIds) + case _ => unsupportedDataFormat(meshFileKey) } def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest], @@ -194,6 +197,7 @@ class MeshFileService @Inject()(config: DataStoreConfig, case LayerAttachmentDataformat.hdf5 => hdf5MeshFileService.readMeshChunk(meshFileKey, meshChunkDataRequests).toFox case LayerAttachmentDataformat.neuroglancerPrecomputed => neuroglancerPrecomputedMeshService.readMeshChunk(meshFileKey, meshChunkDataRequests) + case _ => unsupportedDataFormat(meshFileKey) } def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { @@ -211,4 +215,6 @@ class MeshFileService @Inject()(config: DataStoreConfig, clearedHdf5Count + clearedZarrCount + clearedNeuroglancerCount } + private def unsupportedDataFormat(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext) = + Fox.failure(s"Trying to load mesh file with unsupported data format ${meshFileKey.attachment.dataFormat}") } From 9217eb5b2639b1a07960f4b0dd7bc237a80a8ff5 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 25 Jun 2025 09:52:32 +0200 Subject: [PATCH 073/100] unused import --- .../webknossos/datastore/services/mesh/MeshFileService.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index bb3e55ba4cb..309ec8427a6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -12,7 +12,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ LayerAttachmentDataformat } import com.scalableminds.webknossos.datastore.services.Hdf5HashedArrayUtils -import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, RemoteSourceDescriptorService} +import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import net.liftweb.common.Box.tryo import net.liftweb.common.Box import org.apache.commons.io.FilenameUtils From fd932e68b3bd7ff42520bb3ec0c64b64e9664579 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 25 Jun 2025 10:15:55 +0200 Subject: [PATCH 074/100] same cleanup also when looking up agglomerates --- .../datastore/services/AgglomerateService.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index abf55751ce1..9298c67f393 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -96,17 +96,17 @@ class AgglomerateService @Inject()(config: DataStoreConfig, path = remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatasetDir, dataLayer.name)) }) + localFallbackAttachment = LayerAttachment( + mappingName, + localDatasetDir.resolve(dataLayer.name).resolve(agglomerateDir).toUri, + LayerAttachmentDataformat.hdf5 + ) + selectedAttachment = registeredAttachmentNormalized.getOrElse(localFallbackAttachment) } yield AgglomerateFileKey( dataSourceId, dataLayer.name, - registeredAttachmentNormalized.getOrElse( - LayerAttachment( - mappingName, - localDatasetDir.resolve(dataLayer.name).resolve(agglomerateDir).toUri, - LayerAttachmentDataformat.hdf5 - ) - ) + selectedAttachment ) } From 05f477d2f32377d7d4debb039d774fb18cff554e Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 25 Jun 2025 11:15:16 +0200 Subject: [PATCH 075/100] add cache clear for segment index files --- .../webknossos/datastore/DataStoreModule.scala | 8 ++++++++ .../controllers/DataSourceController.scala | 7 +++++-- .../services/mesh/Hdf5MeshFileService.scala | 15 ++++++++------- .../Hdf5SegmentIndexFileService.scala | 14 +++++++++++++- .../segmentindex/SegmentIndexFileService.scala | 13 +++++++++++++ .../ZarrSegmentIndexFileService.scala | 12 ++++++++++-- 6 files changed, 57 insertions(+), 12 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index a38bed41991..85711bbd246 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -11,6 +11,11 @@ import com.scalableminds.webknossos.datastore.services.mesh.{ NeuroglancerPrecomputedMeshFileService, ZarrMeshFileService } +import 
com.scalableminds.webknossos.datastore.services.segmentindex.{ + Hdf5SegmentIndexFileService, + SegmentIndexFileService, + ZarrSegmentIndexFileService +} import com.scalableminds.webknossos.datastore.services.uploading.UploadService import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptorService} @@ -38,6 +43,9 @@ class DataStoreModule extends AbstractModule { bind(classOf[AgglomerateService]).asEagerSingleton() bind(classOf[ZarrAgglomerateService]).asEagerSingleton() bind(classOf[Hdf5AgglomerateService]).asEagerSingleton() + bind(classOf[SegmentIndexFileService]).asEagerSingleton() + bind(classOf[ZarrSegmentIndexFileService]).asEagerSingleton() + bind(classOf[Hdf5SegmentIndexFileService]).asEagerSingleton() bind(classOf[NeuroglancerPrecomputedMeshFileService]).asEagerSingleton() bind(classOf[RemoteSourceDescriptorService]).asEagerSingleton() bind(classOf[ChunkCacheService]).asEagerSingleton() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 6ce11764c00..3ec909ed7fc 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -444,10 +444,13 @@ class DataSourceController @Inject()( private def clearCachesOfDataSource(organizationId: String, datasetDirectoryName: String, layerName: Option[String]): InboxDataSource = { + val dataSourceId = DataSourceId(datasetDirectoryName, organizationId) val (closedAgglomerateFileHandleCount, clearedBucketProviderCount, removedChunksCount) = binaryDataServiceHolder.binaryDataService.clearCache(organizationId, datasetDirectoryName, layerName) val closedMeshFileHandleCount = - meshFileService.clearCache(DataSourceId(organizationId, datasetDirectoryName), layerName) + meshFileService.clearCache(dataSourceId, layerName) + val closedSegmentIndexFileHandleCount = + segmentIndexFileService.clearCache(dataSourceId, layerName) val reloadedDataSource: InboxDataSource = dataSourceService.dataSourceFromDir( dataSourceService.dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName), organizationId) @@ -455,7 +458,7 @@ class DataSourceController @Inject()( val clearedVaultCacheEntriesOpt = dataSourceService.invalidateVaultCache(reloadedDataSource, layerName) clearedVaultCacheEntriesOpt.foreach { clearedVaultCacheEntries => logger.info( - s"Cleared caches for ${layerName.map(l => s"layer '$l' of ").getOrElse("")}dataset $organizationId/$datasetDirectoryName: closed $closedAgglomerateFileHandleCount agglomerate file handles and $closedMeshFileHandleCount mesh file handles, removed $clearedBucketProviderCount bucketProviders, $clearedVaultCacheEntries vault cache entries and $removedChunksCount image chunk cache entries.") + s"Cleared caches for ${layerName.map(l => s"layer '$l' of ").getOrElse("")}dataset $organizationId/$datasetDirectoryName: closed $closedAgglomerateFileHandleCount agglomerate file handles, $closedMeshFileHandleCount mesh file handles, $closedSegmentIndexFileHandleCount segment index file handles, removed $clearedBucketProviderCount bucketProviders, $clearedVaultCacheEntries vault cache entries and $removedChunksCount image chunk cache entries.") } reloadedDataSource } diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala index 31cc69222ec..93fd8910149 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala @@ -16,10 +16,10 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) - private lazy val meshFileCache = new Hdf5FileCache(30) + private lazy val fileHandleCache = new Hdf5FileCache(30) def mappingNameForMeshFile(meshFileKey: MeshFileKey): Box[Option[String]] = tryo { - meshFileCache + fileHandleCache .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => cachedMeshFile.mappingName } @@ -30,7 +30,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance } private def readMeshFileMetadata(meshFileKey: MeshFileKey): Box[(String, Double, Array[Array[Double]])] = - meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => + fileHandleCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => val lodScaleMultiplier = cachedMeshFile.float64Reader.getAttr("/", "lod_scale_multiplier") val transform = cachedMeshFile.float64Reader.getMatrixAttr("/", "transform") (cachedMeshFile.meshFormat, lodScaleMultiplier, transform) @@ -40,7 +40,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance segmentIds: Seq[Long], lodScaleMultiplier: Double, transform: Array[Array[Double]]): List[List[MeshLodInfo]] = - meshFileCache + fileHandleCache .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile: CachedHdf5File => segmentIds.toList.flatMap(segmentId => listMeshChunksForSegment(cachedMeshFile, segmentId, lodScaleMultiplier, transform)) @@ -99,7 +99,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance lod: Int): Array[Array[Double]] = transform def versionForMeshFile(meshFileKey: MeshFileKey): Long = - meshFileCache + fileHandleCache .withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => cachedMeshFile.artifactSchemaVersion } @@ -109,7 +109,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance def readMeshChunk(meshFileKey: MeshFileKey, meshChunkDataRequests: Seq[MeshChunkDataRequest]): Box[(Array[Byte], String)] = for { - resultBox <- meshFileCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => + resultBox <- fileHandleCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => readMeshChunkFromCachedMeshFile(cachedMeshFile, meshChunkDataRequests) } (output, encoding) <- resultBox @@ -153,6 +153,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { val datasetPath = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) val relevantPath = layerNameOpt.map(l => datasetPath.resolve(l)).getOrElse(datasetPath) - meshFileCache.clear(key => key.startsWith(relevantPath.toString)) + fileHandleCache.clear(key => key.startsWith(relevantPath.toString)) } + } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala index db3e9f3be34..531bd6ad3f4 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala @@ -3,12 +3,18 @@ package com.scalableminds.webknossos.datastore.services.segmentindex import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools.{Fox, FoxImplicits} +import com.scalableminds.webknossos.datastore.DataStoreConfig +import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} +import java.nio.file.Paths import javax.inject.Inject import scala.concurrent.ExecutionContext -class Hdf5SegmentIndexFileService @Inject()() extends FoxImplicits { +class Hdf5SegmentIndexFileService @Inject()(config: DataStoreConfig) extends FoxImplicits { + + private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) + private lazy val fileHandleCache = new Hdf5FileCache(100) def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, segmentId: Long)( @@ -55,4 +61,10 @@ class Hdf5SegmentIndexFileService @Inject()() extends FoxImplicits { 0) } yield topLefts) } yield topLeftOpts + + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { + val datasetPath = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + val relevantPath = layerNameOpt.map(l => datasetPath.resolve(l)).getOrElse(datasetPath) + fileHandleCache.clear(key => key.startsWith(relevantPath.toString)) + } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala index 78d752f0c6f..2029c74962c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala @@ -219,6 +219,19 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, case None => Fox.successful(List(segmentOrAgglomerateId)) } + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { + segmentIndexFileKeyCache.clear { + case (keyDataSourceId, keyLayerName) => + dataSourceId == keyDataSourceId && layerNameOpt.forall(_ == keyLayerName) + } + + val clearedHdf5Count = hdf5SegmentIndexFileService.clearCache(dataSourceId, layerNameOpt) + + val clearedZarrCount = zarrSegmentIndexFileService.clearCache(dataSourceId, layerNameOpt) + + clearedHdf5Count + clearedZarrCount + } + private def unsupportedDataFormat(segmentIndexFileKey: SegmentIndexFileKey)(implicit ec: ExecutionContext) = Fox.failure( s"Trying to load segment index file with unsupported data format ${segmentIndexFileKey.attachment.dataFormat}") diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala index 0b26873f796..187b055f05b 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala
@@ -58,8 +58,6 @@ class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: Remot
                                             chunkCacheService: ChunkCacheService)
     extends FoxImplicits {
 
-  // TODO clear caches
-
   private val keyHashBucketOffsets = "hash_bucket_offsets"
   private val keyHashBuckets = "hash_buckets"
   private val keyTopLefts = "top_lefts"
@@ -146,4 +144,14 @@ class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: Remot
                                  chunkCacheService.sharedChunkContentsCache)
     } yield zarrArray
 
+  def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = {
+    attributesCache.clear { segmentIndexFileKey =>
+      segmentIndexFileKey.dataSourceId == dataSourceId && layerNameOpt.forall(segmentIndexFileKey.layerName == _)
+    }
+
+    openArraysCache.clear {
+      case (segmentIndexFileKey, _) =>
+        segmentIndexFileKey.dataSourceId == dataSourceId && layerNameOpt.forall(segmentIndexFileKey.layerName == _)
+    }
+  }
 }

From dd0e803639bc5ee156bb60694d6749cfd9f43b92 Mon Sep 17 00:00:00 2001
From: Florian M
Date: Wed, 25 Jun 2025 11:23:51 +0200
Subject: [PATCH 076/100] changelog

---
 unreleased_changes/8711.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 unreleased_changes/8711.md

diff --git a/unreleased_changes/8711.md b/unreleased_changes/8711.md
new file mode 100644
index 00000000000..115604962a5
--- /dev/null
+++ b/unreleased_changes/8711.md
@@ -0,0 +1,5 @@
+### Added
+- Segment index files can now also be read from the new zarr3-based format, and from remote object storage.
+
+### Fixed
+- Fixed a bug where segment index files for segmentation layers with no mag1 would be read incorrectly.
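The connectome patches that follow (077-080) repeat the dispatch shape already applied to mesh and segment index files above: a facade service resolves a cached *FileKey for the requested attachment and then delegates to either a zarr3-backed or an hdf5-backed implementation, branching on the attachment's data format. A minimal, self-contained sketch of that pattern follows; the names DispatchSketch, FileKey, Reader, ZarrReader and Hdf5Reader are illustrative stand-ins, not datastore classes, and the Either return type stands in for the Fox used in the real services.

object DispatchSketch {

  sealed trait DataFormat
  case object Zarr3 extends DataFormat
  case object Hdf5 extends DataFormat

  final case class FileKey(layerName: String, attachmentName: String, dataFormat: DataFormat)

  trait Reader {
    def mappingName(key: FileKey): Either[String, String]
  }

  object ZarrReader extends Reader {
    // Stand-in for a zarr3-backed reader such as ZarrConnectomeFileService
    def mappingName(key: FileKey): Either[String, String] = Right(s"zarr mapping for ${key.attachmentName}")
  }

  object Hdf5Reader extends Reader {
    // Stand-in for an hdf5-backed reader such as Hdf5ConnectomeFileService
    def mappingName(key: FileKey): Either[String, String] = Right(s"hdf5 mapping for ${key.attachmentName}")
  }

  // The facade branches exactly once on the attachment's data format,
  // mirroring ConnectomeFileService.mappingNameForConnectomeFile in patch 079.
  def mappingNameFor(key: FileKey): Either[String, String] =
    key.dataFormat match {
      case Zarr3 => ZarrReader.mappingName(key)
      case Hdf5  => Hdf5Reader.mappingName(key)
    }

  def main(args: Array[String]): Unit =
    println(mappingNameFor(FileKey("segmentation", "connectome_v1", Zarr3)))
}

In the actual services the key lookup is additionally cached in an AlfuCache keyed by (dataSourceId, layerName, attachmentName), and when no attachment is registered the lookup falls back to a file found by scanning the local connectomes (or segmentIndexes, meshes) directory, as the diffs below show.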
From 5318f068c4d3b60f7a16cdd79830c14dbc3354ea Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 25 Jun 2025 11:34:02 +0200 Subject: [PATCH 077/100] WIP: Read Zarr Connectome Files --- .../datastore/DataStoreModule.scala | 8 ++++++++ .../controllers/DataSourceController.scala | 14 +++----------- .../ConnectomeFileService.scala | 19 ++++++++++--------- .../Hdf5ConnectomeFileService.scala | 7 +++++++ .../ZarrConnectomeFileService.scala | 5 +++++ 5 files changed, 33 insertions(+), 20 deletions(-) rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/{ => connectome}/ConnectomeFileService.scala (97%) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index 85711bbd246..bf1577f16e5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -4,6 +4,11 @@ import org.apache.pekko.actor.ActorSystem import com.google.inject.AbstractModule import com.google.inject.name.Names import com.scalableminds.webknossos.datastore.services._ +import com.scalableminds.webknossos.datastore.services.connectome.{ + ConnectomeFileService, + Hdf5ConnectomeFileService, + ZarrConnectomeFileService +} import com.scalableminds.webknossos.datastore.services.mesh.{ AdHocMeshServiceHolder, Hdf5MeshFileService, @@ -46,6 +51,9 @@ class DataStoreModule extends AbstractModule { bind(classOf[SegmentIndexFileService]).asEagerSingleton() bind(classOf[ZarrSegmentIndexFileService]).asEagerSingleton() bind(classOf[Hdf5SegmentIndexFileService]).asEagerSingleton() + bind(classOf[ConnectomeFileService]).asEagerSingleton() + bind(classOf[ZarrConnectomeFileService]).asEagerSingleton() + bind(classOf[Hdf5ConnectomeFileService]).asEagerSingleton() bind(classOf[NeuroglancerPrecomputedMeshFileService]).asEagerSingleton() bind(classOf[RemoteSourceDescriptorService]).asEagerSingleton() bind(classOf[ChunkCacheService]).asEagerSingleton() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 3ec909ed7fc..ac44c0d390c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -5,17 +5,8 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong -import com.scalableminds.webknossos.datastore.explore.{ - ExploreRemoteDatasetRequest, - ExploreRemoteDatasetResponse, - ExploreRemoteLayerService -} -import com.scalableminds.webknossos.datastore.helpers.{ - GetMultipleSegmentIndexParameters, - GetSegmentIndexParameters, - SegmentIndexData, - SegmentStatisticsParameters -} +import com.scalableminds.webknossos.datastore.explore.{ExploreRemoteDatasetRequest, ExploreRemoteDatasetResponse, ExploreRemoteLayerService} +import 
com.scalableminds.webknossos.datastore.helpers.{GetMultipleSegmentIndexParameters, GetSegmentIndexParameters, SegmentIndexData, SegmentStatisticsParameters} import com.scalableminds.webknossos.datastore.models.datasource.inbox.InboxDataSource import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSource, DataSourceId, GenericDataSource} import com.scalableminds.webknossos.datastore.services._ @@ -25,6 +16,7 @@ import com.scalableminds.webknossos.datastore.services.uploading._ import com.scalableminds.webknossos.datastore.storage.DataVaultService import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools.{Box, Empty, Failure, Full} +import com.scalableminds.webknossos.datastore.services.connectome.{ByAgglomerateIdsRequest, BySynapseIdsRequest, ConnectomeFileNameWithMappingName, ConnectomeFileService} import play.api.data.Form import play.api.data.Forms.{longNumber, nonEmptyText, number, tuple} import play.api.i18n.Messages diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala similarity index 97% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ConnectomeFileService.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index 3a24c199568..dfdc7f81521 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -1,19 +1,17 @@ -package com.scalableminds.webknossos.datastore.services +package com.scalableminds.webknossos.datastore.services.connectome -import java.io.File -import java.nio.file.{Path, Paths} import com.scalableminds.util.io.PathUtils -import com.scalableminds.util.tools.{Fox, JsonHelper, FoxImplicits} +import com.scalableminds.util.tools.Box.tryo +import com.scalableminds.util.tools.{Fox, FoxImplicits, Full, JsonHelper} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} import com.typesafe.scalalogging.LazyLogging - -import javax.inject.Inject -import com.scalableminds.util.tools.Full -import com.scalableminds.util.tools.Box.tryo import org.apache.commons.io.FilenameUtils import play.api.libs.json.{Json, OFormat} +import java.io.File +import java.nio.file.{Path, Paths} +import javax.inject.Inject import scala.collection.Searching._ import scala.collection.mutable.ListBuffer import scala.concurrent.ExecutionContext @@ -80,7 +78,10 @@ object ConnectomeLegend { implicit val jsonFormat: OFormat[ConnectomeLegend] = Json.format[ConnectomeLegend] } -class ConnectomeFileService @Inject()(config: DataStoreConfig)(implicit ec: ExecutionContext) +class ConnectomeFileService @Inject()( + config: DataStoreConfig, + hdf5ConnectomeFileService: Hdf5ConnectomeFileService, + zarrConnectomeFileService: ZarrConnectomeFileService)(implicit ec: ExecutionContext) extends FoxImplicits with LazyLogging { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala new file mode 100644 index 00000000000..e998e20b6f5 --- /dev/null 
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -0,0 +1,7 @@ +package com.scalableminds.webknossos.datastore.services.connectome + +import javax.inject.Inject + +class Hdf5ConnectomeFileService @Inject()() { + +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala new file mode 100644 index 00000000000..3c873cac1b3 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -0,0 +1,5 @@ +package com.scalableminds.webknossos.datastore.services.connectome + +import jakarta.inject.Inject + +class ZarrConnectomeFileService @Inject()() {} From 1d79dfccd325095d880be9eb5086cf91f12e20a6 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 25 Jun 2025 12:01:50 +0200 Subject: [PATCH 078/100] WIP list +lookup --- .../controllers/DataSourceController.scala | 39 +++--- .../connectome/ConnectomeFileService.scala | 113 ++++++++++++++---- .../Hdf5ConnectomeFileService.scala | 17 +++ .../services/mesh/MeshFileService.scala | 10 +- 4 files changed, 136 insertions(+), 43 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index ac44c0d390c..ed152372a46 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -5,8 +5,17 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong -import com.scalableminds.webknossos.datastore.explore.{ExploreRemoteDatasetRequest, ExploreRemoteDatasetResponse, ExploreRemoteLayerService} -import com.scalableminds.webknossos.datastore.helpers.{GetMultipleSegmentIndexParameters, GetSegmentIndexParameters, SegmentIndexData, SegmentStatisticsParameters} +import com.scalableminds.webknossos.datastore.explore.{ + ExploreRemoteDatasetRequest, + ExploreRemoteDatasetResponse, + ExploreRemoteLayerService +} +import com.scalableminds.webknossos.datastore.helpers.{ + GetMultipleSegmentIndexParameters, + GetSegmentIndexParameters, + SegmentIndexData, + SegmentStatisticsParameters +} import com.scalableminds.webknossos.datastore.models.datasource.inbox.InboxDataSource import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSource, DataSourceId, GenericDataSource} import com.scalableminds.webknossos.datastore.services._ @@ -16,7 +25,12 @@ import com.scalableminds.webknossos.datastore.services.uploading._ import com.scalableminds.webknossos.datastore.storage.DataVaultService import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools.{Box, Empty, Failure, Full} -import com.scalableminds.webknossos.datastore.services.connectome.{ByAgglomerateIdsRequest, BySynapseIdsRequest, ConnectomeFileNameWithMappingName, ConnectomeFileService} +import com.scalableminds.webknossos.datastore.services.connectome.{ + ByAgglomerateIdsRequest, + BySynapseIdsRequest, + ConnectomeFileNameWithMappingName, + ConnectomeFileService +} import 
play.api.data.Form import play.api.data.Forms.{longNumber, nonEmptyText, number, tuple} import play.api.i18n.Messages @@ -502,20 +516,13 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { val connectomeFileNames = - connectomeFileService.exploreConnectomeFiles(organizationId, datasetDirectoryName, dataLayerName) + connectomeFileService.listConnectomeFiles(organizationId, datasetDirectoryName, dataLayerName) for { - mappingNames <- Fox.serialCombined(connectomeFileNames.toList) { connectomeFileName => - val path = - connectomeFileService.connectomeFilePath(organizationId, - datasetDirectoryName, - dataLayerName, - connectomeFileName) - connectomeFileService.mappingNameForConnectomeFile(path) - } - connectomesWithMappings = connectomeFileNames - .zip(mappingNames) - .map(tuple => ConnectomeFileNameWithMappingName(tuple._1, tuple._2)) - } yield Ok(Json.toJson(connectomesWithMappings)) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + connectomeFileInfos <- connectomeFileService.listConnectomeFiles(dataSource.id, dataLayer) + } yield Ok(Json.toJson(connectomeFileInfos)) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index dfdc7f81521..674cb9ece0e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -1,12 +1,22 @@ package com.scalableminds.webknossos.datastore.services.connectome +import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.io.PathUtils import com.scalableminds.util.tools.Box.tryo -import com.scalableminds.util.tools.{Fox, FoxImplicits, Full, JsonHelper} +import com.scalableminds.util.tools.{Box, Fox, FoxImplicits, Full, JsonHelper} import com.scalableminds.webknossos.datastore.DataStoreConfig -import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} +import com.scalableminds.webknossos.datastore.models.datasource.{ + DataLayer, + DataSourceId, + LayerAttachment, + LayerAttachmentDataformat +} +import com.scalableminds.webknossos.datastore.services.mesh.{MeshFileInfo, MeshFileKey} +import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.FilenameUtils +import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{Json, OFormat} import java.io.File @@ -78,33 +88,92 @@ object ConnectomeLegend { implicit val jsonFormat: OFormat[ConnectomeLegend] = Json.format[ConnectomeLegend] } +case class ConnectomeFileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) + class ConnectomeFileService @Inject()( config: DataStoreConfig, + remoteSourceDescriptorService: RemoteSourceDescriptorService, hdf5ConnectomeFileService: Hdf5ConnectomeFileService, zarrConnectomeFileService: ZarrConnectomeFileService)(implicit ec: ExecutionContext) extends FoxImplicits with LazyLogging { private val dataBaseDir = 
Paths.get(config.Datastore.baseDirectory) - private val connectomesDir = "connectomes" - private val connectomeFileExtension = "hdf5" + private val localConnectomesDir = "connectomes" + private val hdf5ConnectomeFileExtension = "hdf5" + + private val connectomeFileKeyCache + : AlfuCache[(DataSourceId, String, String), ConnectomeFileKey] = AlfuCache() // dataSourceId, layerName, attachmentName → MeshFileKey + + def lookUpConnectomeFileKey(dataSourceId: DataSourceId, dataLayer: DataLayer, connectomeFileName: String)( + implicit ec: ExecutionContext): Fox[ConnectomeFileKey] = + connectomeFileKeyCache.getOrLoad( + (dataSourceId, dataLayer.name, connectomeFileName), + _ => lookUpConnectomeFileKeyImpl(dataSourceId, dataLayer, connectomeFileName).toFox) + + private def lookUpConnectomeFileKeyImpl(dataSourceId: DataSourceId, + dataLayer: DataLayer, + connectomeFileName: String): Box[ConnectomeFileKey] = { + val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { + case Some(attachments) => attachments.meshes.find(_.name == connectomeFileName) + case None => None + } + val localDatasetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + for { + registeredAttachmentNormalized <- tryo(registeredAttachment.map { attachment => + attachment.copy( + path = + remoteSourceDescriptorService.uriFromPathLiteral(attachment.path.toString, localDatasetDir, dataLayer.name)) + }) + localFallbackAttachment = LayerAttachment( + connectomeFileName, + localDatasetDir.resolve(dataLayer.name).resolve(localConnectomesDir).toUri, + LayerAttachmentDataformat.hdf5 + ) + selectedAttachment = registeredAttachmentNormalized.getOrElse(localFallbackAttachment) + } yield + ConnectomeFileKey( + dataSourceId, + dataLayer.name, + selectedAttachment + ) + } - private lazy val connectomeFileCache = new Hdf5FileCache(30) + def listConnectomeFiles(dataSourceId: DataSourceId, dataLayer: DataLayer)( + implicit ec: ExecutionContext, + tc: TokenContext, + m: MessagesProvider): Fox[List[ConnectomeFileNameWithMappingName]] = { + val attachedConnectomeFileNames = dataLayer.attachments.map(_.meshes).getOrElse(Seq.empty).map(_.name).toSet - def exploreConnectomeFiles(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String): Set[String] = { - val layerDir = dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayerName) - PathUtils - .listFiles(layerDir.resolve(connectomesDir), + val layerDir = + dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName).resolve(dataLayer.name) + val scannedConnectomeFileNames = PathUtils + .listFiles(layerDir.resolve(localConnectomesDir), silent = true, - PathUtils.fileExtensionFilter(connectomeFileExtension)) + PathUtils.fileExtensionFilter(hdf5ConnectomeFileExtension)) .map { paths => paths.map(path => FilenameUtils.removeExtension(path.getFileName.toString)) } .toOption .getOrElse(Nil) .toSet + + val allConnectomeFileNames = attachedConnectomeFileNames ++ scannedConnectomeFileNames + + Fox.fromFuture( + Fox + .serialSequence(allConnectomeFileNames.toSeq) { connectomeFileName => + for { + connectomeFileKey <- lookUpConnectomeFileKey(dataSourceId, dataLayer, connectomeFileName) ?~> Messages( + "connectome.file.lookup.failed", + connectomeFileName) + mappingName <- mappingNameForConnectomeFile(connectomeFileKey) ?~> Messages( + "connectome.file.readMappingName.failed", + connectomeFileName) + } yield ConnectomeFileNameWithMappingName(connectomeFileName, mappingName) + } + 
// Only return successes, we don’t want a malformed file breaking the list request. + .map(_.flatten)) } def connectomeFilePath(organizationId: String, @@ -118,16 +187,12 @@ class ConnectomeFileService @Inject()( .resolve(connectomesDir) .resolve(s"$connectomeFileName.$connectomeFileExtension") - def mappingNameForConnectomeFile(connectomeFilePath: Path): Fox[String] = - for { - cachedConnectomeFile <- tryo { - connectomeFileCache.getCachedHdf5File(connectomeFilePath)(CachedHdf5File.fromPath) - }.toFox ?~> "connectome.file.open.failed" - mappingName <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.stringReader.getAttr("/", "metadata/mapping_name") - } ?~> "connectome.file.readEncoding.failed" - _ = cachedConnectomeFile.finishAccess() - } yield mappingName + def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey): Fox[String] = + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => zarrConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) + case LayerAttachmentDataformat.hdf5 => hdf5ConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) + case _ => unsupportedDataFormat(connectomeFileKey) + } def synapsesForAgglomerates(connectomeFilePath: Path, agglomerateIds: List[Long]): Fox[List[DirectedSynapseList]] = if (agglomerateIds.length == 1) { @@ -307,4 +372,8 @@ class ConnectomeFileService @Inject()( block }.toFox + private def unsupportedDataFormat(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext) = + Fox.failure( + s"Trying to load connectome file with unsupported data format ${connectomeFileKey.attachment.dataFormat}") + } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index e998e20b6f5..ce0eb059165 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -1,7 +1,24 @@ package com.scalableminds.webknossos.datastore.services.connectome +import com.scalableminds.util.tools.Box.tryo +import com.scalableminds.util.tools.Fox +import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} + +import java.nio.file.Path import javax.inject.Inject class Hdf5ConnectomeFileService @Inject()() { + private lazy val connectomeFileCache = new Hdf5FileCache(30) + + def mappingNameForConnectomeFile(connectomeFilePath: Path): Fox[String] = + for { + cachedConnectomeFile <- tryo { + connectomeFileCache.getCachedHdf5File(connectomeFilePath)(CachedHdf5File.fromPath) + }.toFox ?~> "connectome.file.open.failed" + mappingName <- finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.stringReader.getAttr("/", "metadata/mapping_name") + } ?~> "connectome.file.readEncoding.failed" + _ = cachedConnectomeFile.finishAccess() + } yield mappingName } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index a8ec9069132..f0141be5621 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -77,16 +77,16 @@ class MeshFileService @Inject()(config: DataStoreConfig, private val hdf5MeshFileExtension = "hdf5" private val meshFileKeyCache - : AlfuCache[(DataSourceId, String, String), MeshFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → MeshFileKey + : AlfuCache[(DataSourceId, String, String), MeshFileKey] = AlfuCache() // dataSourceId, layerName, attachmentName → MeshFileKey def lookUpMeshFileKey(dataSourceId: DataSourceId, dataLayer: DataLayer, meshFileName: String)( implicit ec: ExecutionContext): Fox[MeshFileKey] = meshFileKeyCache.getOrLoad((dataSourceId, dataLayer.name, meshFileName), - _ => lookUpMeshFileImpl(dataSourceId, dataLayer, meshFileName).toFox) + _ => lookUpMeshFileKeyImpl(dataSourceId, dataLayer, meshFileName).toFox) - private def lookUpMeshFileImpl(dataSourceId: DataSourceId, - dataLayer: DataLayer, - meshFileName: String): Box[MeshFileKey] = { + private def lookUpMeshFileKeyImpl(dataSourceId: DataSourceId, + dataLayer: DataLayer, + meshFileName: String): Box[MeshFileKey] = { val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { case Some(attachments) => attachments.meshes.find(_.name == meshFileName) case None => None From d6f11cb56fb9b4612cd1d601b548a1839979208d Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 25 Jun 2025 13:43:38 +0200 Subject: [PATCH 079/100] delegate to correct service depending on attachment dataformat --- .../controllers/DataSourceController.scala | 47 ++-- .../connectome/ConnectomeFileService.scala | 244 ++++-------------- .../Hdf5ConnectomeFileService.scala | 203 ++++++++++++++- .../ZarrConnectomeFileService.scala | 26 +- 4 files changed, 296 insertions(+), 224 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index ed152372a46..376866cec15 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -28,7 +28,6 @@ import com.scalableminds.util.tools.{Box, Empty, Failure, Full} import com.scalableminds.webknossos.datastore.services.connectome.{ ByAgglomerateIdsRequest, BySynapseIdsRequest, - ConnectomeFileNameWithMappingName, ConnectomeFileService } import play.api.data.Form @@ -515,8 +514,6 @@ class DataSourceController @Inject()( Action.async { implicit request => accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { - val connectomeFileNames = - connectomeFileService.listConnectomeFiles(organizationId, datasetDirectoryName, dataLayerName) for { (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, @@ -533,10 +530,13 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - meshFilePath <- Fox.successful( - connectomeFileService - .connectomeFilePath(organizationId, datasetDirectoryName, dataLayerName, request.body.connectomeFile)) - synapses <- connectomeFileService.synapsesForAgglomerates(meshFilePath, request.body.agglomerateIds) + (dataSource, dataLayer) <- 
dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + meshFileKey <- connectomeFileService.lookUpConnectomeFileKey(dataSource.id, + dataLayer, + request.body.connectomeFile) + synapses <- connectomeFileService.synapsesForAgglomerates(meshFileKey, request.body.agglomerateIds) } yield Ok(Json.toJson(synapses)) } } @@ -549,10 +549,13 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - meshFilePath <- Fox.successful( - connectomeFileService - .connectomeFilePath(organizationId, datasetDirectoryName, dataLayerName, request.body.connectomeFile)) - agglomerateIds <- connectomeFileService.synapticPartnerForSynapses(meshFilePath, + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + meshFileKey <- connectomeFileService.lookUpConnectomeFileKey(dataSource.id, + dataLayer, + request.body.connectomeFile) + agglomerateIds <- connectomeFileService.synapticPartnerForSynapses(meshFileKey, request.body.synapseIds, direction) } yield Ok(Json.toJson(agglomerateIds)) @@ -566,10 +569,13 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - meshFilePath <- Fox.successful( - connectomeFileService - .connectomeFilePath(organizationId, datasetDirectoryName, dataLayerName, request.body.connectomeFile)) - synapsePositions <- connectomeFileService.positionsForSynapses(meshFilePath, request.body.synapseIds) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + meshFileKey <- connectomeFileService.lookUpConnectomeFileKey(dataSource.id, + dataLayer, + request.body.connectomeFile) + synapsePositions <- connectomeFileService.positionsForSynapses(meshFileKey, request.body.synapseIds) } yield Ok(Json.toJson(synapsePositions)) } } @@ -581,10 +587,13 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { - meshFilePath <- Fox.successful( - connectomeFileService - .connectomeFilePath(organizationId, datasetDirectoryName, dataLayerName, request.body.connectomeFile)) - synapseTypes <- connectomeFileService.typesForSynapses(meshFilePath, request.body.synapseIds) + (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, + datasetDirectoryName, + dataLayerName) + meshFileKey <- connectomeFileService.lookUpConnectomeFileKey(dataSource.id, + dataLayer, + request.body.connectomeFile) + synapseTypes <- connectomeFileService.typesForSynapses(meshFileKey, request.body.synapseIds) } yield Ok(Json.toJson(synapseTypes)) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index 674cb9ece0e..e19a235045a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -4,7 +4,7 @@ import com.scalableminds.util.accesscontext.TokenContext 
import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.io.PathUtils import com.scalableminds.util.tools.Box.tryo -import com.scalableminds.util.tools.{Box, Fox, FoxImplicits, Full, JsonHelper} +import com.scalableminds.util.tools.{Box, Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.models.datasource.{ DataLayer, @@ -12,17 +12,14 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ LayerAttachment, LayerAttachmentDataformat } -import com.scalableminds.webknossos.datastore.services.mesh.{MeshFileInfo, MeshFileKey} -import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache, RemoteSourceDescriptorService} +import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.FilenameUtils import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{Json, OFormat} -import java.io.File -import java.nio.file.{Path, Paths} +import java.nio.file.Paths import javax.inject.Inject -import scala.collection.Searching._ import scala.collection.mutable.ListBuffer import scala.concurrent.ExecutionContext @@ -90,11 +87,10 @@ object ConnectomeLegend { case class ConnectomeFileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) -class ConnectomeFileService @Inject()( - config: DataStoreConfig, - remoteSourceDescriptorService: RemoteSourceDescriptorService, - hdf5ConnectomeFileService: Hdf5ConnectomeFileService, - zarrConnectomeFileService: ZarrConnectomeFileService)(implicit ec: ExecutionContext) +class ConnectomeFileService @Inject()(config: DataStoreConfig, + remoteSourceDescriptorService: RemoteSourceDescriptorService, + hdf5ConnectomeFileService: Hdf5ConnectomeFileService, + zarrConnectomeFileService: ZarrConnectomeFileService) extends FoxImplicits with LazyLogging { @@ -103,7 +99,7 @@ class ConnectomeFileService @Inject()( private val hdf5ConnectomeFileExtension = "hdf5" private val connectomeFileKeyCache - : AlfuCache[(DataSourceId, String, String), ConnectomeFileKey] = AlfuCache() // dataSourceId, layerName, attachmentName → MeshFileKey + : AlfuCache[(DataSourceId, String, String), ConnectomeFileKey] = AlfuCache() // dataSourceId, layerName, attachmentName → ConnectomeFileKey def lookUpConnectomeFileKey(dataSourceId: DataSourceId, dataLayer: DataLayer, connectomeFileName: String)( implicit ec: ExecutionContext): Fox[ConnectomeFileKey] = @@ -115,7 +111,7 @@ class ConnectomeFileService @Inject()( dataLayer: DataLayer, connectomeFileName: String): Box[ConnectomeFileKey] = { val registeredAttachment: Option[LayerAttachment] = dataLayer.attachments match { - case Some(attachments) => attachments.meshes.find(_.name == connectomeFileName) + case Some(attachments) => attachments.connectomes.find(_.name == connectomeFileName) case None => None } val localDatasetDir = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) @@ -143,7 +139,7 @@ class ConnectomeFileService @Inject()( implicit ec: ExecutionContext, tc: TokenContext, m: MessagesProvider): Fox[List[ConnectomeFileNameWithMappingName]] = { - val attachedConnectomeFileNames = dataLayer.attachments.map(_.meshes).getOrElse(Seq.empty).map(_.name).toSet + val attachedConnectomeFileNames = dataLayer.attachments.map(_.connectomes).getOrElse(Seq.empty).map(_.name).toSet val layerDir = 
dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName).resolve(dataLayer.name) @@ -176,201 +172,55 @@ class ConnectomeFileService @Inject()( .map(_.flatten)) } - def connectomeFilePath(organizationId: String, - datasetDirectoryName: String, - dataLayerName: String, - connectomeFileName: String): Path = - dataBaseDir - .resolve(organizationId) - .resolve(datasetDirectoryName) - .resolve(dataLayerName) - .resolve(connectomesDir) - .resolve(s"$connectomeFileName.$connectomeFileExtension") - - def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey): Fox[String] = + def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[String] = connectomeFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) case LayerAttachmentDataformat.hdf5 => hdf5ConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) case _ => unsupportedDataFormat(connectomeFileKey) } - def synapsesForAgglomerates(connectomeFilePath: Path, agglomerateIds: List[Long]): Fox[List[DirectedSynapseList]] = - if (agglomerateIds.length == 1) { - for { - agglomerateId <- agglomerateIds.headOption.toFox ?~> "Failed to extract the single agglomerate ID from request" - inSynapses <- ingoingSynapsesForAgglomerate(connectomeFilePath, agglomerateId) ?~> "Failed to read ingoing synapses" - outSynapses <- outgoingSynapsesForAgglomerate(connectomeFilePath, agglomerateId) ?~> "Failed to read outgoing synapses" - } yield List(DirectedSynapseList(inSynapses, outSynapses)) - } else { - val agglomeratePairs = directedPairs(agglomerateIds.toSet.toList) - for { - synapsesPerPair <- Fox.serialCombined(agglomeratePairs)(pair => - synapseIdsForDirectedPair(connectomeFilePath, pair._1, pair._2)) - synapseListsMap = gatherPairSynapseLists(agglomerateIds, agglomeratePairs, synapsesPerPair) - synapseListsOrdered = agglomerateIds.map(id => synapseListsMap(id)) - } yield synapseListsOrdered + def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: List[Long])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[DirectedSynapseList]] = + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrConnectomeFileService.synapsesForAgglomerates(connectomeFileKey, agglomerateIds) + case LayerAttachmentDataformat.hdf5 => + hdf5ConnectomeFileService.synapsesForAgglomerates(connectomeFileKey, agglomerateIds) + case _ => unsupportedDataFormat(connectomeFileKey) } - private def directedPairs(items: List[Long]): List[(Long, Long)] = - (for { x <- items; y <- items } yield (x, y)).filter(pair => pair._1 != pair._2) - - private def gatherPairSynapseLists(agglomerateIds: List[Long], - agglomeratePairs: List[(Long, Long)], - synapsesPerPair: List[List[Long]]): collection.Map[Long, DirectedSynapseList] = { - val directedSynapseListsMutable = scala.collection.mutable.Map[Long, DirectedSynapseListMutable]() - agglomerateIds.foreach { agglomerateId => - directedSynapseListsMutable(agglomerateId) = DirectedSynapseListMutable.empty - } - agglomeratePairs.zip(synapsesPerPair).foreach { pairWithSynapses: ((Long, Long), List[Long]) => - val srcAgglomerate = pairWithSynapses._1._1 - val dstAgglomerate = pairWithSynapses._1._2 - directedSynapseListsMutable(srcAgglomerate).out ++= pairWithSynapses._2 - directedSynapseListsMutable(dstAgglomerate).in ++= pairWithSynapses._2 + def 
synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: String)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[Long]] = + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction) + case LayerAttachmentDataformat.hdf5 => + hdf5ConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction) + case _ => unsupportedDataFormat(connectomeFileKey) } - directedSynapseListsMutable.view.mapValues(_.freeze).toMap - } - private def ingoingSynapsesForAgglomerate(connectomeFilePath: Path, agglomerateId: Long): Fox[List[Long]] = - for { - cachedConnectomeFile <- tryo { - connectomeFileCache.getCachedHdf5File(connectomeFilePath)(CachedHdf5File.fromPath) - }.toFox ?~> "connectome.file.open.failed" - fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSC_indptr", 2, agglomerateId) - } ?~> "Could not read offsets from connectome file" - from <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" - to <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" - // readArrayBlockWithOffset has a bug and does not return the empty array when block size 0 is passed, hence the if. - agglomeratePairs: Array[Long] <- if (to - from == 0L) Fox.successful(Array.empty[Long]) - else - finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSC_agglomerate_pair", (to - from).toInt, from) - } ?~> "Could not read agglomerate pairs from connectome file" - synapseIdsNested <- Fox.serialCombined(agglomeratePairs.toList) { agglomeratePair: Long => - for { - from <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, agglomeratePair) - }.flatMap(_.headOption.toFox) - to <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", - 1, - agglomeratePair + 1) - }.flatMap(_.headOption.toFox) - } yield List.range(from, to) - } ?~> "Could not read ingoing synapses from connectome file" - _ = cachedConnectomeFile.finishAccess() - } yield synapseIdsNested.flatten - - private def outgoingSynapsesForAgglomerate(connectomeFilePath: Path, agglomerateId: Long): Fox[List[Long]] = - for { - cachedConnectomeFile <- tryo { - connectomeFileCache.getCachedHdf5File(connectomeFilePath)(CachedHdf5File.fromPath) - }.toFox ?~> "connectome.file.open.failed" - fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indptr", 2, agglomerateId) - } ?~> "Could not read offsets from connectome file" - fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" - toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" - from <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, fromPtr) - }.flatMap(_.headOption.toFox) ?~> "Could not synapses from connectome file" - to <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, toPtr) - }.flatMap(_.headOption.toFox) ?~> "Could 
not synapses from connectome file" - } yield List.range(from, to) - - def synapticPartnerForSynapses(connectomeFilePath: Path, synapseIds: List[Long], direction: String): Fox[List[Long]] = - for { - _ <- Fox.fromBool(direction == "src" || direction == "dst") ?~> s"Invalid synaptic partner direction: $direction" - collection = s"/synapse_to_${direction}_agglomerate" - cachedConnectomeFile <- tryo { - connectomeFileCache.getCachedHdf5File(connectomeFilePath)(CachedHdf5File.fromPath) - }.toFox ?~> "connectome.file.open.failed" - agglomerateIds <- Fox.serialCombined(synapseIds) { synapseId: Long => - finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(collection, 1, synapseId) - }.flatMap(_.headOption.toFox) - } - } yield agglomerateIds - - def positionsForSynapses(connectomeFilePath: Path, synapseIds: List[Long]): Fox[List[List[Long]]] = - for { - cachedConnectomeFile <- tryo { - connectomeFileCache.getCachedHdf5File(connectomeFilePath)(CachedHdf5File.fromPath) - }.toFox ?~> "connectome.file.open.failed" - synapsePositions <- Fox.serialCombined(synapseIds) { synapseId: Long => - finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readMatrixBlockWithOffset("/synapse_positions", 1, 3, synapseId, 0) - }.flatMap(_.headOption.toFox) - } - } yield synapsePositions.map(_.toList) - - def typesForSynapses(connectomeFilePath: Path, synapseIds: List[Long]): Fox[SynapseTypesWithLegend] = - for { - cachedConnectomeFile <- tryo { - connectomeFileCache.getCachedHdf5File(connectomeFilePath)(CachedHdf5File.fromPath) - }.toFox ?~> "connectome.file.open.failed" - typeNames = typeNamesForSynapsesOrEmpty(connectomeFilePath) - synapseTypes <- Fox.serialCombined(synapseIds) { synapseId: Long => - finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/synapse_types", 1, synapseId) - }.flatMap(_.headOption.toFox) - } - } yield SynapseTypesWithLegend(synapseTypes, typeNames) - - private def typeNamesForSynapsesOrEmpty(connectomeFilePath: Path): List[String] = { - val typeNamesPath = Paths.get(s"${connectomeFilePath.toString.dropRight(connectomeFileExtension.length)}json") - if (new File(typeNamesPath.toString).exists()) { - JsonHelper.parseFromFileAs[ConnectomeLegend](typeNamesPath, typeNamesPath.getParent) match { - case Full(connectomeLegend) => connectomeLegend.synapse_type_names - case _ => List.empty - } - } else List.empty - } - - private def synapseIdsForDirectedPair(connectomeFilePath: Path, - srcAgglomerateId: Long, - dstAgglomerateId: Long): Fox[List[Long]] = - for { - cachedConnectomeFile <- tryo { - connectomeFileCache.getCachedHdf5File(connectomeFilePath)(CachedHdf5File.fromPath) - }.toFox ?~> "connectome.file.open.failed" - fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indptr", 2, srcAgglomerateId) - } ?~> "Could not read offsets from connectome file" - fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" - toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" - columnValues: Array[Long] <- if (toPtr - fromPtr == 0L) Fox.successful(Array.empty[Long]) - else - finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indices", (toPtr - fromPtr).toInt, fromPtr) - } ?~> "Could not read agglomerate pairs from connectome file" - columnOffset <- 
searchSorted(columnValues, dstAgglomerateId) - pairIndex = fromPtr + columnOffset - synapses <- if ((columnOffset >= columnValues.length) || (columnValues(columnOffset) != dstAgglomerateId)) - Fox.successful(List.empty) - else - for { - fromAndTo <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 2, pairIndex) - } - from <- fromAndTo.lift(0).toFox - to <- fromAndTo.lift(1).toFox - } yield List.range(from, to) - } yield synapses - - private def searchSorted(haystack: Array[Long], needle: Long): Fox[Int] = - haystack.search(needle) match { - case Found(i) => Fox.successful(i) - case InsertionPoint(i) => Fox.successful(i) + def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[List[Long]]] = + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds) + case LayerAttachmentDataformat.hdf5 => + hdf5ConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds) + case _ => unsupportedDataFormat(connectomeFileKey) } - private def finishAccessOnFailure[T](f: CachedHdf5File)(block: => T): Fox[T] = - tryo { _: Throwable => - f.finishAccess() - } { - block - }.toFox + def typesForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[SynapseTypesWithLegend] = + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => zarrConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds) + case LayerAttachmentDataformat.hdf5 => hdf5ConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds) + case _ => unsupportedDataFormat(connectomeFileKey) + } private def unsupportedDataFormat(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext) = Fox.failure( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index ce0eb059165..8bfe8cf71a5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -1,24 +1,213 @@ package com.scalableminds.webknossos.datastore.services.connectome import com.scalableminds.util.tools.Box.tryo -import com.scalableminds.util.tools.Fox +import com.scalableminds.util.tools.{Box, Fox, FoxImplicits, Full, JsonHelper} import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} -import java.nio.file.Path +import java.io.File +import java.nio.file.Paths import javax.inject.Inject +import scala.collection.Searching.{Found, InsertionPoint} +import scala.concurrent.ExecutionContext -class Hdf5ConnectomeFileService @Inject()() { +class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { private lazy val connectomeFileCache = new Hdf5FileCache(30) + private val connectomeFileExtension = "hdf5" - def mappingNameForConnectomeFile(connectomeFilePath: Path): Fox[String] = + def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext): Fox[String] = for { - cachedConnectomeFile <- tryo { - 
connectomeFileCache.getCachedHdf5File(connectomeFilePath)(CachedHdf5File.fromPath) - }.toFox ?~> "connectome.file.open.failed" + cachedConnectomeFile <- connectomeFileCache + .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) + .toFox ?~> "connectome.file.open.failed" mappingName <- finishAccessOnFailure(cachedConnectomeFile) { cachedConnectomeFile.stringReader.getAttr("/", "metadata/mapping_name") } ?~> "connectome.file.readEncoding.failed" _ = cachedConnectomeFile.finishAccess() } yield mappingName + + def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: List[Long])( + implicit ec: ExecutionContext): Fox[List[DirectedSynapseList]] = + if (agglomerateIds.length == 1) { + for { + agglomerateId <- agglomerateIds.headOption.toFox ?~> "Failed to extract the single agglomerate ID from request" + inSynapses <- ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) ?~> "Failed to read ingoing synapses" + outSynapses <- outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) ?~> "Failed to read outgoing synapses" + } yield List(DirectedSynapseList(inSynapses, outSynapses)) + } else { + val agglomeratePairs = directedPairs(agglomerateIds.toSet.toList) + for { + synapsesPerPair <- Fox.serialCombined(agglomeratePairs)(pair => + synapseIdsForDirectedPair(connectomeFileKey, pair._1, pair._2)) + synapseListsMap = gatherPairSynapseLists(agglomerateIds, agglomeratePairs, synapsesPerPair) + synapseListsOrdered = agglomerateIds.map(id => synapseListsMap(id)) + } yield synapseListsOrdered + } + + private def directedPairs(items: List[Long]): List[(Long, Long)] = + (for { x <- items; y <- items } yield (x, y)).filter(pair => pair._1 != pair._2) + + private def gatherPairSynapseLists(agglomerateIds: List[Long], + agglomeratePairs: List[(Long, Long)], + synapsesPerPair: List[List[Long]]): collection.Map[Long, DirectedSynapseList] = { + val directedSynapseListsMutable = scala.collection.mutable.Map[Long, DirectedSynapseListMutable]() + agglomerateIds.foreach { agglomerateId => + directedSynapseListsMutable(agglomerateId) = DirectedSynapseListMutable.empty + } + agglomeratePairs.zip(synapsesPerPair).foreach { pairWithSynapses: ((Long, Long), List[Long]) => + val srcAgglomerate = pairWithSynapses._1._1 + val dstAgglomerate = pairWithSynapses._1._2 + directedSynapseListsMutable(srcAgglomerate).out ++= pairWithSynapses._2 + directedSynapseListsMutable(dstAgglomerate).in ++= pairWithSynapses._2 + } + directedSynapseListsMutable.view.mapValues(_.freeze).toMap + } + + private def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( + implicit ec: ExecutionContext): Fox[List[Long]] = + for { + cachedConnectomeFile <- connectomeFileCache + .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) + .toFox ?~> "connectome.file.open.failed" + fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSC_indptr", 2, agglomerateId) + } ?~> "Could not read offsets from connectome file" + from <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" + to <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" + // readArrayBlockWithOffset has a bug and does not return the empty array when block size 0 is passed, hence the if. 
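// Illustrative note (not part of this patch): the reads above and below walk a standard
// compressed-sparse-column (CSC) index. With toy contents
//   CSC_indptr           = [0, 2, 5]        (agglomerate i owns pairs indptr(i) until indptr(i + 1))
//   CSC_agglomerate_pair = [7, 9, 3, 4, 8]
// a request for agglomerateId = 1 reads (from, to) = (2, 5), i.e. the pairs 3, 4 and 8, and each
// pair p is then expanded to the synapse id range
//   List.range(agglomerate_pair_offsets(p), agglomerate_pair_offsets(p + 1))
// exactly as the code below does after the empty-block guard.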
+ agglomeratePairs: Array[Long] <- if (to - from == 0L) Fox.successful(Array.empty[Long]) + else + finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSC_agglomerate_pair", (to - from).toInt, from) + } ?~> "Could not read agglomerate pairs from connectome file" + synapseIdsNested <- Fox.serialCombined(agglomeratePairs.toList) { agglomeratePair: Long => + for { + from <- finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, agglomeratePair) + }.flatMap(_.headOption.toFox) + to <- finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", + 1, + agglomeratePair + 1) + }.flatMap(_.headOption.toFox) + } yield List.range(from, to) + } ?~> "Could not read ingoing synapses from connectome file" + _ = cachedConnectomeFile.finishAccess() + } yield synapseIdsNested.flatten + + private def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( + implicit ec: ExecutionContext): Fox[List[Long]] = + for { + cachedConnectomeFile <- connectomeFileCache + .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) + .toFox ?~> "connectome.file.open.failed" + fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indptr", 2, agglomerateId) + } ?~> "Could not read offsets from connectome file" + fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" + toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" + from <- finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, fromPtr) + }.flatMap(_.headOption.toFox) ?~> "Could not synapses from connectome file" + to <- finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, toPtr) + }.flatMap(_.headOption.toFox) ?~> "Could not synapses from connectome file" + } yield List.range(from, to) + + def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: String)( + implicit ec: ExecutionContext): Fox[List[Long]] = + for { + _ <- Fox.fromBool(direction == "src" || direction == "dst") ?~> s"Invalid synaptic partner direction: $direction" + collection = s"/synapse_to_${direction}_agglomerate" + cachedConnectomeFile <- connectomeFileCache + .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) + .toFox ?~> "connectome.file.open.failed" + agglomerateIds <- Fox.serialCombined(synapseIds) { synapseId: Long => + finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(collection, 1, synapseId) + }.flatMap(_.headOption.toFox) + } + } yield agglomerateIds + + def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( + implicit ec: ExecutionContext): Fox[List[List[Long]]] = + for { + cachedConnectomeFile <- connectomeFileCache + .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) + .toFox ?~> "connectome.file.open.failed" + synapsePositions <- Fox.serialCombined(synapseIds) { synapseId: Long => + finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readMatrixBlockWithOffset("/synapse_positions", 1, 3, 
synapseId, 0) + }.flatMap(_.headOption.toFox) + } + } yield synapsePositions.map(_.toList) + + def typesForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( + implicit ec: ExecutionContext): Fox[SynapseTypesWithLegend] = + for { + cachedConnectomeFile <- connectomeFileCache + .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) + .toFox ?~> "connectome.file.open.failed" + typeNames = typeNamesForSynapsesOrEmpty(connectomeFileKey) + synapseTypes <- Fox.serialCombined(synapseIds) { synapseId: Long => + finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/synapse_types", 1, synapseId) + }.flatMap(_.headOption.toFox) + } + } yield SynapseTypesWithLegend(synapseTypes, typeNames) + + private def typeNamesForSynapsesOrEmpty(connectomeFileKey: ConnectomeFileKey): List[String] = { + val typeNamesPath = + Paths.get(s"${connectomeFileKey.attachment.localPath.toString.dropRight(connectomeFileExtension.length)}json") + if (new File(typeNamesPath.toString).exists()) { + JsonHelper.parseFromFileAs[ConnectomeLegend](typeNamesPath, typeNamesPath.getParent) match { + case Full(connectomeLegend) => connectomeLegend.synapse_type_names + case _ => List.empty + } + } else List.empty + } + + private def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, + srcAgglomerateId: Long, + dstAgglomerateId: Long)(implicit ec: ExecutionContext): Fox[List[Long]] = + for { + cachedConnectomeFile <- connectomeFileCache + .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) + .toFox ?~> "connectome.file.open.failed" + fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indptr", 2, srcAgglomerateId) + } ?~> "Could not read offsets from connectome file" + fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" + toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" + columnValues: Array[Long] <- if (toPtr - fromPtr == 0L) Fox.successful(Array.empty[Long]) + else + finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indices", (toPtr - fromPtr).toInt, fromPtr) + } ?~> "Could not read agglomerate pairs from connectome file" + columnOffset <- searchSorted(columnValues, dstAgglomerateId).toFox + pairIndex = fromPtr + columnOffset + synapses <- if ((columnOffset >= columnValues.length) || (columnValues(columnOffset) != dstAgglomerateId)) + Fox.successful(List.empty) + else + for { + fromAndTo <- finishAccessOnFailure(cachedConnectomeFile) { + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 2, pairIndex) + } + from <- fromAndTo.lift(0).toFox + to <- fromAndTo.lift(1).toFox + } yield List.range(from, to) + } yield synapses + + private def searchSorted(haystack: Array[Long], needle: Long): Box[Int] = + haystack.search(needle) match { + case Found(i) => Full(i) + case InsertionPoint(i) => Full(i) + } + + private def finishAccessOnFailure[T](f: CachedHdf5File)(block: => T)(implicit ec: ExecutionContext): Fox[T] = + tryo { _: Throwable => + f.finishAccess() + } { + block + }.toFox } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index 
3c873cac1b3..08ff8088d53 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -1,5 +1,29 @@ package com.scalableminds.webknossos.datastore.services.connectome +import com.scalableminds.util.accesscontext.TokenContext +import com.scalableminds.util.tools.Fox import jakarta.inject.Inject -class ZarrConnectomeFileService @Inject()() {} +import scala.concurrent.ExecutionContext + +class ZarrConnectomeFileService @Inject()() { + def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[String] = ??? + + def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: List[Long])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[DirectedSynapseList]] = ??? + + def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: String)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[Long]] = ??? + + def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[List[Long]]] = ??? + + def typesForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[SynapseTypesWithLegend] = ??? + +} From 08b57fdfa8eb28567d912a73fb88f49b6cacd146 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 25 Jun 2025 13:52:26 +0200 Subject: [PATCH 080/100] hard coded synapse type names for hdf5 --- .../connectome/ConnectomeFileService.scala | 4 ++-- .../Hdf5ConnectomeFileService.scala | 19 +++---------------- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index e19a235045a..e5ace72a2c2 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -172,8 +172,8 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, .map(_.flatten)) } - def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[String] = + private def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext, + tc: TokenContext): Fox[String] = connectomeFileKey.attachment.dataFormat match { case LayerAttachmentDataformat.zarr3 => zarrConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) case LayerAttachmentDataformat.hdf5 => hdf5ConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index 8bfe8cf71a5..64f3016466d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -1,11 +1,9 @@ package com.scalableminds.webknossos.datastore.services.connectome import com.scalableminds.util.tools.Box.tryo -import com.scalableminds.util.tools.{Box, Fox, FoxImplicits, Full, JsonHelper} +import com.scalableminds.util.tools.{Box, Fox, FoxImplicits, Full} import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} -import java.io.File -import java.nio.file.Paths import javax.inject.Inject import scala.collection.Searching.{Found, InsertionPoint} import scala.concurrent.ExecutionContext @@ -13,7 +11,6 @@ import scala.concurrent.ExecutionContext class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { private lazy val connectomeFileCache = new Hdf5FileCache(30) - private val connectomeFileExtension = "hdf5" def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext): Fox[String] = for { @@ -148,7 +145,8 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { cachedConnectomeFile <- connectomeFileCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" - typeNames = typeNamesForSynapsesOrEmpty(connectomeFileKey) + // Hard coded type name list, as all legacy files have this value. + typeNames = List("dendritic-shaft-synapse", "spine-head-synapse", "soma-synapse") synapseTypes <- Fox.serialCombined(synapseIds) { synapseId: Long => finishAccessOnFailure(cachedConnectomeFile) { cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/synapse_types", 1, synapseId) @@ -156,17 +154,6 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { } } yield SynapseTypesWithLegend(synapseTypes, typeNames) - private def typeNamesForSynapsesOrEmpty(connectomeFileKey: ConnectomeFileKey): List[String] = { - val typeNamesPath = - Paths.get(s"${connectomeFileKey.attachment.localPath.toString.dropRight(connectomeFileExtension.length)}json") - if (new File(typeNamesPath.toString).exists()) { - JsonHelper.parseFromFileAs[ConnectomeLegend](typeNamesPath, typeNamesPath.getParent) match { - case Full(connectomeLegend) => connectomeLegend.synapse_type_names - case _ => List.empty - } - } else List.empty - } - private def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)(implicit ec: ExecutionContext): Fox[List[Long]] = From 56f8385f71ab16040d978c715c09431a38933f2f Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 25 Jun 2025 14:07:41 +0200 Subject: [PATCH 081/100] Update webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala Co-authored-by: MichaelBuessemeyer <39529669+MichaelBuessemeyer@users.noreply.github.com> --- .../webknossos/datastore/services/BinaryDataServiceHolder.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala index cb14a24e924..3951a07e360 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala @@ -12,7 +12,7 @@ import scala.concurrent.ExecutionContext * There is, however an additional instance for 
volume tracings in the TracingStore * The TracingStore one (for VolumeTracings) already is a singleton, since the surrounding VolumeTracingService is a singleton. * The DataStore one is singleton-ized via this holder. - * Also, this allows giving the datastore-only sharedChunkContentsCache to the datastore one, while passing None to the tracingstore one. + * Also, this allows giving the datastore-only sharedChunkContentsCache to the DataStore one, while passing None to the TracingStore one. */ class BinaryDataServiceHolder @Inject()(config: DataStoreConfig, From 36f4cf8a09fb658e297dfae4c037cbbc69845512 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 26 Jun 2025 10:03:13 +0200 Subject: [PATCH 082/100] read attributes --- .../ZarrConnectomeFileService.scala | 63 ++++++++++++++++++- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index 08ff8088d53..f125d5dd7df 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -1,14 +1,71 @@ package com.scalableminds.webknossos.datastore.services.connectome import com.scalableminds.util.accesscontext.TokenContext -import com.scalableminds.util.tools.Fox +import com.scalableminds.util.cache.AlfuCache +import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} +import com.scalableminds.webknossos.datastore.datareaders.DatasetArray +import com.scalableminds.webknossos.datastore.services.mesh.{MeshFileAttributes, MeshFileKey} +import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import jakarta.inject.Inject +import play.api.libs.json.{JsResult, JsValue, Reads} import scala.concurrent.ExecutionContext -class ZarrConnectomeFileService @Inject()() { +case class ConnectomeFileAttributes( + formatVersion: Long, + mappingName: String, + synapseTypeNames: Seq[String] +) + +object ConnectomeFileAttributes { + val FILENAME_ZARR_JSON = "zarr.json" + + implicit object ConnectomeFileAttributesZarr3GroupHeaderReads extends Reads[ConnectomeFileAttributes] { + override def reads(json: JsValue): JsResult[ConnectomeFileAttributes] = { + val keyAttributes = "attributes" + val keyVx = "voxelytics" + val keyFormatVersion = "artifact_schema_version" + val keyArtifactAttrs = "artifact_attributes" + val connectomeFileAttrs = json \ keyAttributes \ keyVx \ keyArtifactAttrs + for { + formatVersion <- (json \ keyAttributes \ keyVx \ keyFormatVersion).validate[Long] + mappingName <- (connectomeFileAttrs \ "mapping_name").validate[String] + synapseTypeNames <- (connectomeFileAttrs \ "synapse_type_names").validate[Seq[String]] + } yield + ConnectomeFileAttributes( + formatVersion, + mappingName, + synapseTypeNames + ) + } + } +} + +class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteSourceDescriptorService) + extends FoxImplicits { + private lazy val openArraysCache = AlfuCache[(ConnectomeFileKey, String), DatasetArray]() + private lazy val attributesCache = AlfuCache[ConnectomeFileKey, ConnectomeFileAttributes]() + + private def readConnectomeFileAttributes(connectomeFileKey: ConnectomeFileKey)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[ConnectomeFileAttributes] 
= + attributesCache.getOrLoad( + connectomeFileKey, + _ => + for { + groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(connectomeFileKey.attachment) + groupHeaderBytes <- (groupVaultPath / ConnectomeFileAttributes.FILENAME_ZARR_JSON).readBytes() + connectomeFileAttributes <- JsonHelper + .parseAs[ConnectomeFileAttributes](groupHeaderBytes) + .toFox ?~> "Could not parse connectome file attributes from zarr group file" + } yield connectomeFileAttributes + ) + def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext, - tc: TokenContext): Fox[String] = ??? + tc: TokenContext): Fox[String] = + for { + attributes <- readConnectomeFileAttributes(connectomeFileKey) + } yield attributes.mappingName def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: List[Long])( implicit ec: ExecutionContext, From 325d784ab8652db49615d0add1b38c3e718c8fe7 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 26 Jun 2025 10:21:20 +0200 Subject: [PATCH 083/100] move functions used in both hdf5 and zarr case up to ConnectomeFileService. introduce delegateToService helper --- .../connectome/ConnectomeFileService.scala | 129 ++++++++++++------ .../Hdf5ConnectomeFileService.scala | 46 +------ .../ZarrConnectomeFileService.scala | 12 +- 3 files changed, 106 insertions(+), 81 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index e5ace72a2c2..bd321c9e70f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -79,12 +79,6 @@ object ConnectomeFileNameWithMappingName { implicit val jsonFormat: OFormat[ConnectomeFileNameWithMappingName] = Json.format[ConnectomeFileNameWithMappingName] } -case class ConnectomeLegend(synapse_type_names: List[String]) - -object ConnectomeLegend { - implicit val jsonFormat: OFormat[ConnectomeLegend] = Json.format[ConnectomeLegend] -} - case class ConnectomeFileKey(dataSourceId: DataSourceId, layerName: String, attachment: LayerAttachment) class ConnectomeFileService @Inject()(config: DataStoreConfig, @@ -174,56 +168,115 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, private def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[String] = - connectomeFileKey.attachment.dataFormat match { - case LayerAttachmentDataformat.zarr3 => zarrConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) - case LayerAttachmentDataformat.hdf5 => hdf5ConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) - case _ => unsupportedDataFormat(connectomeFileKey) - } + delegateToService( + connectomeFileKey, + zarrFn = zarrConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey), + hdf5Fn = hdf5ConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) + ) def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: List[Long])( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[DirectedSynapseList]] = - connectomeFileKey.attachment.dataFormat match { - case LayerAttachmentDataformat.zarr3 => - zarrConnectomeFileService.synapsesForAgglomerates(connectomeFileKey, agglomerateIds) - case 
LayerAttachmentDataformat.hdf5 => - hdf5ConnectomeFileService.synapsesForAgglomerates(connectomeFileKey, agglomerateIds) - case _ => unsupportedDataFormat(connectomeFileKey) + if (agglomerateIds.length == 1) { + for { + agglomerateId <- agglomerateIds.headOption.toFox ?~> "Failed to extract the single agglomerate ID from request" + inSynapses <- ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) ?~> "Failed to read ingoing synapses" + outSynapses <- outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) ?~> "Failed to read outgoing synapses" + } yield List(DirectedSynapseList(inSynapses, outSynapses)) + } else { + val agglomeratePairs = directedPairs(agglomerateIds.toSet.toList) + for { + synapsesPerPair <- Fox.serialCombined(agglomeratePairs)(pair => + synapseIdsForDirectedPair(connectomeFileKey, pair._1, pair._2)) + synapseListsMap = gatherPairSynapseLists(agglomerateIds, agglomeratePairs, synapsesPerPair) + synapseListsOrdered = agglomerateIds.map(id => synapseListsMap(id)) + } yield synapseListsOrdered } + private def directedPairs(items: List[Long]): List[(Long, Long)] = + (for { x <- items; y <- items } yield (x, y)).filter(pair => pair._1 != pair._2) + + private def gatherPairSynapseLists(agglomerateIds: List[Long], + agglomeratePairs: List[(Long, Long)], + synapsesPerPair: List[List[Long]]): collection.Map[Long, DirectedSynapseList] = { + val directedSynapseListsMutable = scala.collection.mutable.Map[Long, DirectedSynapseListMutable]() + agglomerateIds.foreach { agglomerateId => + directedSynapseListsMutable(agglomerateId) = DirectedSynapseListMutable.empty + } + agglomeratePairs.zip(synapsesPerPair).foreach { pairWithSynapses: ((Long, Long), List[Long]) => + val srcAgglomerate = pairWithSynapses._1._1 + val dstAgglomerate = pairWithSynapses._1._2 + directedSynapseListsMutable(srcAgglomerate).out ++= pairWithSynapses._2 + directedSynapseListsMutable(dstAgglomerate).in ++= pairWithSynapses._2 + } + directedSynapseListsMutable.view.mapValues(_.freeze).toMap + } + + private def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[Long]] = + delegateToService( + connectomeFileKey, + zarrFn = zarrConnectomeFileService.ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId), + hdf5Fn = hdf5ConnectomeFileService.ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) + ) + + private def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[Long]] = + delegateToService( + connectomeFileKey, + zarrFn = zarrConnectomeFileService.outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId), + hdf5Fn = hdf5ConnectomeFileService.outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) + ) + + private def synapseIdsForDirectedPair( + connectomeFileKey: ConnectomeFileKey, + srcAgglomerateId: Long, + dstAgglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = + delegateToService( + connectomeFileKey, + zarrFn = + zarrConnectomeFileService.synapseIdsForDirectedPair(connectomeFileKey, srcAgglomerateId, dstAgglomerateId), + hdf5Fn = + hdf5ConnectomeFileService.synapseIdsForDirectedPair(connectomeFileKey, srcAgglomerateId, dstAgglomerateId) + ) + def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: String)( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = 
- connectomeFileKey.attachment.dataFormat match { - case LayerAttachmentDataformat.zarr3 => - zarrConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction) - case LayerAttachmentDataformat.hdf5 => - hdf5ConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction) - case _ => unsupportedDataFormat(connectomeFileKey) - } + delegateToService( + connectomeFileKey, + zarrFn = zarrConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction), + hdf5Fn = hdf5ConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction) + ) def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[List[Long]]] = - connectomeFileKey.attachment.dataFormat match { - case LayerAttachmentDataformat.zarr3 => - zarrConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds) - case LayerAttachmentDataformat.hdf5 => - hdf5ConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds) - case _ => unsupportedDataFormat(connectomeFileKey) - } + delegateToService( + connectomeFileKey, + zarrFn = zarrConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds), + hdf5Fn = hdf5ConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds) + ) def typesForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext, tc: TokenContext): Fox[SynapseTypesWithLegend] = + delegateToService( + connectomeFileKey, + zarrFn = zarrConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds), + hdf5Fn = hdf5ConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds) + ) + + private def delegateToService[A](connectomeFileKey: ConnectomeFileKey, zarrFn: Fox[A], hdf5Fn: Fox[A])( + implicit ec: ExecutionContext): Fox[A] = connectomeFileKey.attachment.dataFormat match { - case LayerAttachmentDataformat.zarr3 => zarrConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds) - case LayerAttachmentDataformat.hdf5 => hdf5ConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds) - case _ => unsupportedDataFormat(connectomeFileKey) + case LayerAttachmentDataformat.zarr3 => zarrFn + case LayerAttachmentDataformat.hdf5 => hdf5Fn + case _ => + Fox.failure( + s"Trying to load connectome file with unsupported data format ${connectomeFileKey.attachment.dataFormat}") } - private def unsupportedDataFormat(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext) = - Fox.failure( - s"Trying to load connectome file with unsupported data format ${connectomeFileKey.attachment.dataFormat}") - } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index 64f3016466d..d995831139f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -23,44 +23,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { _ = cachedConnectomeFile.finishAccess() } yield mappingName - def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: List[Long])( - implicit ec: ExecutionContext): Fox[List[DirectedSynapseList]] = - 
if (agglomerateIds.length == 1) { - for { - agglomerateId <- agglomerateIds.headOption.toFox ?~> "Failed to extract the single agglomerate ID from request" - inSynapses <- ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) ?~> "Failed to read ingoing synapses" - outSynapses <- outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) ?~> "Failed to read outgoing synapses" - } yield List(DirectedSynapseList(inSynapses, outSynapses)) - } else { - val agglomeratePairs = directedPairs(agglomerateIds.toSet.toList) - for { - synapsesPerPair <- Fox.serialCombined(agglomeratePairs)(pair => - synapseIdsForDirectedPair(connectomeFileKey, pair._1, pair._2)) - synapseListsMap = gatherPairSynapseLists(agglomerateIds, agglomeratePairs, synapsesPerPair) - synapseListsOrdered = agglomerateIds.map(id => synapseListsMap(id)) - } yield synapseListsOrdered - } - - private def directedPairs(items: List[Long]): List[(Long, Long)] = - (for { x <- items; y <- items } yield (x, y)).filter(pair => pair._1 != pair._2) - - private def gatherPairSynapseLists(agglomerateIds: List[Long], - agglomeratePairs: List[(Long, Long)], - synapsesPerPair: List[List[Long]]): collection.Map[Long, DirectedSynapseList] = { - val directedSynapseListsMutable = scala.collection.mutable.Map[Long, DirectedSynapseListMutable]() - agglomerateIds.foreach { agglomerateId => - directedSynapseListsMutable(agglomerateId) = DirectedSynapseListMutable.empty - } - agglomeratePairs.zip(synapsesPerPair).foreach { pairWithSynapses: ((Long, Long), List[Long]) => - val srcAgglomerate = pairWithSynapses._1._1 - val dstAgglomerate = pairWithSynapses._1._2 - directedSynapseListsMutable(srcAgglomerate).out ++= pairWithSynapses._2 - directedSynapseListsMutable(dstAgglomerate).in ++= pairWithSynapses._2 - } - directedSynapseListsMutable.view.mapValues(_.freeze).toMap - } - - private def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( + def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext): Fox[List[Long]] = for { cachedConnectomeFile <- connectomeFileCache @@ -92,7 +55,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { _ = cachedConnectomeFile.finishAccess() } yield synapseIdsNested.flatten - private def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( + def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext): Fox[List[Long]] = for { cachedConnectomeFile <- connectomeFileCache @@ -154,9 +117,8 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { } } yield SynapseTypesWithLegend(synapseTypes, typeNames) - private def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, - srcAgglomerateId: Long, - dstAgglomerateId: Long)(implicit ec: ExecutionContext): Fox[List[Long]] = + def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)( + implicit ec: ExecutionContext): Fox[List[Long]] = for { cachedConnectomeFile <- connectomeFileCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index f125d5dd7df..9b6e6cd32b3 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -4,7 +4,6 @@ import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray -import com.scalableminds.webknossos.datastore.services.mesh.{MeshFileAttributes, MeshFileKey} import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import jakarta.inject.Inject import play.api.libs.json.{JsResult, JsValue, Reads} @@ -83,4 +82,15 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS implicit ec: ExecutionContext, tc: TokenContext): Fox[SynapseTypesWithLegend] = ??? + def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[Long]] = ??? + + def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[Long]] = ??? + + def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[List[Long]] = ??? } From e50dc665cd6f59cbeecbce11c02b80dded449909 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 26 Jun 2025 13:56:00 +0200 Subject: [PATCH 084/100] implement first functions in zarr connectome file service --- .../connectome/ConnectomeFileService.scala | 28 +++---- .../Hdf5ConnectomeFileService.scala | 11 +-- .../ZarrConnectomeFileService.scala | 84 +++++++++++++++++-- 3 files changed, 96 insertions(+), 27 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index bd321c9e70f..b5c8525e197 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -25,7 +25,7 @@ import scala.concurrent.ExecutionContext case class ByAgglomerateIdsRequest( connectomeFile: String, - agglomerateIds: List[Long] + agglomerateIds: Seq[Long] ) object ByAgglomerateIdsRequest { @@ -42,8 +42,8 @@ object BySynapseIdsRequest { } case class DirectedSynapseList( - in: List[Long], - out: List[Long] + in: Seq[Long], + out: Seq[Long] ) object DirectedSynapseList { @@ -174,9 +174,9 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, hdf5Fn = hdf5ConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) ) - def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: List[Long])( + def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: Seq[Long])( implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[DirectedSynapseList]] = + tc: TokenContext): Fox[Seq[DirectedSynapseList]] = if (agglomerateIds.length == 1) { for { agglomerateId <- agglomerateIds.headOption.toFox ?~> "Failed to extract the single agglomerate ID from request" @@ -184,7 +184,7 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, 
outSynapses <- outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) ?~> "Failed to read outgoing synapses" } yield List(DirectedSynapseList(inSynapses, outSynapses)) } else { - val agglomeratePairs = directedPairs(agglomerateIds.toSet.toList) + val agglomeratePairs = directedPairs(agglomerateIds.toSet.toSeq) for { synapsesPerPair <- Fox.serialCombined(agglomeratePairs)(pair => synapseIdsForDirectedPair(connectomeFileKey, pair._1, pair._2)) @@ -193,17 +193,17 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, } yield synapseListsOrdered } - private def directedPairs(items: List[Long]): List[(Long, Long)] = + private def directedPairs(items: Seq[Long]): Seq[(Long, Long)] = (for { x <- items; y <- items } yield (x, y)).filter(pair => pair._1 != pair._2) - private def gatherPairSynapseLists(agglomerateIds: List[Long], - agglomeratePairs: List[(Long, Long)], - synapsesPerPair: List[List[Long]]): collection.Map[Long, DirectedSynapseList] = { + private def gatherPairSynapseLists(agglomerateIds: Seq[Long], + agglomeratePairs: Seq[(Long, Long)], + synapsesPerPair: List[Seq[Long]]): collection.Map[Long, DirectedSynapseList] = { val directedSynapseListsMutable = scala.collection.mutable.Map[Long, DirectedSynapseListMutable]() agglomerateIds.foreach { agglomerateId => directedSynapseListsMutable(agglomerateId) = DirectedSynapseListMutable.empty } - agglomeratePairs.zip(synapsesPerPair).foreach { pairWithSynapses: ((Long, Long), List[Long]) => + agglomeratePairs.zip(synapsesPerPair).foreach { pairWithSynapses: ((Long, Long), Seq[Long]) => val srcAgglomerate = pairWithSynapses._1._1 val dstAgglomerate = pairWithSynapses._1._2 directedSynapseListsMutable(srcAgglomerate).out ++= pairWithSynapses._2 @@ -214,7 +214,7 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, private def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[Long]] = + tc: TokenContext): Fox[Seq[Long]] = delegateToService( connectomeFileKey, zarrFn = zarrConnectomeFileService.ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId), @@ -223,7 +223,7 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, private def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[Long]] = + tc: TokenContext): Fox[Seq[Long]] = delegateToService( connectomeFileKey, zarrFn = zarrConnectomeFileService.outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId), @@ -233,7 +233,7 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, private def synapseIdsForDirectedPair( connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, - dstAgglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = + dstAgglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = delegateToService( connectomeFileKey, zarrFn = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index d995831139f..79f41a12701 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ 
-24,7 +24,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { } yield mappingName def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( - implicit ec: ExecutionContext): Fox[List[Long]] = + implicit ec: ExecutionContext): Fox[Seq[Long]] = for { cachedConnectomeFile <- connectomeFileCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) @@ -56,7 +56,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { } yield synapseIdsNested.flatten def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( - implicit ec: ExecutionContext): Fox[List[Long]] = + implicit ec: ExecutionContext): Fox[Seq[Long]] = for { cachedConnectomeFile <- connectomeFileCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) @@ -72,7 +72,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { to <- finishAccessOnFailure(cachedConnectomeFile) { cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, toPtr) }.flatMap(_.headOption.toFox) ?~> "Could not synapses from connectome file" - } yield List.range(from, to) + } yield Seq.range(from, to) def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: String)( implicit ec: ExecutionContext): Fox[List[Long]] = @@ -118,7 +118,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { } yield SynapseTypesWithLegend(synapseTypes, typeNames) def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)( - implicit ec: ExecutionContext): Fox[List[Long]] = + implicit ec: ExecutionContext): Fox[Seq[Long]] = for { cachedConnectomeFile <- connectomeFileCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) @@ -144,9 +144,10 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { } from <- fromAndTo.lift(0).toFox to <- fromAndTo.lift(1).toFox - } yield List.range(from, to) + } yield Seq.range(from, to) } yield synapses + // TODO move to utils? 
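// Note on the binary search below: scala.collection.Searching returns Found(i) for an
// exact hit at index i and InsertionPoint(i) for the index at which the needle would
// have to be inserted to keep the sorted array sorted, e.g. Array(2L, 5L, 9L).search(5L)
// yields Found(1) while Array(2L, 5L, 9L).search(6L) yields InsertionPoint(2). Both
// cases are collapsed to the plain index here; the caller distinguishes them by
// re-checking columnValues(columnOffset) against dstAgglomerateId.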
private def searchSorted(haystack: Array[Long], needle: Long): Box[Int] = haystack.search(needle) match { case Found(i) => Full(i) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index 9b6e6cd32b3..1a3fcb37510 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -2,12 +2,17 @@ package com.scalableminds.webknossos.datastore.services.connectome import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache -import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} +import com.scalableminds.util.tools.Box.tryo +import com.scalableminds.util.tools.{Box, Fox, FoxImplicits, Full, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray +import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array +import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId +import com.scalableminds.webknossos.datastore.services.ChunkCacheService import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import jakarta.inject.Inject import play.api.libs.json.{JsResult, JsValue, Reads} +import scala.collection.Searching.{Found, InsertionPoint} import scala.concurrent.ExecutionContext case class ConnectomeFileAttributes( @@ -40,11 +45,17 @@ object ConnectomeFileAttributes { } } -class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteSourceDescriptorService) +class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteSourceDescriptorService, + chunkCacheService: ChunkCacheService) extends FoxImplicits { private lazy val openArraysCache = AlfuCache[(ConnectomeFileKey, String), DatasetArray]() private lazy val attributesCache = AlfuCache[ConnectomeFileKey, ConnectomeFileAttributes]() + private val keyCsrIndptr = "CSR_indptr" + private val keyCsrIndices = "CSR_indices" + private val keyAgglomeratePairOffsets = "agglomerate_pair_offsets" + private val keyCscAgglomeratePair = "CSC_agglomerate_pair" + private def readConnectomeFileAttributes(connectomeFileKey: ConnectomeFileKey)( implicit ec: ExecutionContext, tc: TokenContext): Fox[ConnectomeFileAttributes] = @@ -66,10 +77,6 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS attributes <- readConnectomeFileAttributes(connectomeFileKey) } yield attributes.mappingName - def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: List[Long])( - implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[DirectedSynapseList]] = ??? - def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: String)( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = ??? @@ -84,7 +91,24 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[Long]] = ??? 
+ tc: TokenContext): Fox[List[Long]] = + for { + csrIndptrArray <- openZarrArray(connectomeFileKey, keyCsrIndptr) + agglomeratePairOffsetsArray <- openZarrArray(connectomeFileKey, keyAgglomeratePairOffsets) + cscAgglomeratePairArray <- openZarrArray(connectomeFileKey, keyCscAgglomeratePair) + fromAndToPtr <- csrIndptrArray.readAsMultiArray(offset = agglomerateId, shape = 2) + fromPtr <- tryo(fromAndToPtr.getLong(0)).toFox + toPtr <- tryo(fromAndToPtr.getLong(1)).toFox + agglomeratePairsMA <- cscAgglomeratePairArray.readAsMultiArray(offset = fromPtr, shape = (toPtr - fromPtr).toInt) + agglomeratePairs <- tryo(agglomeratePairsMA.getStorage.asInstanceOf[Array[Long]]).toFox + synapseIdsNested <- Fox.serialCombined(agglomeratePairs.toList) { agglomeratePair: Long => + for { + fromTo <- agglomeratePairOffsetsArray.readAsMultiArray(offset = agglomeratePair, shape = 2) + from <- tryo(fromTo.getLong(0)).toFox + to <- tryo(fromTo.getLong(1)).toFox + } yield Seq.range(from, to) + } + } yield synapseIdsNested.flatten def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, @@ -92,5 +116,49 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)( implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[Long]] = ??? + tc: TokenContext): Fox[Seq[Long]] = + for { + csrIndptrArray <- openZarrArray(connectomeFileKey, keyCsrIndptr) + csrIndicesArray <- openZarrArray(connectomeFileKey, keyCsrIndices) + fromAndToPtr <- csrIndptrArray.readAsMultiArray(offset = srcAgglomerateId, shape = 2) + fromPtr <- tryo(fromAndToPtr.getLong(0)).toFox + toPtr <- tryo(fromAndToPtr.getLong(1)).toFox + columnValuesMA <- csrIndicesArray.readAsMultiArray(offset = fromPtr, shape = (toPtr - fromPtr).toInt) + columnValues: Array[Long] <- tryo(columnValuesMA.getStorage.asInstanceOf[Array[Long]]).toFox + columnOffset <- searchSorted(columnValues, dstAgglomerateId).toFox + pairIndex = fromPtr + columnOffset + synapses <- if ((columnOffset >= columnValues.length) || (columnValues(columnOffset) != dstAgglomerateId)) + Fox.successful(List.empty) + else + for { + agglomeratePairOffsetsArray <- openZarrArray(connectomeFileKey, keyAgglomeratePairOffsets) + fromAndTo <- agglomeratePairOffsetsArray.readAsMultiArray(offset = pairIndex, shape = 2) + from <- tryo(fromAndTo.getLong(0)).toFox + to <- tryo(fromAndTo.getLong(1)).toFox + } yield Seq.range(from, to) + } yield synapses + + // TODO move to utils? 
+ private def searchSorted(haystack: Array[Long], needle: Long): Box[Int] = + haystack.search(needle) match { + case Found(i) => Full(i) + case InsertionPoint(i) => Full(i) + } + + private def openZarrArray(connectomeFileKey: ConnectomeFileKey, + zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = + openArraysCache.getOrLoad( + (connectomeFileKey, zarrArrayName), + _ => + for { + groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(connectomeFileKey.attachment) + zarrArray <- Zarr3Array.open(groupVaultPath / zarrArrayName, + DataSourceId("dummy", "unused"), + "layer", + None, + None, + None, + chunkCacheService.sharedChunkContentsCache) + } yield zarrArray + ) } From 95a8dabe7857fc05e97d113515cccf53399b1b1e Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 26 Jun 2025 14:31:58 +0200 Subject: [PATCH 085/100] implement remaining features --- .../controllers/DataSourceController.scala | 8 +- .../connectome/ConnectomeFileService.scala | 14 ++-- .../Hdf5ConnectomeFileService.scala | 12 +-- .../connectome/SynapticPartnerDirection.scala | 9 +++ .../ZarrConnectomeFileService.scala | 78 +++++++++++++++---- 5 files changed, 94 insertions(+), 27 deletions(-) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/SynapticPartnerDirection.scala diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 376866cec15..135f5417f62 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -28,7 +28,8 @@ import com.scalableminds.util.tools.{Box, Empty, Failure, Full} import com.scalableminds.webknossos.datastore.services.connectome.{ ByAgglomerateIdsRequest, BySynapseIdsRequest, - ConnectomeFileService + ConnectomeFileService, + SynapticPartnerDirection } import play.api.data.Form import play.api.data.Forms.{longNumber, nonEmptyText, number, tuple} @@ -549,6 +550,9 @@ class DataSourceController @Inject()( accessTokenService.validateAccessFromTokenContext( UserAccessRequest.readDataSources(DataSourceId(datasetDirectoryName, organizationId))) { for { + directionValidated <- SynapticPartnerDirection + .fromString(direction) + .toFox ?~> "could not parse synaptic partner direction" (dataSource, dataLayer) <- dataSourceRepository.getDataSourceAndDataLayer(organizationId, datasetDirectoryName, dataLayerName) @@ -557,7 +561,7 @@ class DataSourceController @Inject()( request.body.connectomeFile) agglomerateIds <- connectomeFileService.synapticPartnerForSynapses(meshFileKey, request.body.synapseIds, - direction) + directionValidated) } yield Ok(Json.toJson(agglomerateIds)) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index b5c8525e197..13437aebfde 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -12,6 +12,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ LayerAttachment, 
LayerAttachmentDataformat } +import com.scalableminds.webknossos.datastore.services.connectome.SynapticPartnerDirection.SynapticPartnerDirection import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.FilenameUtils @@ -62,8 +63,8 @@ object DirectedSynapseListMutable { } case class SynapseTypesWithLegend( - synapseTypes: List[Long], - typeToString: List[String], + synapseTypes: Seq[Long], + typeToString: Seq[String], ) object SynapseTypesWithLegend { @@ -242,9 +243,10 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, hdf5ConnectomeFileService.synapseIdsForDirectedPair(connectomeFileKey, srcAgglomerateId, dstAgglomerateId) ) - def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: String)( - implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[Long]] = + def synapticPartnerForSynapses( + connectomeFileKey: ConnectomeFileKey, + synapseIds: List[Long], + direction: SynapticPartnerDirection)(implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = delegateToService( connectomeFileKey, zarrFn = zarrConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction), @@ -253,7 +255,7 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[List[Long]]] = + tc: TokenContext): Fox[Seq[Seq[Long]]] = delegateToService( connectomeFileKey, zarrFn = zarrConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds), diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index 79f41a12701..98c149d6f33 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -2,6 +2,7 @@ package com.scalableminds.webknossos.datastore.services.connectome import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools.{Box, Fox, FoxImplicits, Full} +import com.scalableminds.webknossos.datastore.services.connectome.SynapticPartnerDirection.SynapticPartnerDirection import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} import javax.inject.Inject @@ -74,17 +75,18 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { }.flatMap(_.headOption.toFox) ?~> "Could not synapses from connectome file" } yield Seq.range(from, to) - def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: String)( - implicit ec: ExecutionContext): Fox[List[Long]] = + def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, + synapseIds: List[Long], + direction: SynapticPartnerDirection)(implicit ec: ExecutionContext): Fox[List[Long]] = for { - _ <- Fox.fromBool(direction == "src" || direction == "dst") ?~> s"Invalid synaptic partner direction: $direction" - collection = s"/synapse_to_${direction}_agglomerate" cachedConnectomeFile <- connectomeFileCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" agglomerateIds <- 
Fox.serialCombined(synapseIds) { synapseId: Long => finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(collection, 1, synapseId) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(s"/synapse_to_${direction.toString}_agglomerate", + 1, + synapseId) }.flatMap(_.headOption.toFox) } } yield agglomerateIds diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/SynapticPartnerDirection.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/SynapticPartnerDirection.scala new file mode 100644 index 00000000000..2696ec25f8d --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/SynapticPartnerDirection.scala @@ -0,0 +1,9 @@ +package com.scalableminds.webknossos.datastore.services.connectome + +import com.scalableminds.util.enumeration.ExtendedEnumeration + +object SynapticPartnerDirection extends ExtendedEnumeration { + type SynapticPartnerDirection = Value + + val src, dst = Value +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index 1a3fcb37510..22b75555321 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -8,6 +8,7 @@ import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.services.ChunkCacheService +import com.scalableminds.webknossos.datastore.services.connectome.SynapticPartnerDirection.SynapticPartnerDirection import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import jakarta.inject.Inject import play.api.libs.json.{JsResult, JsValue, Reads} @@ -55,6 +56,10 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS private val keyCsrIndices = "CSR_indices" private val keyAgglomeratePairOffsets = "agglomerate_pair_offsets" private val keyCscAgglomeratePair = "CSC_agglomerate_pair" + private val keySynapseTypes = "synapse_types" + private val keySynapsePositions = "synapse_positions" + private val keySynapseToSrcAgglomerate = "synapse_to_src_agglomerate" + private val keySynapseToDstAgglomerate = "synapse_to_dst_agglomerate" private def readConnectomeFileAttributes(connectomeFileKey: ConnectomeFileKey)( implicit ec: ExecutionContext, @@ -77,28 +82,59 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS attributes <- readConnectomeFileAttributes(connectomeFileKey) } yield attributes.mappingName - def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: String)( - implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[Long]] = ??? 
+ def synapticPartnerForSynapses( + connectomeFileKey: ConnectomeFileKey, + synapseIds: List[Long], + direction: SynapticPartnerDirection)(implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = { + val arrayKey = direction match { + case SynapticPartnerDirection.src => keySynapseToSrcAgglomerate + case SynapticPartnerDirection.dst => keySynapseToDstAgglomerate + } + for { + synapseToPartnerAgglomerateArray <- openZarrArray(connectomeFileKey, arrayKey) + agglomerateIds <- Fox.serialCombined(synapseIds) { synapseId: Long => + for { + agglomerateIdMA <- synapseToPartnerAgglomerateArray.readAsMultiArray(offset = synapseId, shape = 1) + agglomerateId <- tryo(agglomerateIdMA.getLong(0)).toFox + } yield agglomerateId + } + } yield agglomerateIds + } def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[List[Long]]] = ??? + tc: TokenContext): Fox[Seq[Seq[Long]]] = + for { + arraySynapsePositions <- openZarrArray(connectomeFileKey, keySynapsePositions) + synapsePositions <- Fox.serialCombined(synapseIds) { synapseId: Long => + for { + synapsePositionMA <- arraySynapsePositions.readAsMultiArray(offset = Array(synapseId, 0), shape = Array(1, 3)) // TODO should offset and shape be transposed? + synapsePosition <- tryo(synapsePositionMA.getStorage.asInstanceOf[Array[Long]].toSeq).toFox + } yield synapsePosition + } + } yield synapsePositions def typesForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext, - tc: TokenContext): Fox[SynapseTypesWithLegend] = ??? + tc: TokenContext): Fox[SynapseTypesWithLegend] = + for { + arraySynapseTypes <- openZarrArray(connectomeFileKey, keySynapseTypes) + attributes <- readConnectomeFileAttributes(connectomeFileKey) + synapseTypes <- Fox.serialCombined(synapseIds) { synapseId: Long => + for { + synapseTypeMA <- arraySynapseTypes.readAsMultiArray(offset = synapseId, shape = 1) + synapseType <- tryo(synapseTypeMA.getLong(0)).toFox + } yield synapseType + } + } yield SynapseTypesWithLegend(synapseTypes, attributes.synapseTypeNames) def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = for { - csrIndptrArray <- openZarrArray(connectomeFileKey, keyCsrIndptr) + (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, agglomerateId) agglomeratePairOffsetsArray <- openZarrArray(connectomeFileKey, keyAgglomeratePairOffsets) cscAgglomeratePairArray <- openZarrArray(connectomeFileKey, keyCscAgglomeratePair) - fromAndToPtr <- csrIndptrArray.readAsMultiArray(offset = agglomerateId, shape = 2) - fromPtr <- tryo(fromAndToPtr.getLong(0)).toFox - toPtr <- tryo(fromAndToPtr.getLong(1)).toFox agglomeratePairsMA <- cscAgglomeratePairArray.readAsMultiArray(offset = fromPtr, shape = (toPtr - fromPtr).toInt) agglomeratePairs <- tryo(agglomeratePairsMA.getStorage.asInstanceOf[Array[Long]]).toFox synapseIdsNested <- Fox.serialCombined(agglomeratePairs.toList) { agglomeratePair: Long => @@ -110,19 +146,33 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS } } yield synapseIdsNested.flatten + private def getToAndFromPtr(connectomeFileKey: ConnectomeFileKey, + agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Long, Long)] = + for { + csrIndptrArray <- openZarrArray(connectomeFileKey, keyCsrIndptr) + fromAndToPtr <- csrIndptrArray.readAsMultiArray(offset = agglomerateId, shape = 
2) + fromPtr <- tryo(fromAndToPtr.getLong(0)).toFox + toPtr <- tryo(fromAndToPtr.getLong(1)).toFox + } yield (fromPtr, toPtr) + def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, - tc: TokenContext): Fox[List[Long]] = ??? + tc: TokenContext): Fox[Seq[Long]] = + for { + (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, agglomerateId) + agglomeratePairOffsetsArray <- openZarrArray(connectomeFileKey, keyAgglomeratePairOffsets) + fromMA <- agglomeratePairOffsetsArray.readAsMultiArray(offset = fromPtr, shape = 1) + from <- tryo(fromMA.getLong(0)).toFox + toMA <- agglomeratePairOffsetsArray.readAsMultiArray(offset = toPtr, shape = 1) + to <- tryo(toMA.getLong(0)).toFox + } yield Seq.range(from, to) def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)( implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = for { - csrIndptrArray <- openZarrArray(connectomeFileKey, keyCsrIndptr) csrIndicesArray <- openZarrArray(connectomeFileKey, keyCsrIndices) - fromAndToPtr <- csrIndptrArray.readAsMultiArray(offset = srcAgglomerateId, shape = 2) - fromPtr <- tryo(fromAndToPtr.getLong(0)).toFox - toPtr <- tryo(fromAndToPtr.getLong(1)).toFox + (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, srcAgglomerateId) columnValuesMA <- csrIndicesArray.readAsMultiArray(offset = fromPtr, shape = (toPtr - fromPtr).toInt) columnValues: Array[Long] <- tryo(columnValuesMA.getStorage.asInstanceOf[Array[Long]]).toFox columnOffset <- searchSorted(columnValues, dstAgglomerateId).toFox From 5c390ea167959e004c64e9ff0e5f2944e2a58db6 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 26 Jun 2025 14:40:58 +0200 Subject: [PATCH 086/100] cache clear, cleanup --- .../util/collections/SequenceUtils.scala | 9 ++++ .../connectome/ConnectomeFileService.scala | 13 ++++++ .../Hdf5ConnectomeFileService.scala | 42 ++++++++++--------- .../ZarrConnectomeFileService.scala | 24 ++++++----- 4 files changed, 59 insertions(+), 29 deletions(-) diff --git a/util/src/main/scala/com/scalableminds/util/collections/SequenceUtils.scala b/util/src/main/scala/com/scalableminds/util/collections/SequenceUtils.scala index 1ce0f7ed496..3856deeac5c 100644 --- a/util/src/main/scala/com/scalableminds/util/collections/SequenceUtils.scala +++ b/util/src/main/scala/com/scalableminds/util/collections/SequenceUtils.scala @@ -1,5 +1,7 @@ package com.scalableminds.util.collections +import scala.collection.Searching.{Found, InsertionPoint} + object SequenceUtils { def findUniqueElement[T](list: Seq[T]): Option[T] = { val uniqueElements = list.distinct @@ -51,4 +53,11 @@ object SequenceUtils { val batchTo = Math.min(to, (batchIndex + 1) * batchSize + from - 1) (batchFrom, batchTo) } + + // Search in a sorted array, returns Box of index where element is found or, if missing, where element would be inserted + def searchSorted(haystack: Array[Long], needle: Long): Int = + haystack.search(needle) match { + case Found(i) => i + case InsertionPoint(i) => i + } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index 13437aebfde..98bec6395e9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -271,6 +271,19 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, hdf5Fn = hdf5ConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds) ) + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { + connectomeFileKeyCache.clear { + case (keyDataSourceId, keyLayerName, _) => + dataSourceId == keyDataSourceId && layerNameOpt.forall(_ == keyLayerName) + } + + val clearedHdf5Count = hdf5ConnectomeFileService.clearCache(dataSourceId, layerNameOpt) + + val clearedZarrCount = zarrConnectomeFileService.clearCache(dataSourceId, layerNameOpt) + + clearedHdf5Count + clearedZarrCount + } + private def delegateToService[A](connectomeFileKey: ConnectomeFileKey, zarrFn: Fox[A], hdf5Fn: Fox[A])( implicit ec: ExecutionContext): Fox[A] = connectomeFileKey.attachment.dataFormat match { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index 98c149d6f33..7820e40702d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -1,21 +1,26 @@ package com.scalableminds.webknossos.datastore.services.connectome +import com.scalableminds.util.collections.SequenceUtils import com.scalableminds.util.tools.Box.tryo -import com.scalableminds.util.tools.{Box, Fox, FoxImplicits, Full} +import com.scalableminds.util.tools.{Fox, FoxImplicits} +import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.services.connectome.SynapticPartnerDirection.SynapticPartnerDirection import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache} +import com.scalableminds.webknossos.datastore.DataStoreConfig +import java.nio.file.Paths import javax.inject.Inject -import scala.collection.Searching.{Found, InsertionPoint} import scala.concurrent.ExecutionContext -class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { +class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxImplicits { - private lazy val connectomeFileCache = new Hdf5FileCache(30) + private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) + + private lazy val fileHandleCache = new Hdf5FileCache(30) def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext): Fox[String] = for { - cachedConnectomeFile <- connectomeFileCache + cachedConnectomeFile <- fileHandleCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" mappingName <- finishAccessOnFailure(cachedConnectomeFile) { @@ -27,7 +32,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext): Fox[Seq[Long]] = for { - cachedConnectomeFile <- connectomeFileCache + cachedConnectomeFile <- fileHandleCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { @@ -59,7 +64,7 
@@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext): Fox[Seq[Long]] = for { - cachedConnectomeFile <- connectomeFileCache + cachedConnectomeFile <- fileHandleCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { @@ -79,7 +84,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { synapseIds: List[Long], direction: SynapticPartnerDirection)(implicit ec: ExecutionContext): Fox[List[Long]] = for { - cachedConnectomeFile <- connectomeFileCache + cachedConnectomeFile <- fileHandleCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" agglomerateIds <- Fox.serialCombined(synapseIds) { synapseId: Long => @@ -94,7 +99,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext): Fox[List[List[Long]]] = for { - cachedConnectomeFile <- connectomeFileCache + cachedConnectomeFile <- fileHandleCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" synapsePositions <- Fox.serialCombined(synapseIds) { synapseId: Long => @@ -107,7 +112,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { def typesForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext): Fox[SynapseTypesWithLegend] = for { - cachedConnectomeFile <- connectomeFileCache + cachedConnectomeFile <- fileHandleCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" // Hard coded type name list, as all legacy files have this value. @@ -122,7 +127,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)( implicit ec: ExecutionContext): Fox[Seq[Long]] = for { - cachedConnectomeFile <- connectomeFileCache + cachedConnectomeFile <- fileHandleCache .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { @@ -135,7 +140,7 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { finishAccessOnFailure(cachedConnectomeFile) { cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indices", (toPtr - fromPtr).toInt, fromPtr) } ?~> "Could not read agglomerate pairs from connectome file" - columnOffset <- searchSorted(columnValues, dstAgglomerateId).toFox + columnOffset = SequenceUtils.searchSorted(columnValues, dstAgglomerateId) pairIndex = fromPtr + columnOffset synapses <- if ((columnOffset >= columnValues.length) || (columnValues(columnOffset) != dstAgglomerateId)) Fox.successful(List.empty) @@ -149,17 +154,16 @@ class Hdf5ConnectomeFileService @Inject()() extends FoxImplicits { } yield Seq.range(from, to) } yield synapses - // TODO move to utils? 
- private def searchSorted(haystack: Array[Long], needle: Long): Box[Int] = - haystack.search(needle) match { - case Found(i) => Full(i) - case InsertionPoint(i) => Full(i) - } - private def finishAccessOnFailure[T](f: CachedHdf5File)(block: => T)(implicit ec: ExecutionContext): Fox[T] = tryo { _: Throwable => f.finishAccess() } { block }.toFox + + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { + val datasetPath = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + val relevantPath = layerNameOpt.map(l => datasetPath.resolve(l)).getOrElse(datasetPath) + fileHandleCache.clear(key => key.startsWith(relevantPath.toString)) + } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index 22b75555321..a0f6249b7f9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -2,8 +2,9 @@ package com.scalableminds.webknossos.datastore.services.connectome import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache +import com.scalableminds.util.collections.SequenceUtils import com.scalableminds.util.tools.Box.tryo -import com.scalableminds.util.tools.{Box, Fox, FoxImplicits, Full, JsonHelper} +import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId @@ -13,7 +14,6 @@ import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorServ import jakarta.inject.Inject import play.api.libs.json.{JsResult, JsValue, Reads} -import scala.collection.Searching.{Found, InsertionPoint} import scala.concurrent.ExecutionContext case class ConnectomeFileAttributes( @@ -175,7 +175,7 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, srcAgglomerateId) columnValuesMA <- csrIndicesArray.readAsMultiArray(offset = fromPtr, shape = (toPtr - fromPtr).toInt) columnValues: Array[Long] <- tryo(columnValuesMA.getStorage.asInstanceOf[Array[Long]]).toFox - columnOffset <- searchSorted(columnValues, dstAgglomerateId).toFox + columnOffset = SequenceUtils.searchSorted(columnValues, dstAgglomerateId) pairIndex = fromPtr + columnOffset synapses <- if ((columnOffset >= columnValues.length) || (columnValues(columnOffset) != dstAgglomerateId)) Fox.successful(List.empty) @@ -188,13 +188,6 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS } yield Seq.range(from, to) } yield synapses - // TODO move to utils? 
- private def searchSorted(haystack: Array[Long], needle: Long): Box[Int] = - haystack.search(needle) match { - case Found(i) => Full(i) - case InsertionPoint(i) => Full(i) - } - private def openZarrArray(connectomeFileKey: ConnectomeFileKey, zarrArrayName: String)(implicit ec: ExecutionContext, tc: TokenContext): Fox[DatasetArray] = openArraysCache.getOrLoad( @@ -211,4 +204,15 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS chunkCacheService.sharedChunkContentsCache) } yield zarrArray ) + + def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { + attributesCache.clear { meshFileKey => + meshFileKey.dataSourceId == dataSourceId && layerNameOpt.forall(meshFileKey.layerName == _) + } + + openArraysCache.clear { + case (meshFileKey, _) => + meshFileKey.dataSourceId == dataSourceId && layerNameOpt.forall(meshFileKey.layerName == _) + } + } } From 67063438b318668d4591d184810e6d34f84152f1 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 26 Jun 2025 14:48:22 +0200 Subject: [PATCH 087/100] changelog --- unreleased_changes/8717.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 unreleased_changes/8717.md diff --git a/unreleased_changes/8717.md b/unreleased_changes/8717.md new file mode 100644 index 00000000000..add2acb305b --- /dev/null +++ b/unreleased_changes/8717.md @@ -0,0 +1,2 @@ +### Added +- Connectomes can now also be read from the new zarr3-based format, and from remote object storage. From aed235e1b91550eebc5d8a8581f2a1304a61c42b Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 30 Jun 2025 09:53:58 +0200 Subject: [PATCH 088/100] fix error message --- .../services/segmentindex/ZarrSegmentIndexFileService.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala index 187b055f05b..78a9fc7820f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala @@ -78,7 +78,7 @@ class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: Remot groupHeaderBytes <- (groupVaultPath / SegmentIndexFileAttributes.FILENAME_ZARR_JSON).readBytes() segmentIndexFileAttributes <- JsonHelper .parseAs[SegmentIndexFileAttributes](groupHeaderBytes) - .toFox ?~> "Could not parse meshFile attributes from zarr group file" + .toFox ?~> "Could not parse segment index file attributes from zarr group file" } yield segmentIndexFileAttributes def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, From 7761d417a3e643357d29263aa29cdb04e36211ab Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 30 Jun 2025 11:23:10 +0200 Subject: [PATCH 089/100] normalize paths; fix reading connectome file metadata; inline delegateToService --- .../connectome/ConnectomeFileService.scala | 92 ++++++++++--------- .../ZarrConnectomeFileService.scala | 2 +- .../RemoteSourceDescriptorService.scala | 4 +- 3 files changed, 50 insertions(+), 48 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index 
98bec6395e9..df1f621219b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -169,11 +169,11 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, private def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext, tc: TokenContext): Fox[String] = - delegateToService( - connectomeFileKey, - zarrFn = zarrConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey), - hdf5Fn = hdf5ConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) - ) + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => zarrConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) + case LayerAttachmentDataformat.hdf5 => hdf5ConnectomeFileService.mappingNameForConnectomeFile(connectomeFileKey) + case _ => unsupportedDataFormat(connectomeFileKey) + } def synapsesForAgglomerates(connectomeFileKey: ConnectomeFileKey, agglomerateIds: Seq[Long])( implicit ec: ExecutionContext, @@ -216,60 +216,68 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, private def ingoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = - delegateToService( - connectomeFileKey, - zarrFn = zarrConnectomeFileService.ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId), - hdf5Fn = hdf5ConnectomeFileService.ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) - ) + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrConnectomeFileService.ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) + case LayerAttachmentDataformat.hdf5 => + hdf5ConnectomeFileService.ingoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) + case _ => unsupportedDataFormat(connectomeFileKey) + } private def outgoingSynapsesForAgglomerate(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long)( implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = - delegateToService( - connectomeFileKey, - zarrFn = zarrConnectomeFileService.outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId), - hdf5Fn = hdf5ConnectomeFileService.outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) - ) + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrConnectomeFileService.outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) + case LayerAttachmentDataformat.hdf5 => + hdf5ConnectomeFileService.outgoingSynapsesForAgglomerate(connectomeFileKey, agglomerateId) + case _ => unsupportedDataFormat(connectomeFileKey) + } private def synapseIdsForDirectedPair( connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = - delegateToService( - connectomeFileKey, - zarrFn = - zarrConnectomeFileService.synapseIdsForDirectedPair(connectomeFileKey, srcAgglomerateId, dstAgglomerateId), - hdf5Fn = + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrConnectomeFileService.synapseIdsForDirectedPair(connectomeFileKey, srcAgglomerateId, dstAgglomerateId) + case LayerAttachmentDataformat.hdf5 => hdf5ConnectomeFileService.synapseIdsForDirectedPair(connectomeFileKey, srcAgglomerateId, 
dstAgglomerateId) - ) + case _ => unsupportedDataFormat(connectomeFileKey) + } def synapticPartnerForSynapses( connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], direction: SynapticPartnerDirection)(implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = - delegateToService( - connectomeFileKey, - zarrFn = zarrConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction), - hdf5Fn = hdf5ConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction) - ) + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction) + case LayerAttachmentDataformat.hdf5 => + hdf5ConnectomeFileService.synapticPartnerForSynapses(connectomeFileKey, synapseIds, direction) + case _ => unsupportedDataFormat(connectomeFileKey) + } def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Seq[Long]]] = - delegateToService( - connectomeFileKey, - zarrFn = zarrConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds), - hdf5Fn = hdf5ConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds) - ) + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => + zarrConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds) + case LayerAttachmentDataformat.hdf5 => + hdf5ConnectomeFileService.positionsForSynapses(connectomeFileKey, synapseIds) + case _ => unsupportedDataFormat(connectomeFileKey) + } def typesForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext, tc: TokenContext): Fox[SynapseTypesWithLegend] = - delegateToService( - connectomeFileKey, - zarrFn = zarrConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds), - hdf5Fn = hdf5ConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds) - ) + connectomeFileKey.attachment.dataFormat match { + case LayerAttachmentDataformat.zarr3 => zarrConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds) + case LayerAttachmentDataformat.hdf5 => hdf5ConnectomeFileService.typesForSynapses(connectomeFileKey, synapseIds) + case _ => unsupportedDataFormat(connectomeFileKey) + } def clearCache(dataSourceId: DataSourceId, layerNameOpt: Option[String]): Int = { connectomeFileKeyCache.clear { @@ -284,14 +292,8 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, clearedHdf5Count + clearedZarrCount } - private def delegateToService[A](connectomeFileKey: ConnectomeFileKey, zarrFn: Fox[A], hdf5Fn: Fox[A])( - implicit ec: ExecutionContext): Fox[A] = - connectomeFileKey.attachment.dataFormat match { - case LayerAttachmentDataformat.zarr3 => zarrFn - case LayerAttachmentDataformat.hdf5 => hdf5Fn - case _ => - Fox.failure( - s"Trying to load connectome file with unsupported data format ${connectomeFileKey.attachment.dataFormat}") - } + private def unsupportedDataFormat(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext) = + Fox.failure( + s"Trying to load connectome file with unsupported data format ${connectomeFileKey.attachment.dataFormat}") } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index 
a0f6249b7f9..b9cf359262c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -34,7 +34,7 @@ object ConnectomeFileAttributes { val connectomeFileAttrs = json \ keyAttributes \ keyVx \ keyArtifactAttrs for { formatVersion <- (json \ keyAttributes \ keyVx \ keyFormatVersion).validate[Long] - mappingName <- (connectomeFileAttrs \ "mapping_name").validate[String] + mappingName <- (connectomeFileAttrs \ "metadata/mapping_name").validate[String] synapseTypeNames <- (connectomeFileAttrs \ "synapse_type_names").validate[Seq[String]] } yield ConnectomeFileAttributes( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala index 758a8bbe41f..d3010132b42 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/RemoteSourceDescriptorService.scala @@ -77,8 +77,8 @@ class RemoteSourceDescriptorService @Inject()(dSRemoteWebknossosClient: DSRemote throw new Exception( s"Absolute path $localPath in local file system is not in path whitelist. Consider adding it to datastore.localDirectoryWhitelist") } else { // relative local path, resolve in dataset dir - val pathRelativeToDataset = localDatasetDir.resolve(localPath) - val pathRelativeToLayer = localDatasetDir.resolve(layerName).resolve(localPath) + val pathRelativeToDataset = localDatasetDir.resolve(localPath).normalize + val pathRelativeToLayer = localDatasetDir.resolve(layerName).resolve(localPath).normalize if (pathRelativeToDataset.toFile.exists) { pathRelativeToDataset.toUri } else { From 1fbf7b7a4306f94bc9fc3eb660f291897baad08d Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 30 Jun 2025 13:24:45 +0200 Subject: [PATCH 090/100] correctly look up local fallback hdf5 files --- .../webknossos/datastore/services/AgglomerateService.scala | 6 +++++- .../services/connectome/ConnectomeFileService.scala | 6 +++++- .../datastore/services/mesh/MeshFileService.scala | 6 +++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala index cbd514c30ec..b552830127b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala @@ -98,7 +98,11 @@ class AgglomerateService @Inject()(config: DataStoreConfig, }) localFallbackAttachment = LayerAttachment( mappingName, - localDatasetDir.resolve(dataLayer.name).resolve(localAgglomeratesDir).toUri, + localDatasetDir + .resolve(dataLayer.name) + .resolve(localAgglomeratesDir) + .resolve(mappingName + "." 
+ hdf5AgglomerateFileExtension) + .toUri, LayerAttachmentDataformat.hdf5 ) selectedAttachment = registeredAttachmentNormalized.getOrElse(localFallbackAttachment) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala index df1f621219b..3f8ed692322 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileService.scala @@ -118,7 +118,11 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig, }) localFallbackAttachment = LayerAttachment( connectomeFileName, - localDatasetDir.resolve(dataLayer.name).resolve(localConnectomesDir).toUri, + localDatasetDir + .resolve(dataLayer.name) + .resolve(localConnectomesDir) + .resolve(connectomeFileName + "." + hdf5ConnectomeFileExtension) + .toUri, LayerAttachmentDataformat.hdf5 ) selectedAttachment = registeredAttachmentNormalized.getOrElse(localFallbackAttachment) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala index f0141be5621..116862ba04e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileService.scala @@ -100,7 +100,11 @@ class MeshFileService @Inject()(config: DataStoreConfig, }) localFallbackAttachment = LayerAttachment( meshFileName, - localDatasetDir.resolve(dataLayer.name).resolve(localMeshesDir).toUri, + localDatasetDir + .resolve(dataLayer.name) + .resolve(localMeshesDir) + .resolve(meshFileName + "." 
+ hdf5MeshFileExtension) + .toUri, LayerAttachmentDataformat.hdf5 ) selectedAttachment = registeredAttachmentNormalized.getOrElse(localFallbackAttachment) From f3f507779863d04bab2df524edb47d9d869832af Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 30 Jun 2025 14:23:56 +0200 Subject: [PATCH 091/100] read correct arrays; fix returning fill value when whole shard is missing --- .../datastore/datareaders/ChunkReader.scala | 11 ++++++++--- .../datastore/datareaders/DatasetArray.scala | 13 +++++++++---- .../datareaders/zarr3/Zarr3Array.scala | 2 +- .../connectome/ZarrConnectomeFileService.scala | 17 ++++++++++------- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkReader.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkReader.scala index 30ca3d16668..3fa673aa530 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkReader.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/ChunkReader.scala @@ -25,17 +25,22 @@ class ChunkReader(header: DatasetHeader) extends FoxImplicits { typed <- chunkBytesAndShapeBox.map(_._1) match { case Full(chunkBytes) if useSkipTypingShortcut => shortcutChunkTyper.wrapAndType(chunkBytes, chunkShape).toFox ?~> "chunk.shortcutWrapAndType.failed" - case Empty if useSkipTypingShortcut => - shortcutChunkTyper.createFromFillValueCached(chunkShape) ?~> "chunk.shortcutCreateFromFillValue.failed" case Full(chunkBytes) => chunkTyper.wrapAndType(chunkBytes, chunkShape).toFox ?~> "chunk.wrapAndType.failed" case Empty => - chunkTyper.createFromFillValueCached(chunkShape) ?~> "chunk.createFromFillValue.failed" + createFromFillValue(chunkShape, useSkipTypingShortcut) case f: Failure => f.toFox ?~> s"Reading chunk at $path failed" } } yield typed + def createFromFillValue(chunkShape: Array[Int], useSkipTypingShortcut: Boolean)( + implicit ec: ExecutionContext): Fox[MultiArray] = + if (useSkipTypingShortcut) + shortcutChunkTyper.createFromFillValueCached(chunkShape) ?~> "chunk.shortcutCreateFromFillValue.failed" + else + chunkTyper.createFromFillValueCached(chunkShape) ?~> "chunk.createFromFillValue.failed" + // Returns bytes (optional, Fox.empty may later be replaced with fill value) // and chunk shape (optional, only for data formats where each chunk reports its own shape, e.g. 
N5) protected def readChunkBytesAndShape(path: VaultPath, range: Option[NumericRange[Long]])( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala index e8155da355a..53be6aa9b5e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/DatasetArray.scala @@ -3,7 +3,7 @@ package com.scalableminds.webknossos.datastore.datareaders import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int -import com.scalableminds.util.tools.{Fox, FoxImplicits} +import com.scalableminds.util.tools.{Empty, Failure, Fox, FoxImplicits, Full} import com.scalableminds.webknossos.datastore.datavault.VaultPath import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId import com.scalableminds.webknossos.datastore.models.AdditionalCoordinate @@ -248,10 +248,15 @@ class DatasetArray(vaultPath: VaultPath, implicit ec: ExecutionContext, tc: TokenContext): Fox[MultiArray] = if (header.isSharded) { + val chunkShape = chunkShapeAtIndex(chunkIndex) for { - (shardPath, chunkRange) <- getShardedChunkPathAndRange(chunkIndex) ?~> "chunk.getShardedPathAndRange.failed" - chunkShape = chunkShapeAtIndex(chunkIndex) - multiArray <- chunkReader.read(shardPath, chunkShape, Some(chunkRange), useSkipTypingShortcut) + shardPathAndChunkRangeBox <- getShardedChunkPathAndRange(chunkIndex).shiftBox + multiArray <- shardPathAndChunkRangeBox match { + case Full((shardPath, chunkRange)) => + chunkReader.read(shardPath, chunkShape, Some(chunkRange), useSkipTypingShortcut) + case Empty => chunkReader.createFromFillValue(chunkShape, useSkipTypingShortcut) + case f: Failure => f.toFox + } } yield multiArray } else { val chunkPath = vaultPath / getChunkFilename(chunkIndex) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala index 55b1bef42ea..7638f2eb5bb 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datareaders/zarr3/Zarr3Array.scala @@ -120,7 +120,7 @@ class Zarr3Array(vaultPath: VaultPath, private def readAndParseShardIndex(shardPath: VaultPath)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Array[(Long, Long)]] = for { - shardIndexRaw <- readShardIndex(shardPath) ?~> "zarr.readShardIndex.failed" + shardIndexRaw <- readShardIndex(shardPath) ?=> "zarr.readShardIndex.failed" parsed = parseShardIndex(shardIndexRaw) } yield parsed diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index b9cf359262c..06207fa1165 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -53,6 +53,7 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS private lazy val 
attributesCache = AlfuCache[ConnectomeFileKey, ConnectomeFileAttributes]() private val keyCsrIndptr = "CSR_indptr" + private val keyCscIndptr = "CSC_indptr" private val keyCsrIndices = "CSR_indices" private val keyAgglomeratePairOffsets = "agglomerate_pair_offsets" private val keyCscAgglomeratePair = "CSC_agglomerate_pair" @@ -109,7 +110,8 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS synapsePositions <- Fox.serialCombined(synapseIds) { synapseId: Long => for { synapsePositionMA <- arraySynapsePositions.readAsMultiArray(offset = Array(synapseId, 0), shape = Array(1, 3)) // TODO should offset and shape be transposed? - synapsePosition <- tryo(synapsePositionMA.getStorage.asInstanceOf[Array[Long]].toSeq).toFox + synapsePosition <- tryo( + Seq(synapsePositionMA.getLong(0), synapsePositionMA.getLong(1), synapsePositionMA.getLong(2))).toFox } yield synapsePosition } } yield synapsePositions @@ -132,7 +134,7 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = for { - (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, agglomerateId) + (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, agglomerateId, keyCscIndptr) agglomeratePairOffsetsArray <- openZarrArray(connectomeFileKey, keyAgglomeratePairOffsets) cscAgglomeratePairArray <- openZarrArray(connectomeFileKey, keyCscAgglomeratePair) agglomeratePairsMA <- cscAgglomeratePairArray.readAsMultiArray(offset = fromPtr, shape = (toPtr - fromPtr).toInt) @@ -146,10 +148,11 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS } } yield synapseIdsNested.flatten - private def getToAndFromPtr(connectomeFileKey: ConnectomeFileKey, - agglomerateId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[(Long, Long)] = + private def getToAndFromPtr(connectomeFileKey: ConnectomeFileKey, agglomerateId: Long, arrayKey: String)( + implicit ec: ExecutionContext, + tc: TokenContext): Fox[(Long, Long)] = for { - csrIndptrArray <- openZarrArray(connectomeFileKey, keyCsrIndptr) + csrIndptrArray <- openZarrArray(connectomeFileKey, arrayKey) fromAndToPtr <- csrIndptrArray.readAsMultiArray(offset = agglomerateId, shape = 2) fromPtr <- tryo(fromAndToPtr.getLong(0)).toFox toPtr <- tryo(fromAndToPtr.getLong(1)).toFox @@ -159,7 +162,7 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS implicit ec: ExecutionContext, tc: TokenContext): Fox[Seq[Long]] = for { - (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, agglomerateId) + (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, agglomerateId, keyCsrIndptr) agglomeratePairOffsetsArray <- openZarrArray(connectomeFileKey, keyAgglomeratePairOffsets) fromMA <- agglomeratePairOffsetsArray.readAsMultiArray(offset = fromPtr, shape = 1) from <- tryo(fromMA.getLong(0)).toFox @@ -172,7 +175,7 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS tc: TokenContext): Fox[Seq[Long]] = for { csrIndicesArray <- openZarrArray(connectomeFileKey, keyCsrIndices) - (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, srcAgglomerateId) + (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, srcAgglomerateId, keyCsrIndptr) columnValuesMA <- csrIndicesArray.readAsMultiArray(offset = fromPtr, shape = (toPtr - fromPtr).toInt) columnValues: Array[Long] <- tryo(columnValuesMA.getStorage.asInstanceOf[Array[Long]]).toFox columnOffset = SequenceUtils.searchSorted(columnValues, dstAgglomerateId) From 
661b258678187ce5aab76fae8815fb5b67355c9b Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 2 Jul 2025 11:25:35 +0200 Subject: [PATCH 092/100] implement pr feedback; extract common string values to traits --- conf/messages | 2 + .../datastore/DataStoreModule.scala | 15 ++------ .../controllers/BinaryDataController.scala | 7 +--- .../controllers/DataSourceController.scala | 14 ++----- .../datasource/DatasetLayerAttachments.scala | 2 +- .../services/BinaryDataService.scala | 1 + .../services/BinaryDataServiceHolder.scala | 1 + .../VoxelyticsZarrArtifactUtils.scala | 20 ++++++++++ .../mapping/AgglomerateFileUtils.scala | 13 +++++++ .../{ => mapping}/AgglomerateService.scala | 14 ++----- .../Hdf5AgglomerateService.scala | 38 ++++++------------- .../{ => mapping}/MappingParser.scala | 8 ++-- .../{ => mapping}/MappingService.scala | 2 +- .../ZarrAgglomerateService.scala | 16 +++----- .../services/mesh/AdHocMeshService.scala | 3 +- .../mesh/AdHocMeshServiceHolder.scala | 3 +- .../services/mesh/DSFullMeshService.scala | 1 + .../services/mesh/Hdf5MeshFileService.scala | 15 +++++--- .../services/mesh/MeshFileUtils.scala | 16 ++++++++ .../services/mesh/ZarrMeshFileService.scala | 35 ++++++++--------- .../Hdf5SegmentIndexFileService.scala | 12 +++--- .../SegmentIndexFileService.scala | 4 +- .../segmentindex/SegmentIndexFileUtils.scala | 13 +++++++ .../ZarrSegmentIndexFileService.scala | 34 +++++++---------- .../datastore/storage/Hdf5FileCache.scala | 10 +++-- 25 files changed, 156 insertions(+), 143 deletions(-) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/VoxelyticsZarrArtifactUtils.scala create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateFileUtils.scala rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/{ => mapping}/AgglomerateService.scala (97%) rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/{ => mapping}/Hdf5AgglomerateService.scala (93%) rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/{ => mapping}/MappingParser.scala (97%) rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/{ => mapping}/MappingService.scala (96%) rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/{ => mapping}/ZarrAgglomerateService.scala (96%) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileUtils.scala create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileUtils.scala diff --git a/conf/messages b/conf/messages index 873aeec8038..09231959e10 100644 --- a/conf/messages +++ b/conf/messages @@ -270,6 +270,8 @@ mesh.file.readVersion.failed=Failed to read format version from file “{0}” mesh.file.readMappingName.failed=Failed to read mapping name from mesh file “{0}” mesh.meshFileName.required=Trying to load mesh from mesh file, but mesh file name was not supplied. +segmentIndexFile.notFound=Could not find requested segment index file + task.create.noTasks=Zero tasks were requested task.create.failed=Failed to create Task task.create.limitExceeded=Cannot create more than 1000 tasks in one request. 
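Context for the shared traits introduced in this patch (VoxelyticsZarrArtifactUtils below, plus AgglomerateFileUtils, MeshFileUtils and SegmentIndexFileUtils): every zarr3-based attachment service parses the same voxelytics attribute layout from its zarr.json group header, so the key strings and lookup helpers are extracted once. The following is a minimal sketch of how a group-header reader can build on the trait helpers; ExampleFileAttributes, its fields and the "some_name" key are made-up names for illustration, and only the trait name and the readArtifactSchemaVersion / lookUpArtifactAttributes signatures come from this patch.

import com.scalableminds.webknossos.datastore.services.VoxelyticsZarrArtifactUtils
import play.api.libs.json.{JsValue, Reads}

// Illustrative only; the real attribute classes live next to their services.
case class ExampleFileAttributes(formatVersion: Long, someName: String)

object ExampleFileAttributes extends VoxelyticsZarrArtifactUtils {
  // Parses the zarr.json group header: shared schema version plus one artifact-specific attribute.
  implicit val groupHeaderReads: Reads[ExampleFileAttributes] = Reads { json: JsValue =>
    val artifactAttrs = lookUpArtifactAttributes(json)
    for {
      formatVersion <- readArtifactSchemaVersion(json)
      someName <- (artifactAttrs \ "some_name").validate[String]
    } yield ExampleFileAttributes(formatVersion, someName)
  }
}
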
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index 85711bbd246..8c429288976 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -4,18 +4,9 @@ import org.apache.pekko.actor.ActorSystem import com.google.inject.AbstractModule import com.google.inject.name.Names import com.scalableminds.webknossos.datastore.services._ -import com.scalableminds.webknossos.datastore.services.mesh.{ - AdHocMeshServiceHolder, - Hdf5MeshFileService, - MeshFileService, - NeuroglancerPrecomputedMeshFileService, - ZarrMeshFileService -} -import com.scalableminds.webknossos.datastore.services.segmentindex.{ - Hdf5SegmentIndexFileService, - SegmentIndexFileService, - ZarrSegmentIndexFileService -} +import com.scalableminds.webknossos.datastore.services.mapping.{AgglomerateService, Hdf5AgglomerateService, MappingService, ZarrAgglomerateService} +import com.scalableminds.webknossos.datastore.services.mesh.{AdHocMeshServiceHolder, Hdf5MeshFileService, MeshFileService, NeuroglancerPrecomputedMeshFileService, ZarrMeshFileService} +import com.scalableminds.webknossos.datastore.services.segmentindex.{Hdf5SegmentIndexFileService, SegmentIndexFileService, ZarrSegmentIndexFileService} import com.scalableminds.webknossos.datastore.services.uploading.UploadService import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptorService} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/BinaryDataController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/BinaryDataController.scala index e1fe95fdd5f..2a379bedb6c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/BinaryDataController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/BinaryDataController.scala @@ -11,16 +11,13 @@ import com.scalableminds.webknossos.datastore.helpers.MissingBucketHeaders import com.scalableminds.webknossos.datastore.image.{ImageCreator, ImageCreatorParameters} import com.scalableminds.webknossos.datastore.models.DataRequestCollection._ import com.scalableminds.webknossos.datastore.models.datasource._ -import com.scalableminds.webknossos.datastore.models.requests.{ - DataServiceDataRequest, - DataServiceMappingRequest, - DataServiceRequestSettings -} +import com.scalableminds.webknossos.datastore.models.requests.{DataServiceDataRequest, DataServiceMappingRequest, DataServiceRequestSettings} import com.scalableminds.webknossos.datastore.models._ import com.scalableminds.webknossos.datastore.services._ import com.scalableminds.webknossos.datastore.services.mesh.{AdHocMeshRequest, AdHocMeshService, AdHocMeshServiceHolder} import com.scalableminds.webknossos.datastore.slacknotification.DSSlackNotificationService import com.scalableminds.util.tools.Box.tryo +import com.scalableminds.webknossos.datastore.services.mapping.MappingService import play.api.i18n.Messages import play.api.libs.json.Json import play.api.mvc.{AnyContent, _} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 3ec909ed7fc..2c06528994d 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -5,17 +5,8 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong -import com.scalableminds.webknossos.datastore.explore.{ - ExploreRemoteDatasetRequest, - ExploreRemoteDatasetResponse, - ExploreRemoteLayerService -} -import com.scalableminds.webknossos.datastore.helpers.{ - GetMultipleSegmentIndexParameters, - GetSegmentIndexParameters, - SegmentIndexData, - SegmentStatisticsParameters -} +import com.scalableminds.webknossos.datastore.explore.{ExploreRemoteDatasetRequest, ExploreRemoteDatasetResponse, ExploreRemoteLayerService} +import com.scalableminds.webknossos.datastore.helpers.{GetMultipleSegmentIndexParameters, GetSegmentIndexParameters, SegmentIndexData, SegmentStatisticsParameters} import com.scalableminds.webknossos.datastore.models.datasource.inbox.InboxDataSource import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSource, DataSourceId, GenericDataSource} import com.scalableminds.webknossos.datastore.services._ @@ -25,6 +16,7 @@ import com.scalableminds.webknossos.datastore.services.uploading._ import com.scalableminds.webknossos.datastore.storage.DataVaultService import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools.{Box, Empty, Failure, Full} +import com.scalableminds.webknossos.datastore.services.mapping.AgglomerateService import play.api.data.Form import play.api.data.Forms.{longNumber, nonEmptyText, number, tuple} import play.api.i18n.Messages diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala index 533e530fdc5..8a8138d5e75 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DatasetLayerAttachments.scala @@ -43,7 +43,7 @@ case class LayerAttachment(name: String, def localPath: Path = { if (path.getScheme.nonEmpty && path.getScheme != "file") { throw new Exception( - "Trying to open non-local hdf5 file. Hdf5 files are only supported on the datastore-local file system") + "Trying to open non-local hdf5 file. 
Hdf5 files are only supported on the datastore-local file system.") } Path.of(path) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala index 84ac04ccd42..fcabe40096d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataService.scala @@ -14,6 +14,7 @@ import com.typesafe.scalalogging.LazyLogging import com.scalableminds.util.tools.{Box, Empty, Full} import ucar.ma2.{Array => MultiArray} import com.scalableminds.util.tools.Box.tryo +import com.scalableminds.webknossos.datastore.services.mapping.AgglomerateService import java.nio.file.Path import scala.concurrent.ExecutionContext diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala index 3951a07e360..813fd5bc347 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/BinaryDataServiceHolder.scala @@ -2,6 +2,7 @@ package com.scalableminds.webknossos.datastore.services import java.nio.file.Paths import com.scalableminds.webknossos.datastore.DataStoreConfig +import com.scalableminds.webknossos.datastore.services.mapping.AgglomerateService import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import javax.inject.Inject diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/VoxelyticsZarrArtifactUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/VoxelyticsZarrArtifactUtils.scala new file mode 100644 index 00000000000..f70c67c7fc4 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/VoxelyticsZarrArtifactUtils.scala @@ -0,0 +1,20 @@ +package com.scalableminds.webknossos.datastore.services + +import play.api.libs.json.{JsLookupResult, JsResult, JsValue} + +trait VoxelyticsZarrArtifactUtils { + + val FILENAME_ZARR_JSON = "zarr.json" + + private val keyAttributes = "attributes" + private val keyVx = "voxelytics" + private val keyFormatVersion = "artifact_schema_version" + private val keyArtifactAttrs = "artifact_attributes" + + protected def readArtifactSchemaVersion(zarrGroupJson: JsValue): JsResult[Long] = + (zarrGroupJson \ keyAttributes \ keyVx \ keyFormatVersion).validate[Long] + + protected def lookUpArtifactAttributes(zarrGroupJson: JsValue): JsLookupResult = + zarrGroupJson \ keyAttributes \ keyVx \ keyArtifactAttrs + +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateFileUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateFileUtils.scala new file mode 100644 index 00000000000..8ec9ab6e61b --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateFileUtils.scala @@ -0,0 +1,13 @@ +package com.scalableminds.webknossos.datastore.services.mapping + +trait AgglomerateFileUtils { + + protected val keySegmentToAgglomerate = "segment_to_agglomerate" + protected val keyAgglomerateToSegmentsOffsets = "agglomerate_to_segments_offsets" + protected val 
keyAgglomerateToSegments = "agglomerate_to_segments" + protected val keyAgglomerateToPositions = "agglomerate_to_positions" + protected val keyAgglomerateToEdges = "agglomerate_to_edges" + protected val keyAgglomerateToEdgesOffsets = "agglomerate_to_edges_offsets" + protected val keyAgglomerateToAffinities = "agglomerate_to_affinities" + +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateService.scala similarity index 97% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateService.scala index b552830127b..51c2864f674 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateService.scala @@ -1,25 +1,19 @@ -package com.scalableminds.webknossos.datastore.services +package com.scalableminds.webknossos.datastore.services.mapping import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.io.PathUtils import com.scalableminds.util.time.Instant -import com.scalableminds.util.tools.{Fox, FoxImplicits} +import com.scalableminds.util.tools.Box.tryo +import com.scalableminds.util.tools.{Box, Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.AgglomerateGraph.AgglomerateGraph import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.SkeletonTracing -import com.scalableminds.webknossos.datastore.models.datasource.{ - DataLayer, - DataSourceId, - LayerAttachment, - LayerAttachmentDataformat -} +import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSourceId, LayerAttachment, LayerAttachmentDataformat} import com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging -import com.scalableminds.util.tools.Box -import com.scalableminds.util.tools.Box.tryo import org.apache.commons.io.FilenameUtils import java.nio.file.Paths diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/Hdf5AgglomerateService.scala similarity index 93% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/Hdf5AgglomerateService.scala index e83bf437abc..d9c80cbda40 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/Hdf5AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/Hdf5AgglomerateService.scala @@ -1,7 +1,9 @@ -package com.scalableminds.webknossos.datastore.services +package com.scalableminds.webknossos.datastore.services.mapping import ch.systemsx.cisd.hdf5.{HDF5DataSet, HDF5FactoryProvider, IHDF5Reader} import com.scalableminds.util.geometry.Vec3Int +import com.scalableminds.util.tools.Box.tryo 
+import com.scalableminds.util.tools.{Box, Failure, Full} import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} @@ -9,35 +11,19 @@ import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NodeDefaults, SkeletonTracingDefaults} import com.scalableminds.webknossos.datastore.models.datasource.ElementClass import com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest -import com.scalableminds.webknossos.datastore.storage.{ - AgglomerateFileCache, - AgglomerateFileKey, - AgglomerateIdCache, - BoundingBoxCache, - CachedAgglomerateFile, - CumsumParser -} -import com.scalableminds.util.tools.{Box, Failure, Full} -import com.scalableminds.util.tools.Box.tryo +import com.scalableminds.webknossos.datastore.services.DataConverter +import com.scalableminds.webknossos.datastore.storage._ -import java.nio.{ByteBuffer, ByteOrder, LongBuffer} import java.nio.file.{Files, Path} +import java.nio.{ByteBuffer, ByteOrder, LongBuffer} import javax.inject.Inject import scala.annotation.tailrec import scala.collection.compat.immutable.ArraySeq -class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter { +class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConverter with AgglomerateFileUtils { private val cumsumFileName = "cumsum.json" - private val keySegmentToAgglomerate = "/segment_to_agglomerate" - private val keyAgglomerateToSegmentsOffsets = "/agglomerate_to_segments_offsets" - private val keyAgglomerateToSegments = "/agglomerate_to_segments" - private val keyAgglomerateToPositions = "/agglomerate_to_positions" - private val keyAgglomerateToEdges = "/agglomerate_to_edges" - private val keyAgglomerateToEdgesOffsets = "/agglomerate_to_edges_offsets" - private val keyAgglomerateToAffinities = "/agglomerate_to_affinities" - private lazy val agglomerateFileCache = new AgglomerateFileCache( config.Datastore.Cache.AgglomerateFile.maxFileHandleEntries) @@ -101,9 +87,9 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv } } - def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long]): Box[Seq[Long]] = { - val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileKey)(openAsCachedAgglomerateFile) + def agglomerateIdsForSegmentIds(agglomerateFileKey: AgglomerateFileKey, segmentIds: Seq[Long]): Box[Seq[Long]] = tryo { + val cachedAgglomerateFile = agglomerateFileCache.withCache(agglomerateFileKey)(openAsCachedAgglomerateFile) val agglomerateIds = segmentIds.map { segmentId: Long => cachedAgglomerateFile.agglomerateIdCache.withCache(segmentId, cachedAgglomerateFile.reader, @@ -112,7 +98,6 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv cachedAgglomerateFile.finishAccess() agglomerateIds } - } def generateSkeleton(agglomerateFileKey: AgglomerateFileKey, agglomerateId: Long): Box[SkeletonTracing] = try { @@ -237,9 +222,9 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv segmentIds.toSeq } - def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: Long): Box[Vec3Int] = { - val reader: IHDF5Reader = openHdf5(agglomerateFileKey) + def positionForSegmentId(agglomerateFileKey: AgglomerateFileKey, segmentId: 
Long): Box[Vec3Int] = for { + reader: IHDF5Reader <- tryo(openHdf5(agglomerateFileKey)) agglomerateIdArr: Array[Long] <- tryo( reader.uint64().readArrayBlockWithOffset(keySegmentToAgglomerate, 1, segmentId)) agglomerateId = agglomerateIdArr(0) @@ -248,7 +233,6 @@ class Hdf5AgglomerateService @Inject()(config: DataStoreConfig) extends DataConv segmentIndex <- binarySearchForSegment(segmentsRange(0), segmentsRange(1), segmentId, reader) position <- tryo(reader.uint64().readMatrixBlockWithOffset(keyAgglomerateToPositions, 1, 3, segmentIndex, 0)(0)) } yield Vec3Int(position(0).toInt, position(1).toInt, position(2).toInt) - } @tailrec private def binarySearchForSegment(rangeStart: Long, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MappingParser.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/MappingParser.scala similarity index 97% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MappingParser.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/MappingParser.scala index a42575fa2ee..aadfe6e9fda 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MappingParser.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/MappingParser.scala @@ -1,14 +1,14 @@ -package com.scalableminds.webknossos.datastore.services +package com.scalableminds.webknossos.datastore.services.mapping -import java.io._ -import java.nio.file.Path import com.google.gson.JsonParseException import com.google.gson.stream.JsonReader import com.scalableminds.util.time.Instant +import com.scalableminds.util.tools.{Box, Failure} import com.scalableminds.webknossos.datastore.models.datasource.DataLayerMapping import com.typesafe.scalalogging.LazyLogging -import com.scalableminds.util.tools.{Box, Failure} +import java.io._ +import java.nio.file.Path import scala.collection.mutable object MappingParser extends LazyLogging { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MappingService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/MappingService.scala similarity index 96% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MappingService.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/MappingService.scala index 48fa09a6ed1..c93f456f54b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/MappingService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/MappingService.scala @@ -1,4 +1,4 @@ -package com.scalableminds.webknossos.datastore.services +package com.scalableminds.webknossos.datastore.services.mapping import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.DataStoreConfig diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/ZarrAgglomerateService.scala similarity index 96% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/ZarrAgglomerateService.scala index 94070b4d4eb..75950cb4fdb 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ZarrAgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/ZarrAgglomerateService.scala @@ -1,20 +1,21 @@ -package com.scalableminds.webknossos.datastore.services +package com.scalableminds.webknossos.datastore.services.mapping import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int +import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.AgglomerateGraph.{AgglomerateEdge, AgglomerateGraph} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.{Edge, SkeletonTracing, Tree, TreeTypeProto} -import com.scalableminds.webknossos.datastore.datareaders.{DatasetArray, MultiArrayUtils} import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array +import com.scalableminds.webknossos.datastore.datareaders.{DatasetArray, MultiArrayUtils} import com.scalableminds.webknossos.datastore.geometry.Vec3IntProto import com.scalableminds.webknossos.datastore.helpers.{NativeBucketScanner, NodeDefaults, SkeletonTracingDefaults} import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, ElementClass} +import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, DataConverter} import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging -import com.scalableminds.util.tools.Box.tryo import ucar.ma2.{Array => MultiArray} import java.nio.{ByteBuffer, ByteOrder, LongBuffer} @@ -26,6 +27,7 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, remoteSourceDescriptorService: RemoteSourceDescriptorService, chunkCacheService: ChunkCacheService) extends DataConverter + with AgglomerateFileUtils with LazyLogging { private lazy val openArraysCache = AlfuCache[(AgglomerateFileKey, String), DatasetArray]() @@ -35,14 +37,6 @@ class ZarrAgglomerateService @Inject()(config: DataStoreConfig, protected lazy val bucketScanner = new NativeBucketScanner() - private val keySegmentToAgglomerate = "segment_to_agglomerate" - private val keyAgglomerateToSegmentsOffsets = "agglomerate_to_segments_offsets" - private val keyAgglomerateToSegments = "agglomerate_to_segments" - private val keyAgglomerateToPositions = "agglomerate_to_positions" - private val keyAgglomerateToEdges = "agglomerate_to_edges" - private val keyAgglomerateToEdgesOffsets = "agglomerate_to_edges_offsets" - private val keyAgglomerateToAffinities = "agglomerate_to_affinities" - private def mapSingleSegment(segmentToAgglomerate: DatasetArray, segmentId: Long)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Long] = for { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala index fc503743ef8..a39a763c64b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshService.scala @@ -12,9 +12,10 @@ import com.scalableminds.webknossos.datastore.models.requests.{ DataServiceRequestSettings } import 
com.scalableminds.webknossos.datastore.services.mcubes.MarchingCubes -import com.scalableminds.webknossos.datastore.services.{BinaryDataService, MappingService} +import com.scalableminds.webknossos.datastore.services.BinaryDataService import com.typesafe.scalalogging.LazyLogging import com.scalableminds.util.tools.{Box, Failure} +import com.scalableminds.webknossos.datastore.services.mapping.MappingService import org.apache.pekko.actor.{Actor, ActorRef, ActorSystem, Props} import org.apache.pekko.pattern.ask import org.apache.pekko.routing.RoundRobinPool diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshServiceHolder.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshServiceHolder.scala index 31918295ffd..63f4779ba82 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshServiceHolder.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/AdHocMeshServiceHolder.scala @@ -1,6 +1,7 @@ package com.scalableminds.webknossos.datastore.services.mesh -import com.scalableminds.webknossos.datastore.services.{BinaryDataService, MappingService} +import com.scalableminds.webknossos.datastore.services.BinaryDataService +import com.scalableminds.webknossos.datastore.services.mapping.MappingService import org.apache.pekko.actor.ActorSystem import javax.inject.Inject diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala index ab6237f85bb..c577a4b636a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/DSFullMeshService.scala @@ -12,6 +12,7 @@ import com.scalableminds.webknossos.datastore.models.{AdditionalCoordinate, Voxe import com.scalableminds.webknossos.datastore.services._ import com.typesafe.scalalogging.LazyLogging import com.scalableminds.util.tools.Box.tryo +import com.scalableminds.webknossos.datastore.services.mapping.MappingService import play.api.i18n.MessagesProvider import play.api.libs.json.{Json, OFormat} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala index 93fd8910149..6bf04a98c81 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/Hdf5MeshFileService.scala @@ -12,7 +12,10 @@ import play.api.i18n.{Messages, MessagesProvider} import java.nio.file.Paths import scala.concurrent.ExecutionContext -class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends NeuroglancerMeshHelper with FoxImplicits { +class Hdf5MeshFileService @Inject()(config: DataStoreConfig) + extends NeuroglancerMeshHelper + with MeshFileUtils + with FoxImplicits { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) @@ -31,8 +34,8 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance private def readMeshFileMetadata(meshFileKey: MeshFileKey): Box[(String, Double, Array[Array[Double]])] = fileHandleCache.withCachedHdf5(meshFileKey.attachment) { cachedMeshFile => - val 
lodScaleMultiplier = cachedMeshFile.float64Reader.getAttr("/", "lod_scale_multiplier") - val transform = cachedMeshFile.float64Reader.getMatrixAttr("/", "transform") + val lodScaleMultiplier = cachedMeshFile.float64Reader.getAttr("/", attrKeyLodScaleMultiplier) + val transform = cachedMeshFile.float64Reader.getMatrixAttr("/", attrKeyTransform) (cachedMeshFile.meshFormat, lodScaleMultiplier, transform) } @@ -66,13 +69,13 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance private def getNeuroglancerSegmentManifestOffsets(segmentId: Long, cachedMeshFile: CachedHdf5File): (Long, Long) = { val bucketIndex = cachedMeshFile.hashFunction(segmentId) % cachedMeshFile.nBuckets - val bucketOffsets = cachedMeshFile.uint64Reader.readArrayBlockWithOffset("bucket_offsets", 2, bucketIndex) + val bucketOffsets = cachedMeshFile.uint64Reader.readArrayBlockWithOffset(keyBucketOffsets, 2, bucketIndex) val bucketStart = bucketOffsets(0) val bucketEnd = bucketOffsets(1) if (bucketEnd - bucketStart == 0) throw new Exception(s"No entry for segment $segmentId") - val buckets = cachedMeshFile.uint64Reader.readMatrixBlockWithOffset("buckets", + val buckets = cachedMeshFile.uint64Reader.readMatrixBlockWithOffset(keyBuckets, (bucketEnd - bucketStart + 1).toInt, 3, bucketStart, @@ -125,7 +128,7 @@ class Hdf5MeshFileService @Inject()(config: DataStoreConfig) extends Neuroglance val data: List[(Array[Byte], Int)] = requestsReordered.map { requestAndIndex => val meshChunkDataRequest = requestAndIndex._1 val data = - cachedMeshFile.uint8Reader.readArrayBlockWithOffset("neuroglancer", + cachedMeshFile.uint8Reader.readArrayBlockWithOffset(keyNeuroglancer, meshChunkDataRequest.byteSize, meshChunkDataRequest.byteOffset) (data, requestAndIndex._2) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileUtils.scala new file mode 100644 index 00000000000..337083c7c6f --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/MeshFileUtils.scala @@ -0,0 +1,16 @@ +package com.scalableminds.webknossos.datastore.services.mesh + +trait MeshFileUtils { + + protected val keyBucketOffsets = "bucket_offsets" + protected val keyBuckets = "buckets" + protected val keyNeuroglancer = "neuroglancer" + + protected val attrKeyLodScaleMultiplier = "lod_scale_multiplier" + protected val attrKeyTransform = "transform" + protected val attrKeyMeshFormat = "mesh_format" + protected val attrKeyHashFunction = "hash_function" + protected val attrKeyNBuckets = "n_buckets" + protected val attrKeyMappingName = "mapping_name" + +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 32c91116961..d8af9a88c1b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -8,7 +8,11 @@ import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId -import 
com.scalableminds.webknossos.datastore.services.{ChunkCacheService, ArrayArtifactHashing} +import com.scalableminds.webknossos.datastore.services.{ + ArrayArtifactHashing, + ChunkCacheService, + VoxelyticsZarrArtifactUtils +} import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{JsResult, JsValue, Reads} @@ -29,24 +33,18 @@ case class MeshFileAttributes( lazy val applyHashFunction: Long => Long = getHashFunction(hashFunction) } -object MeshFileAttributes { - val FILENAME_ZARR_JSON = "zarr.json" - +object MeshFileAttributes extends MeshFileUtils with VoxelyticsZarrArtifactUtils { implicit object MeshFileAttributesZarr3GroupHeaderReads extends Reads[MeshFileAttributes] { override def reads(json: JsValue): JsResult[MeshFileAttributes] = { - val keyAttributes = "attributes" - val keyVx = "voxelytics" - val keyFormatVersion = "artifact_schema_version" - val keyArtifactAttrs = "artifact_attributes" - val meshFileAttrs = json \ keyAttributes \ keyVx \ keyArtifactAttrs + val meshFileAttrs = lookUpArtifactAttributes(json) for { - formatVersion <- (json \ keyAttributes \ keyVx \ keyFormatVersion).validate[Long] - meshFormat <- (meshFileAttrs \ "mesh_format").validate[String] - lodScaleMultiplier <- (meshFileAttrs \ "lod_scale_multiplier").validate[Double] - transform <- (meshFileAttrs \ "transform").validate[Array[Array[Double]]] - hashFunction <- (meshFileAttrs \ "hash_function").validate[String] - nBuckets <- (meshFileAttrs \ "n_buckets").validate[Int] - mappingName <- (meshFileAttrs \ "mapping_name").validateOpt[String] + formatVersion <- readArtifactSchemaVersion(json) + meshFormat <- (meshFileAttrs \ attrKeyMeshFormat).validate[String] + lodScaleMultiplier <- (meshFileAttrs \ attrKeyLodScaleMultiplier).validate[Double] + transform <- (meshFileAttrs \ attrKeyTransform).validate[Array[Array[Double]]] + hashFunction <- (meshFileAttrs \ attrKeyHashFunction).validate[String] + nBuckets <- (meshFileAttrs \ attrKeyNBuckets).validate[Int] + mappingName <- (meshFileAttrs \ attrKeyMappingName).validateOpt[String] } yield MeshFileAttributes( formatVersion, @@ -64,12 +62,9 @@ object MeshFileAttributes { class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, remoteSourceDescriptorService: RemoteSourceDescriptorService) extends FoxImplicits + with MeshFileUtils with NeuroglancerMeshHelper { - private val keyBucketOffsets = "bucket_offsets" - private val keyBuckets = "buckets" - private val keyNeuroglancer = "neuroglancer" - private lazy val openArraysCache = AlfuCache[(MeshFileKey, String), DatasetArray]() private lazy val attributesCache = AlfuCache[MeshFileKey, MeshFileAttributes]() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala index 531bd6ad3f4..baab0ba0c89 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/Hdf5SegmentIndexFileService.scala @@ -11,7 +11,7 @@ import java.nio.file.Paths import javax.inject.Inject import scala.concurrent.ExecutionContext -class Hdf5SegmentIndexFileService @Inject()(config: DataStoreConfig) extends FoxImplicits { +class Hdf5SegmentIndexFileService @Inject()(config: 
DataStoreConfig) extends FoxImplicits with SegmentIndexFileUtils { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) @@ -21,10 +21,10 @@ class Hdf5SegmentIndexFileService @Inject()(config: DataStoreConfig) extends Fox implicit ec: ExecutionContext): Fox[Array[Vec3Int]] = for { segmentIndex <- fileHandleCache.getCachedHdf5File(segmentIndexFileKey.attachment)(CachedHdf5File.fromPath).toFox - nBuckets = segmentIndex.uint64Reader.getAttr("/", "n_hash_buckets") + nBuckets = segmentIndex.uint64Reader.getAttr("/", attrKeyNHashBuckets) bucketIndex = segmentIndex.hashFunction(segmentId) % nBuckets - bucketOffsets = segmentIndex.uint64Reader.readArrayBlockWithOffset("hash_bucket_offsets", 2, bucketIndex) + bucketOffsets = segmentIndex.uint64Reader.readArrayBlockWithOffset(keyHashBucketOffsets, 2, bucketIndex) bucketStart = bucketOffsets(0) bucketEnd = bucketOffsets(1) @@ -41,7 +41,7 @@ class Hdf5SegmentIndexFileService @Inject()(config: DataStoreConfig) extends Fox implicit ec: ExecutionContext): Fox[Option[Array[Array[Short]]]] = for { _ <- Fox.successful(()) - buckets = segmentIndex.uint64Reader.readMatrixBlockWithOffset("hash_buckets", + buckets = segmentIndex.uint64Reader.readMatrixBlockWithOffset(keyHashBuckets, (bucketEnd - bucketStart + 1).toInt, 3, bucketStart, @@ -51,10 +51,10 @@ class Hdf5SegmentIndexFileService @Inject()(config: DataStoreConfig) extends Fox _ <- Fox.successful(()) topLeftStart = buckets(bucketLocalOffset)(1) topLeftEnd = buckets(bucketLocalOffset)(2) - bucketEntriesDtype <- tryo(segmentIndex.stringReader.getAttr("/", "dtype_bucket_entries")).toFox + bucketEntriesDtype <- tryo(segmentIndex.stringReader.getAttr("/", attrKeyDtypeBucketEntries)).toFox _ <- Fox .fromBool(bucketEntriesDtype == "uint16") ?~> "value for dtype_bucket_entries in segment index file is not supported, only uint16 is supported" - topLefts = segmentIndex.uint16Reader.readMatrixBlockWithOffset("top_lefts", + topLefts = segmentIndex.uint16Reader.readMatrixBlockWithOffset(keyTopLefts, (topLeftEnd - topLeftStart).toInt, 3, topLeftStart, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala index 2029c74962c..2b2767ad415 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala @@ -22,8 +22,8 @@ import com.scalableminds.webknossos.datastore.models.requests.{ DataServiceRequestSettings } import com.scalableminds.webknossos.datastore.models.{AdditionalCoordinate, VoxelPosition} +import com.scalableminds.webknossos.datastore.services.mapping.AgglomerateService import com.scalableminds.webknossos.datastore.services.{ - AgglomerateService, ArrayArtifactHashing, BinaryDataServiceHolder } @@ -179,7 +179,7 @@ class SegmentIndexFileService @Inject()(config: DataStoreConfig, } yield (bucketData, dataLayer.elementClass) } - // Reads bucket positions froms egment index file. Returns target-mag bucket positions + // Reads bucket positions from segment index file. 
Returns target-mag bucket positions // (even though the file stores mag1 bucket positions) private def getBucketPositions(segmentIndexFileKey: SegmentIndexFileKey, agglomerateFileKeyOpt: Option[AgglomerateFileKey])( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileUtils.scala new file mode 100644 index 00000000000..3ff48793da0 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileUtils.scala @@ -0,0 +1,13 @@ +package com.scalableminds.webknossos.datastore.services.segmentindex + +trait SegmentIndexFileUtils { + + protected val keyHashBucketOffsets = "hash_bucket_offsets" + protected val keyHashBuckets = "hash_buckets" + protected val keyTopLefts = "top_lefts" + + protected val attrKeyHashFunction = "hash_function" + protected val attrKeyNHashBuckets = "n_hash_buckets" + protected val attrKeyDtypeBucketEntries = "dtype_bucket_entries" + +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala index 78a9fc7820f..15dc3c847b9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala @@ -8,7 +8,11 @@ import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId -import com.scalableminds.webknossos.datastore.services.{ArrayArtifactHashing, ChunkCacheService} +import com.scalableminds.webknossos.datastore.services.{ + ArrayArtifactHashing, + ChunkCacheService, + VoxelyticsZarrArtifactUtils +} import ucar.ma2.{Array => MultiArray} import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import play.api.libs.json.{JsResult, JsValue, Reads} @@ -18,7 +22,6 @@ import scala.concurrent.ExecutionContext case class SegmentIndexFileAttributes( formatVersion: Long, - mag: Vec3Int, nHashBuckets: Long, hashFunction: String, dtypeBucketEntries: String, @@ -26,26 +29,18 @@ case class SegmentIndexFileAttributes( lazy val applyHashFunction: Long => Long = getHashFunction(hashFunction) } -object SegmentIndexFileAttributes { - val FILENAME_ZARR_JSON = "zarr.json" - +object SegmentIndexFileAttributes extends SegmentIndexFileUtils with VoxelyticsZarrArtifactUtils { implicit object SegmentIndexFileAttributesZarr3GroupHeaderReads extends Reads[SegmentIndexFileAttributes] { override def reads(json: JsValue): JsResult[SegmentIndexFileAttributes] = { - val keyAttributes = "attributes" - val keyVx = "voxelytics" - val keyFormatVersion = "artifact_schema_version" - val keyArtifactAttrs = "artifact_attributes" - val segmentIndexFileAttrs = json \ keyAttributes \ keyVx \ keyArtifactAttrs + val attrs = lookUpArtifactAttributes(json) for { - formatVersion <- (json \ keyAttributes \ keyVx \ keyFormatVersion).validate[Long] - mag <- (segmentIndexFileAttrs \ "mag").validate[Vec3Int] - nHashBuckets <- (segmentIndexFileAttrs \ 
"n_hash_buckets").validate[Long] - hashFunction <- (segmentIndexFileAttrs \ "hash_function").validate[String] - dtypeBucketEntries <- (segmentIndexFileAttrs \ "dtype_bucket_entries").validate[String] + formatVersion <- readArtifactSchemaVersion(json) + nHashBuckets <- (attrs \ attrKeyNHashBuckets).validate[Long] + hashFunction <- (attrs \ attrKeyHashFunction).validate[String] + dtypeBucketEntries <- (attrs \ attrKeyDtypeBucketEntries).validate[String] } yield SegmentIndexFileAttributes( formatVersion, - mag, nHashBuckets, hashFunction, dtypeBucketEntries @@ -56,11 +51,8 @@ object SegmentIndexFileAttributes { class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: RemoteSourceDescriptorService, chunkCacheService: ChunkCacheService) - extends FoxImplicits { - - private val keyHashBucketOffsets = "hash_bucket_offsets" - private val keyHashBuckets = "hash_buckets" - private val keyTopLefts = "top_lefts" + extends FoxImplicits + with SegmentIndexFileUtils { private lazy val openArraysCache = AlfuCache[(SegmentIndexFileKey, String), DatasetArray]() private lazy val attributesCache = AlfuCache[SegmentIndexFileKey, SegmentIndexFileAttributes]() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala index 2a3ced2af21..edb3055a917 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/storage/Hdf5FileCache.scala @@ -15,6 +15,7 @@ import com.scalableminds.util.tools.{Box, Failure, Full} import com.scalableminds.webknossos.datastore.dataformats.SafeCachable import com.scalableminds.webknossos.datastore.models.datasource.LayerAttachment import com.scalableminds.webknossos.datastore.services.ArrayArtifactHashing +import com.scalableminds.webknossos.datastore.services.mesh.MeshFileUtils import com.typesafe.scalalogging.LazyLogging import java.nio.file.Path @@ -24,6 +25,7 @@ class CachedHdf5File(reader: IHDF5Reader) extends SafeCachable with AutoCloseable with ArrayArtifactHashing + with MeshFileUtils with LazyLogging { override protected def onFinalize(): Unit = reader.close() @@ -38,12 +40,12 @@ class CachedHdf5File(reader: IHDF5Reader) lazy val float64Reader: IHDF5DoubleReader = reader.float64() // For MeshFile - lazy val nBuckets: Long = uint64Reader.getAttr("/", "n_buckets") - lazy val meshFormat: String = stringReader.getAttr("/", "mesh_format") - lazy val mappingName: String = stringReader.getAttr("/", "mapping_name") + lazy val nBuckets: Long = uint64Reader.getAttr("/", attrKeyNBuckets) + lazy val meshFormat: String = stringReader.getAttr("/", attrKeyMeshFormat) + lazy val mappingName: String = stringReader.getAttr("/", attrKeyMappingName) // For MeshFile and SegmentIndexFile - lazy val hashFunction: Long => Long = getHashFunction(stringReader.getAttr("/", "hash_function")) + lazy val hashFunction: Long => Long = getHashFunction(stringReader.getAttr("/", attrKeyHashFunction)) lazy val artifactSchemaVersion: Long = int64Reader.getAttr("/", "artifact_schema_version") } From 313838cea9725d10e8f1f093d6eac0a0bbf55a13 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 2 Jul 2025 11:32:27 +0200 Subject: [PATCH 093/100] format --- .../datastore/DataStoreModule.scala | 21 ++++++++++++++++--- .../controllers/BinaryDataController.scala | 6 +++++- .../controllers/DataSourceController.scala | 13 ++++++++++-- 
.../services/mapping/AgglomerateService.scala | 7 ++++++- .../SegmentIndexFileService.scala | 5 +---- 5 files changed, 41 insertions(+), 11 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index 8c429288976..daa4cc9c927 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -4,9 +4,24 @@ import org.apache.pekko.actor.ActorSystem import com.google.inject.AbstractModule import com.google.inject.name.Names import com.scalableminds.webknossos.datastore.services._ -import com.scalableminds.webknossos.datastore.services.mapping.{AgglomerateService, Hdf5AgglomerateService, MappingService, ZarrAgglomerateService} -import com.scalableminds.webknossos.datastore.services.mesh.{AdHocMeshServiceHolder, Hdf5MeshFileService, MeshFileService, NeuroglancerPrecomputedMeshFileService, ZarrMeshFileService} -import com.scalableminds.webknossos.datastore.services.segmentindex.{Hdf5SegmentIndexFileService, SegmentIndexFileService, ZarrSegmentIndexFileService} +import com.scalableminds.webknossos.datastore.services.mapping.{ + AgglomerateService, + Hdf5AgglomerateService, + MappingService, + ZarrAgglomerateService +} +import com.scalableminds.webknossos.datastore.services.mesh.{ + AdHocMeshServiceHolder, + Hdf5MeshFileService, + MeshFileService, + NeuroglancerPrecomputedMeshFileService, + ZarrMeshFileService +} +import com.scalableminds.webknossos.datastore.services.segmentindex.{ + Hdf5SegmentIndexFileService, + SegmentIndexFileService, + ZarrSegmentIndexFileService +} import com.scalableminds.webknossos.datastore.services.uploading.UploadService import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptorService} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/BinaryDataController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/BinaryDataController.scala index 2a379bedb6c..7c2fd129b55 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/BinaryDataController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/BinaryDataController.scala @@ -11,7 +11,11 @@ import com.scalableminds.webknossos.datastore.helpers.MissingBucketHeaders import com.scalableminds.webknossos.datastore.image.{ImageCreator, ImageCreatorParameters} import com.scalableminds.webknossos.datastore.models.DataRequestCollection._ import com.scalableminds.webknossos.datastore.models.datasource._ -import com.scalableminds.webknossos.datastore.models.requests.{DataServiceDataRequest, DataServiceMappingRequest, DataServiceRequestSettings} +import com.scalableminds.webknossos.datastore.models.requests.{ + DataServiceDataRequest, + DataServiceMappingRequest, + DataServiceRequestSettings +} import com.scalableminds.webknossos.datastore.models._ import com.scalableminds.webknossos.datastore.services._ import com.scalableminds.webknossos.datastore.services.mesh.{AdHocMeshRequest, AdHocMeshService, AdHocMeshServiceHolder} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 2c06528994d..5ea67e147d6 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -5,8 +5,17 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong -import com.scalableminds.webknossos.datastore.explore.{ExploreRemoteDatasetRequest, ExploreRemoteDatasetResponse, ExploreRemoteLayerService} -import com.scalableminds.webknossos.datastore.helpers.{GetMultipleSegmentIndexParameters, GetSegmentIndexParameters, SegmentIndexData, SegmentStatisticsParameters} +import com.scalableminds.webknossos.datastore.explore.{ + ExploreRemoteDatasetRequest, + ExploreRemoteDatasetResponse, + ExploreRemoteLayerService +} +import com.scalableminds.webknossos.datastore.helpers.{ + GetMultipleSegmentIndexParameters, + GetSegmentIndexParameters, + SegmentIndexData, + SegmentStatisticsParameters +} import com.scalableminds.webknossos.datastore.models.datasource.inbox.InboxDataSource import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSource, DataSourceId, GenericDataSource} import com.scalableminds.webknossos.datastore.services._ diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateService.scala index 51c2864f674..83129eef903 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mapping/AgglomerateService.scala @@ -10,7 +10,12 @@ import com.scalableminds.util.tools.{Box, Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.AgglomerateGraph.AgglomerateGraph import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.SkeletonTracing.SkeletonTracing -import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, DataSourceId, LayerAttachment, LayerAttachmentDataformat} +import com.scalableminds.webknossos.datastore.models.datasource.{ + DataLayer, + DataSourceId, + LayerAttachment, + LayerAttachmentDataformat +} import com.scalableminds.webknossos.datastore.models.requests.DataServiceDataRequest import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala index 2b2767ad415..f63e66d6ceb 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/SegmentIndexFileService.scala @@ -23,10 +23,7 @@ import com.scalableminds.webknossos.datastore.models.requests.{ } import com.scalableminds.webknossos.datastore.models.{AdditionalCoordinate, VoxelPosition} import com.scalableminds.webknossos.datastore.services.mapping.AgglomerateService -import com.scalableminds.webknossos.datastore.services.{ - ArrayArtifactHashing, - BinaryDataServiceHolder -} +import 
com.scalableminds.webknossos.datastore.services.{ArrayArtifactHashing, BinaryDataServiceHolder} import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, RemoteSourceDescriptorService} import java.nio.file.{Path, Paths} From f6ddc57637767bc87bea60512660c715e4a10d60 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 3 Jul 2025 11:23:48 +0200 Subject: [PATCH 094/100] extract string constants to ConnectomeFileUtils --- .../connectome/ConnectomeFileUtils.scala | 25 ++++++++++++ .../Hdf5ConnectomeFileService.scala | 32 +++++++-------- .../ZarrConnectomeFileService.scala | 39 +++++-------------- 3 files changed, 50 insertions(+), 46 deletions(-) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileUtils.scala diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileUtils.scala new file mode 100644 index 00000000000..51bf2a8315c --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ConnectomeFileUtils.scala @@ -0,0 +1,25 @@ +package com.scalableminds.webknossos.datastore.services.connectome + +import com.scalableminds.webknossos.datastore.services.connectome.SynapticPartnerDirection.SynapticPartnerDirection + +trait ConnectomeFileUtils { + + protected val keyCsrIndptr = "CSR_indptr" + protected val keyCscIndptr = "CSC_indptr" + protected val keyCsrIndices = "CSR_indices" + protected val keyAgglomeratePairOffsets = "agglomerate_pair_offsets" + protected val keyCscAgglomeratePair = "CSC_agglomerate_pair" + protected val keySynapseTypes = "synapse_types" + protected val keySynapsePositions = "synapse_positions" + protected val keySynapseToSrcAgglomerate = "synapse_to_src_agglomerate" + protected val keySynapseToDstAgglomerate = "synapse_to_dst_agglomerate" + + protected val attrKeyMetadataMappingName = "metadata/mapping_name" + protected val attrKeySynapseTypeNames = "synapse_type_names" + + protected def synapticPartnerKey(direction: SynapticPartnerDirection): String = + direction match { + case SynapticPartnerDirection.src => keySynapseToSrcAgglomerate + case SynapticPartnerDirection.dst => keySynapseToDstAgglomerate + } +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index 7820e40702d..15c1f6aa02b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -12,7 +12,7 @@ import java.nio.file.Paths import javax.inject.Inject import scala.concurrent.ExecutionContext -class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxImplicits { +class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxImplicits with ConnectomeFileUtils { private val dataBaseDir = Paths.get(config.Datastore.baseDirectory) @@ -24,7 +24,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" mappingName <- finishAccessOnFailure(cachedConnectomeFile) { - 
cachedConnectomeFile.stringReader.getAttr("/", "metadata/mapping_name") + cachedConnectomeFile.stringReader.getAttr("/", attrKeyMetadataMappingName) } ?~> "connectome.file.readEncoding.failed" _ = cachedConnectomeFile.finishAccess() } yield mappingName @@ -36,7 +36,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSC_indptr", 2, agglomerateId) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCscIndptr, 2, agglomerateId) } ?~> "Could not read offsets from connectome file" from <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" to <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" @@ -44,15 +44,15 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm agglomeratePairs: Array[Long] <- if (to - from == 0L) Fox.successful(Array.empty[Long]) else finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSC_agglomerate_pair", (to - from).toInt, from) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCscAgglomeratePair, (to - from).toInt, from) } ?~> "Could not read agglomerate pairs from connectome file" synapseIdsNested <- Fox.serialCombined(agglomeratePairs.toList) { agglomeratePair: Long => for { from <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, agglomeratePair) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyAgglomeratePairOffsets, 1, agglomeratePair) }.flatMap(_.headOption.toFox) to <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyAgglomeratePairOffsets, 1, agglomeratePair + 1) }.flatMap(_.headOption.toFox) @@ -68,15 +68,15 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indptr", 2, agglomerateId) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCscIndptr, 2, agglomerateId) } ?~> "Could not read offsets from connectome file" fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" from <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, fromPtr) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyAgglomeratePairOffsets, 1, fromPtr) }.flatMap(_.headOption.toFox) ?~> "Could not synapses from connectome file" to <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 1, toPtr) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyAgglomeratePairOffsets, 1, toPtr) }.flatMap(_.headOption.toFox) ?~> "Could not synapses from connectome file" } yield 
Seq.range(from, to) @@ -89,9 +89,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm .toFox ?~> "connectome.file.open.failed" agglomerateIds <- Fox.serialCombined(synapseIds) { synapseId: Long => finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(s"/synapse_to_${direction.toString}_agglomerate", - 1, - synapseId) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(synapticPartnerKey(direction), 1, synapseId) }.flatMap(_.headOption.toFox) } } yield agglomerateIds @@ -104,7 +102,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm .toFox ?~> "connectome.file.open.failed" synapsePositions <- Fox.serialCombined(synapseIds) { synapseId: Long => finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readMatrixBlockWithOffset("/synapse_positions", 1, 3, synapseId, 0) + cachedConnectomeFile.uint64Reader.readMatrixBlockWithOffset(keySynapsePositions, 1, 3, synapseId, 0) }.flatMap(_.headOption.toFox) } } yield synapsePositions.map(_.toList) @@ -119,7 +117,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm typeNames = List("dendritic-shaft-synapse", "spine-head-synapse", "soma-synapse") synapseTypes <- Fox.serialCombined(synapseIds) { synapseId: Long => finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/synapse_types", 1, synapseId) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keySynapseTypes, 1, synapseId) }.flatMap(_.headOption.toFox) } } yield SynapseTypesWithLegend(synapseTypes, typeNames) @@ -131,14 +129,14 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indptr", 2, srcAgglomerateId) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCscIndptr, 2, srcAgglomerateId) } ?~> "Could not read offsets from connectome file" fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" columnValues: Array[Long] <- if (toPtr - fromPtr == 0L) Fox.successful(Array.empty[Long]) else finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/CSR_indices", (toPtr - fromPtr).toInt, fromPtr) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCsrIndices, (toPtr - fromPtr).toInt, fromPtr) } ?~> "Could not read agglomerate pairs from connectome file" columnOffset = SequenceUtils.searchSorted(columnValues, dstAgglomerateId) pairIndex = fromPtr + columnOffset @@ -147,7 +145,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm else for { fromAndTo <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset("/agglomerate_pair_offsets", 2, pairIndex) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyAgglomeratePairOffsets, 2, pairIndex) } from <- fromAndTo.lift(0).toFox to <- fromAndTo.lift(1).toFox diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index 06207fa1165..9a54b039d5a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -8,7 +8,7 @@ import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.datareaders.DatasetArray import com.scalableminds.webknossos.datastore.datareaders.zarr3.Zarr3Array import com.scalableminds.webknossos.datastore.models.datasource.DataSourceId -import com.scalableminds.webknossos.datastore.services.ChunkCacheService +import com.scalableminds.webknossos.datastore.services.{ChunkCacheService, VoxelyticsZarrArtifactUtils} import com.scalableminds.webknossos.datastore.services.connectome.SynapticPartnerDirection.SynapticPartnerDirection import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import jakarta.inject.Inject @@ -22,20 +22,15 @@ case class ConnectomeFileAttributes( synapseTypeNames: Seq[String] ) -object ConnectomeFileAttributes { - val FILENAME_ZARR_JSON = "zarr.json" +object ConnectomeFileAttributes extends VoxelyticsZarrArtifactUtils with ConnectomeFileUtils { implicit object ConnectomeFileAttributesZarr3GroupHeaderReads extends Reads[ConnectomeFileAttributes] { override def reads(json: JsValue): JsResult[ConnectomeFileAttributes] = { - val keyAttributes = "attributes" - val keyVx = "voxelytics" - val keyFormatVersion = "artifact_schema_version" - val keyArtifactAttrs = "artifact_attributes" - val connectomeFileAttrs = json \ keyAttributes \ keyVx \ keyArtifactAttrs + val connectomeFileAttrs = lookUpArtifactAttributes(json) for { - formatVersion <- (json \ keyAttributes \ keyVx \ keyFormatVersion).validate[Long] - mappingName <- (connectomeFileAttrs \ "metadata/mapping_name").validate[String] - synapseTypeNames <- (connectomeFileAttrs \ "synapse_type_names").validate[Seq[String]] + formatVersion <- readArtifactSchemaVersion(json) + mappingName <- (connectomeFileAttrs \ attrKeyMetadataMappingName).validate[String] + synapseTypeNames <- (connectomeFileAttrs \ attrKeySynapseTypeNames).validate[Seq[String]] } yield ConnectomeFileAttributes( formatVersion, @@ -48,20 +43,11 @@ object ConnectomeFileAttributes { class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteSourceDescriptorService, chunkCacheService: ChunkCacheService) - extends FoxImplicits { + extends FoxImplicits + with ConnectomeFileUtils { private lazy val openArraysCache = AlfuCache[(ConnectomeFileKey, String), DatasetArray]() private lazy val attributesCache = AlfuCache[ConnectomeFileKey, ConnectomeFileAttributes]() - private val keyCsrIndptr = "CSR_indptr" - private val keyCscIndptr = "CSC_indptr" - private val keyCsrIndices = "CSR_indices" - private val keyAgglomeratePairOffsets = "agglomerate_pair_offsets" - private val keyCscAgglomeratePair = "CSC_agglomerate_pair" - private val keySynapseTypes = "synapse_types" - private val keySynapsePositions = "synapse_positions" - private val keySynapseToSrcAgglomerate = "synapse_to_src_agglomerate" - private val keySynapseToDstAgglomerate = "synapse_to_dst_agglomerate" - private def readConnectomeFileAttributes(connectomeFileKey: ConnectomeFileKey)( implicit ec: ExecutionContext, tc: TokenContext): Fox[ConnectomeFileAttributes] = @@ -86,13 +72,9 @@ class 
ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS def synapticPartnerForSynapses( connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long], - direction: SynapticPartnerDirection)(implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = { - val arrayKey = direction match { - case SynapticPartnerDirection.src => keySynapseToSrcAgglomerate - case SynapticPartnerDirection.dst => keySynapseToDstAgglomerate - } + direction: SynapticPartnerDirection)(implicit ec: ExecutionContext, tc: TokenContext): Fox[List[Long]] = for { - synapseToPartnerAgglomerateArray <- openZarrArray(connectomeFileKey, arrayKey) + synapseToPartnerAgglomerateArray <- openZarrArray(connectomeFileKey, synapticPartnerKey(direction)) agglomerateIds <- Fox.serialCombined(synapseIds) { synapseId: Long => for { agglomerateIdMA <- synapseToPartnerAgglomerateArray.readAsMultiArray(offset = synapseId, shape = 1) @@ -100,7 +82,6 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS } yield agglomerateId } } yield agglomerateIds - } def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( implicit ec: ExecutionContext, From b18eb66c1a17f8f4c1b9444231cc2cd1785fe308 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 3 Jul 2025 14:03:48 +0200 Subject: [PATCH 095/100] once more csc csr typo --- .../services/connectome/Hdf5ConnectomeFileService.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index 15c1f6aa02b..f77f15898cd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -68,7 +68,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCscIndptr, 2, agglomerateId) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCsrIndptr, 2, agglomerateId) } ?~> "Could not read offsets from connectome file" fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" @@ -129,7 +129,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCscIndptr, 2, srcAgglomerateId) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCsrIndptr, 2, srcAgglomerateId) } ?~> "Could not read offsets from connectome file" fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" From 871256dad737deff27bc0420fa474a70fb718f0e Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 3 Jul 2025 14:34:33 
+0200 Subject: [PATCH 096/100] fix typos in error messages --- .../services/connectome/Hdf5ConnectomeFileService.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index f77f15898cd..80e65142184 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -74,10 +74,10 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" from <- finishAccessOnFailure(cachedConnectomeFile) { cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyAgglomeratePairOffsets, 1, fromPtr) - }.flatMap(_.headOption.toFox) ?~> "Could not synapses from connectome file" + }.flatMap(_.headOption.toFox) ?~> "Could not read synapses from connectome file" to <- finishAccessOnFailure(cachedConnectomeFile) { cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyAgglomeratePairOffsets, 1, toPtr) - }.flatMap(_.headOption.toFox) ?~> "Could not synapses from connectome file" + }.flatMap(_.headOption.toFox) ?~> "Could not read synapses from connectome file" } yield Seq.range(from, to) def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, From 09ef7a567fcab4002aa03cd29000cb61644c2c28 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 7 Jul 2025 09:24:31 +0200 Subject: [PATCH 097/100] implement pr feedback (part 1) --- .../services/connectome/Hdf5ConnectomeFileService.scala | 8 ++++++-- .../services/connectome/ZarrConnectomeFileService.scala | 4 ++-- .../datastore/services/mesh/ZarrMeshFileService.scala | 2 +- .../segmentindex/ZarrSegmentIndexFileService.scala | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index 80e65142184..b39e01e7ea5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -18,6 +18,11 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm private lazy val fileHandleCache = new Hdf5FileCache(30) + // Cannot read type names from the hdf5 file due to a limitation in jhdf5. + // However, all existing hdf5 connectome files have this exact type name set. 
+ // Also compare https://scm.slack.com/archives/C5AKLAV0B/p1750852209211939?thread_ts=1705502230.128199&cid=C5AKLAV0B + private lazy val legacySynapseTypeNames = List("dendritic-shaft-synapse", "spine-head-synapse", "soma-synapse") + def mappingNameForConnectomeFile(connectomeFileKey: ConnectomeFileKey)(implicit ec: ExecutionContext): Fox[String] = for { cachedConnectomeFile <- fileHandleCache @@ -114,13 +119,12 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm .getCachedHdf5File(connectomeFileKey.attachment)(CachedHdf5File.fromPath) .toFox ?~> "connectome.file.open.failed" // Hard coded type name list, as all legacy files have this value. - typeNames = List("dendritic-shaft-synapse", "spine-head-synapse", "soma-synapse") synapseTypes <- Fox.serialCombined(synapseIds) { synapseId: Long => finishAccessOnFailure(cachedConnectomeFile) { cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keySynapseTypes, 1, synapseId) }.flatMap(_.headOption.toFox) } - } yield SynapseTypesWithLegend(synapseTypes, typeNames) + } yield SynapseTypesWithLegend(synapseTypes, legacySynapseTypeNames) def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)( implicit ec: ExecutionContext): Fox[Seq[Long]] = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index 9a54b039d5a..8a752c00ddf 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -59,7 +59,7 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS groupHeaderBytes <- (groupVaultPath / ConnectomeFileAttributes.FILENAME_ZARR_JSON).readBytes() connectomeFileAttributes <- JsonHelper .parseAs[ConnectomeFileAttributes](groupHeaderBytes) - .toFox ?~> "Could not parse connectome file attributes from zarr group file" + .toFox ?~> "Could not parse connectome file attributes from zarr group file." } yield connectomeFileAttributes ) @@ -90,7 +90,7 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS arraySynapsePositions <- openZarrArray(connectomeFileKey, keySynapsePositions) synapsePositions <- Fox.serialCombined(synapseIds) { synapseId: Long => for { - synapsePositionMA <- arraySynapsePositions.readAsMultiArray(offset = Array(synapseId, 0), shape = Array(1, 3)) // TODO should offset and shape be transposed? 
+ synapsePositionMA <- arraySynapsePositions.readAsMultiArray(offset = Array(synapseId, 0), shape = Array(1, 3)) synapsePosition <- tryo( Seq(synapsePositionMA.getLong(0), synapsePositionMA.getLong(1), synapsePositionMA.getLong(2))).toFox } yield synapsePosition diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index d8af9a88c1b..8f7139af6e8 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -75,7 +75,7 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, groupHeaderBytes <- (groupVaultPath / MeshFileAttributes.FILENAME_ZARR_JSON).readBytes() meshFileAttributes <- JsonHelper .parseAs[MeshFileAttributes](groupHeaderBytes) - .toFox ?~> "Could not parse meshFile attributes from zarr group file" + .toFox ?~> "Could not parse meshFile attributes from zarr group file." } yield meshFileAttributes private def readMeshFileAttributes(meshFileKey: MeshFileKey)(implicit ec: ExecutionContext, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala index 15dc3c847b9..874f4dd48e5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala @@ -70,7 +70,7 @@ class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: Remot groupHeaderBytes <- (groupVaultPath / SegmentIndexFileAttributes.FILENAME_ZARR_JSON).readBytes() segmentIndexFileAttributes <- JsonHelper .parseAs[SegmentIndexFileAttributes](groupHeaderBytes) - .toFox ?~> "Could not parse segment index file attributes from zarr group file" + .toFox ?~> "Could not parse segment index file attributes from zarr group file." 
} yield segmentIndexFileAttributes def readSegmentIndex(segmentIndexFileKey: SegmentIndexFileKey, From 6beaf69727a3025859deed2c38044a0bfb4194b2 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 7 Jul 2025 09:48:18 +0200 Subject: [PATCH 098/100] catch toPtr o - v }.padTo(offset.length, 0) val chunkIndices = ChunkUtils.computeChunkIndices(datasetShape, chunkShape, shape, totalOffset) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index b39e01e7ea5..147676bbaf5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -43,13 +43,16 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm fromAndToPtr: Array[Long] <- finishAccessOnFailure(cachedConnectomeFile) { cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCscIndptr, 2, agglomerateId) } ?~> "Could not read offsets from connectome file" - from <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" - to <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" + fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" + toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" + _ <- Fox.fromBool(toPtr >= fromPtr) ?~> s"Agglomerate $agglomerateId not present in agglomerate file" // readArrayBlockWithOffset has a bug and does not return the empty array when block size 0 is passed, hence the if. 
- agglomeratePairs: Array[Long] <- if (to - from == 0L) Fox.successful(Array.empty[Long]) + agglomeratePairs: Array[Long] <- if (toPtr - fromPtr == 0L) Fox.successful(Array.empty[Long]) else finishAccessOnFailure(cachedConnectomeFile) { - cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCscAgglomeratePair, (to - from).toInt, from) + cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyCscAgglomeratePair, + (toPtr - fromPtr).toInt, + fromPtr) } ?~> "Could not read agglomerate pairs from connectome file" synapseIdsNested <- Fox.serialCombined(agglomeratePairs.toList) { agglomeratePair: Long => for { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index 8a752c00ddf..d0cb12a95c8 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -118,13 +118,14 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, agglomerateId, keyCscIndptr) agglomeratePairOffsetsArray <- openZarrArray(connectomeFileKey, keyAgglomeratePairOffsets) cscAgglomeratePairArray <- openZarrArray(connectomeFileKey, keyCscAgglomeratePair) + _ <- Fox.fromBool(toPtr >= fromPtr) ?~> s"Agglomerate $agglomerateId not present in agglomerate file" agglomeratePairsMA <- cscAgglomeratePairArray.readAsMultiArray(offset = fromPtr, shape = (toPtr - fromPtr).toInt) agglomeratePairs <- tryo(agglomeratePairsMA.getStorage.asInstanceOf[Array[Long]]).toFox synapseIdsNested <- Fox.serialCombined(agglomeratePairs.toList) { agglomeratePair: Long => for { fromTo <- agglomeratePairOffsetsArray.readAsMultiArray(offset = agglomeratePair, shape = 2) - from <- tryo(fromTo.getLong(0)).toFox - to <- tryo(fromTo.getLong(1)).toFox + from <- tryo(fromTo.getLong(0)).toFox ?~> "Could not read start offset from connectome file" + to <- tryo(fromTo.getLong(1)).toFox ?~> "Could not read end offset from connectome file" } yield Seq.range(from, to) } } yield synapseIdsNested.flatten From 6280991900b7408b9e3cbe0818fc4c9cad7e484d Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 7 Jul 2025 09:51:48 +0200 Subject: [PATCH 099/100] add finishAccess calls to allow cache release --- .../services/connectome/Hdf5ConnectomeFileService.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index 147676bbaf5..8bdd5dae9cf 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -86,6 +86,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm to <- finishAccessOnFailure(cachedConnectomeFile) { cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keyAgglomeratePairOffsets, 1, toPtr) }.flatMap(_.headOption.toFox) ?~> "Could not read synapses from connectome file" + _ = cachedConnectomeFile.finishAccess() } yield 
Seq.range(from, to) def synapticPartnerForSynapses(connectomeFileKey: ConnectomeFileKey, @@ -100,6 +101,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(synapticPartnerKey(direction), 1, synapseId) }.flatMap(_.headOption.toFox) } + _ = cachedConnectomeFile.finishAccess() } yield agglomerateIds def positionsForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( @@ -113,6 +115,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm cachedConnectomeFile.uint64Reader.readMatrixBlockWithOffset(keySynapsePositions, 1, 3, synapseId, 0) }.flatMap(_.headOption.toFox) } + _ = cachedConnectomeFile.finishAccess() } yield synapsePositions.map(_.toList) def typesForSynapses(connectomeFileKey: ConnectomeFileKey, synapseIds: List[Long])( @@ -127,6 +130,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm cachedConnectomeFile.uint64Reader.readArrayBlockWithOffset(keySynapseTypes, 1, synapseId) }.flatMap(_.headOption.toFox) } + _ = cachedConnectomeFile.finishAccess() } yield SynapseTypesWithLegend(synapseTypes, legacySynapseTypeNames) def synapseIdsForDirectedPair(connectomeFileKey: ConnectomeFileKey, srcAgglomerateId: Long, dstAgglomerateId: Long)( @@ -157,6 +161,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm from <- fromAndTo.lift(0).toFox to <- fromAndTo.lift(1).toFox } yield Seq.range(from, to) + _ = cachedConnectomeFile.finishAccess() } yield synapses private def finishAccessOnFailure[T](f: CachedHdf5File)(block: => T)(implicit ec: ExecutionContext): Fox[T] = From 2974e9fa683672689fa5944e85c2f2ea39d7de64 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 7 Jul 2025 13:17:29 +0200 Subject: [PATCH 100/100] add missing cache clear; flip >= --- .../datastore/controllers/DataSourceController.scala | 4 +++- .../services/connectome/Hdf5ConnectomeFileService.scala | 2 +- .../services/connectome/ZarrConnectomeFileService.scala | 5 +++-- .../datastore/services/mesh/ZarrMeshFileService.scala | 3 ++- .../services/segmentindex/ZarrSegmentIndexFileService.scala | 3 ++- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 741af212a99..f55638b663e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -458,6 +458,8 @@ class DataSourceController @Inject()( meshFileService.clearCache(dataSourceId, layerName) val closedSegmentIndexFileHandleCount = segmentIndexFileService.clearCache(dataSourceId, layerName) + val closedConnectomeFileHandleCount = + connectomeFileService.clearCache(dataSourceId, layerName) val reloadedDataSource: InboxDataSource = dataSourceService.dataSourceFromDir( dataSourceService.dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName), organizationId) @@ -465,7 +467,7 @@ class DataSourceController @Inject()( val clearedVaultCacheEntriesOpt = dataSourceService.invalidateVaultCache(reloadedDataSource, layerName) clearedVaultCacheEntriesOpt.foreach { clearedVaultCacheEntries => logger.info( - s"Cleared caches for ${layerName.map(l => s"layer '$l' of ").getOrElse("")}dataset 
$organizationId/$datasetDirectoryName: closed $closedAgglomerateFileHandleCount agglomerate file handles, $closedMeshFileHandleCount mesh file handles, $closedSegmentIndexFileHandleCount segment index file handles, removed $clearedBucketProviderCount bucketProviders, $clearedVaultCacheEntries vault cache entries and $removedChunksCount image chunk cache entries.") + s"Cleared caches for ${layerName.map(l => s"layer '$l' of ").getOrElse("")}dataset $organizationId/$datasetDirectoryName: closed $closedAgglomerateFileHandleCount agglomerate file handles, $closedMeshFileHandleCount mesh file handles, $closedSegmentIndexFileHandleCount segment index file handles, $closedConnectomeFileHandleCount connectome file handles, removed $clearedBucketProviderCount bucketProviders, $clearedVaultCacheEntries vault cache entries and $removedChunksCount image chunk cache entries.") } reloadedDataSource } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala index 8bdd5dae9cf..cc3d5b6402c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/Hdf5ConnectomeFileService.scala @@ -45,7 +45,7 @@ class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxIm } ?~> "Could not read offsets from connectome file" fromPtr <- fromAndToPtr.lift(0).toFox ?~> "Could not read start offset from connectome file" toPtr <- fromAndToPtr.lift(1).toFox ?~> "Could not read end offset from connectome file" - _ <- Fox.fromBool(toPtr >= fromPtr) ?~> s"Agglomerate $agglomerateId not present in agglomerate file" + _ <- Fox.fromBool(fromPtr <= toPtr) ?~> s"Agglomerate $agglomerateId not present in agglomerate file" // readArrayBlockWithOffset has a bug and does not return the empty array when block size 0 is passed, hence the if. agglomeratePairs: Array[Long] <- if (toPtr - fromPtr == 0L) Fox.successful(Array.empty[Long]) else diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala index d0cb12a95c8..c2eaf2ec6e0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/connectome/ZarrConnectomeFileService.scala @@ -56,7 +56,8 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS _ => for { groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(connectomeFileKey.attachment) - groupHeaderBytes <- (groupVaultPath / ConnectomeFileAttributes.FILENAME_ZARR_JSON).readBytes() + groupHeaderBytes <- (groupVaultPath / ConnectomeFileAttributes.FILENAME_ZARR_JSON) + .readBytes() ?~> "Could not read connectome file zarr group file." connectomeFileAttributes <- JsonHelper .parseAs[ConnectomeFileAttributes](groupHeaderBytes) .toFox ?~> "Could not parse connectome file attributes from zarr group file." 
@@ -118,7 +119,7 @@ class ZarrConnectomeFileService @Inject()(remoteSourceDescriptorService: RemoteS (fromPtr, toPtr) <- getToAndFromPtr(connectomeFileKey, agglomerateId, keyCscIndptr) agglomeratePairOffsetsArray <- openZarrArray(connectomeFileKey, keyAgglomeratePairOffsets) cscAgglomeratePairArray <- openZarrArray(connectomeFileKey, keyCscAgglomeratePair) - _ <- Fox.fromBool(toPtr >= fromPtr) ?~> s"Agglomerate $agglomerateId not present in agglomerate file" + _ <- Fox.fromBool(fromPtr <= toPtr) ?~> s"Agglomerate $agglomerateId not present in agglomerate file" agglomeratePairsMA <- cscAgglomeratePairArray.readAsMultiArray(offset = fromPtr, shape = (toPtr - fromPtr).toInt) agglomeratePairs <- tryo(agglomeratePairsMA.getStorage.asInstanceOf[Array[Long]]).toFox synapseIdsNested <- Fox.serialCombined(agglomeratePairs.toList) { agglomeratePair: Long => diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala index 8f7139af6e8..1c9474a19de 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/mesh/ZarrMeshFileService.scala @@ -72,7 +72,8 @@ class ZarrMeshFileService @Inject()(chunkCacheService: ChunkCacheService, tc: TokenContext): Fox[MeshFileAttributes] = for { groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(meshFileKey.attachment) - groupHeaderBytes <- (groupVaultPath / MeshFileAttributes.FILENAME_ZARR_JSON).readBytes() + groupHeaderBytes <- (groupVaultPath / MeshFileAttributes.FILENAME_ZARR_JSON) + .readBytes() ?~> "Could not read mesh file zarr group file" meshFileAttributes <- JsonHelper .parseAs[MeshFileAttributes](groupHeaderBytes) .toFox ?~> "Could not parse meshFile attributes from zarr group file." diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala index 874f4dd48e5..8852fbc19b3 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/segmentindex/ZarrSegmentIndexFileService.scala @@ -67,7 +67,8 @@ class ZarrSegmentIndexFileService @Inject()(remoteSourceDescriptorService: Remot tc: TokenContext): Fox[SegmentIndexFileAttributes] = for { groupVaultPath <- remoteSourceDescriptorService.vaultPathFor(segmentIndexFileKey.attachment) - groupHeaderBytes <- (groupVaultPath / SegmentIndexFileAttributes.FILENAME_ZARR_JSON).readBytes() + groupHeaderBytes <- (groupVaultPath / SegmentIndexFileAttributes.FILENAME_ZARR_JSON) + .readBytes() ?~> "Could not read segment index file zarr group file" segmentIndexFileAttributes <- JsonHelper .parseAs[SegmentIndexFileAttributes](groupHeaderBytes) .toFox ?~> "Could not parse segment index file attributes from zarr group file."