Stricter Checks on Paths in DataSources #8741

Merged
12 commits merged on Jul 23, 2025
2 changes: 2 additions & 0 deletions conf/messages
@@ -126,6 +126,8 @@
dataVault.setup.failed=Failed to set up remote file system
dataVault.getPath.failed=Failed to get remote path

dataSource.notFound=Datasource not found on datastore server. Might still be initializing.
+dataSource.add.pathsNotAllowed=Cannot directly add a datasource with local paths that leave the dataset, or with paths that match the WEBKNOSSOS object storage.
+dataSource.update.newExplicitPaths=Cannot update a dataset with new explicit paths. To add mags or layers, please use the compose functionality.

dataStore.list.failed=Failed to retrieve list of data stores.
dataStore.notFound=DataStore not found.
4 changes: 2 additions & 2 deletions test/e2e/End2EndSpec.scala
@@ -10,7 +10,7 @@
import play.api.libs.ws.{WSClient, WSResponse}
import play.api.test.WithServer

import java.io.File
-import java.nio.file.Paths
+import java.nio.file.Path
import scala.concurrent.Await
import scala.concurrent.duration._
import scala.sys.process._
@@ -64,7 +64,7 @@ class End2EndSpec(arguments: Arguments) extends Specification with GuiceFakeAppl
if (!dataDirectory.listFiles().exists(_.getName == "test-dataset"))
ZipIO.unzipToDirectory(
testDatasetZip,
-Paths.get(dataDirectory.toPath.toString, "test-dataset"),
+Path.of(dataDirectory.toPath.toString, "test-dataset"),
includeHiddenFiles = true,
hiddenFilesWhitelist = List(),
truncateCommonPrefix = true,
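
A note on the mechanical change that recurs through the rest of this PR: java.nio.file.Paths.get is replaced by java.nio.file.Path.of everywhere. On Java 11 and later the two are interchangeable (Paths.get simply forwards to Path.of), so this is a pure modernization with no behavior change:

    import java.nio.file.{Path, Paths}

    // Both factories resolve against the default file system and yield equal paths.
    val viaPathOf = Path.of("/data", "org1", "test-dataset")
    val viaPaths  = Paths.get("/data", "org1", "test-dataset")
    assert(viaPathOf == viaPaths)
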
10 changes: 5 additions & 5 deletions util/src/main/scala/com/scalableminds/util/io/PathUtils.scala
@@ -1,7 +1,7 @@
package com.scalableminds.util.io

import java.io.File
-import java.nio.file.{Path, _}
+import java.nio.file._
import com.typesafe.scalalogging.LazyLogging
import com.scalableminds.util.tools.Box.tryo
import com.scalableminds.util.tools.{Box, Failure, Full}
@@ -42,7 +42,7 @@ trait PathUtils extends LazyLogging {
val elements = p1.iterator.asScala.zip(p2.iterator.asScala).takeWhile(Function.tupled(_ == _)).map(_._1)
val joined = elements.mkString("/")
val absoluteIfNeeded = if (p1.startsWith("/")) f"/$joined" else joined
-Paths.get(absoluteIfNeeded)
+Path.of(absoluteIfNeeded)
}

def commonPrefix(ps: List[Path]): Path =
@@ -162,14 +162,14 @@
lastCutOffIndex match {
case -1 => path
// subpath(0, 0) is forbidden, therefore we handle this special case ourselves
case 0 => Paths.get("")
case 0 => Path.of("")
case i => path.subpath(0, i)
}
}

// Remove a single file name from previously computed common prefix
def removeSingleFileNameFromPrefix(prefix: Path, fileNames: List[String]): Path = {
-def isFileNameInPrefix(prefix: Path, fileName: String) = prefix.endsWith(Paths.get(fileName).getFileName)
+def isFileNameInPrefix(prefix: Path, fileName: String) = prefix.endsWith(Path.of(fileName).getFileName)

fileNames match {
case head :: tail if tail.isEmpty && isFileNameInPrefix(prefix, head) =>
@@ -180,7 +180,7 @@

private def removeOneName(path: Path): Path =
if (path.getNameCount == 1) {
Paths.get("")
Path.of("")
} else path.getParent

def deleteDirectoryRecursively(path: Path): Box[Unit] = {
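
For orientation, a runnable sketch of the element-wise prefix logic shown above, with dataset names invented for illustration; this is what the ZipIO changes below build on:

    import java.nio.file.Path
    import scala.jdk.CollectionConverters._

    // Longest common prefix of two paths, element by element, mirroring commonPrefix above.
    def commonPrefixOfTwo(p1: Path, p2: Path): Path = {
      val shared = p1.iterator.asScala
        .zip(p2.iterator.asScala)
        .takeWhile { case (a, b) => a == b }
        .map(_._1)
      val joined = shared.mkString("/")
      Path.of(if (p1.startsWith("/")) s"/$joined" else joined)
    }

    // commonPrefixOfTwo(Path.of("my-dataset/color/1"), Path.of("my-dataset/segmentation/1"))
    //   returns my-dataset
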
18 changes: 9 additions & 9 deletions util/src/main/scala/com/scalableminds/util/io/ZipIO.scala
@@ -1,7 +1,7 @@
package com.scalableminds.util.io

import java.io._
-import java.nio.file.{Files, Path, Paths}
+import java.nio.file.{Files, Path}
import java.util.zip.{GZIPOutputStream => DefaultGZIPOutputStream, _}
import com.scalableminds.util.tools.{Fox, FoxImplicits, TextUtils}
import com.typesafe.scalalogging.LazyLogging
@@ -178,23 +178,23 @@ object ZipIO extends LazyLogging with FoxImplicits {

val zipEntries = zip.entries.asScala.filter { e: ZipEntry =>
!e.isDirectory && (includeHiddenFiles || !isFileHidden(e) || hiddenFilesWhitelist.contains(
-Paths.get(e.getName).getFileName.toString))
+Path.of(e.getName).getFileName.toString))
}.toList

val commonPrefix = if (truncateCommonPrefix) {
-val commonPrefixNotFixed = PathUtils.commonPrefix(zipEntries.map(e => Paths.get(e.getName)))
+val commonPrefixNotFixed = PathUtils.commonPrefix(zipEntries.map(e => Path.of(e.getName)))
val strippedPrefix =
PathUtils.cutOffPathAtLastOccurrenceOf(commonPrefixNotFixed, excludeFromPrefix.getOrElse(List.empty))
PathUtils.removeSingleFileNameFromPrefix(strippedPrefix, zipEntries.map(_.getName))
} else {
Paths.get("")
Path.of("")
}

val resultFox = zipEntries.foldLeft[Fox[List[A]]](Fox.successful(List.empty)) { (results, entry) =>
results.shiftBox.map {
case Full(rs) =>
val input: InputStream = zip.getInputStream(entry)
-val path = commonPrefix.relativize(Paths.get(entry.getName))
+val path = commonPrefix.relativize(Path.of(entry.getName))
val innerResultFox: Fox[List[A]] = Fox.fromFutureBox(f(path, input).futureBox.map {
case Full(result) =>
input.close()
@@ -230,16 +230,16 @@

val zipEntries = zip.entries.asScala.filter { e: ZipEntry =>
!e.isDirectory && (includeHiddenFiles || !isFileHidden(e) || hiddenFilesWhitelist.contains(
-Paths.get(e.getName).getFileName.toString))
+Path.of(e.getName).getFileName.toString))
}.toList

val commonPrefix = if (truncateCommonPrefix) {
-val commonPrefixNotFixed = PathUtils.commonPrefix(zipEntries.map(e => Paths.get(e.getName)))
+val commonPrefixNotFixed = PathUtils.commonPrefix(zipEntries.map(e => Path.of(e.getName)))
val strippedPrefix =
PathUtils.cutOffPathAtLastOccurrenceOf(commonPrefixNotFixed, excludeFromPrefix.getOrElse(List.empty))
PathUtils.removeSingleFileNameFromPrefix(strippedPrefix, zipEntries.map(_.getName))
} else {
Paths.get("")
Path.of("")
}

val result = zipEntries.foldLeft[Box[List[A]]](Full(Nil)) { (results, entry) =>
@@ -248,7 +248,7 @@
var input: InputStream = null
try {
input = zip.getInputStream(entry)
-val path = commonPrefix.relativize(Paths.get(entry.getName))
+val path = commonPrefix.relativize(Path.of(entry.getName))
val r = f(path, input) match {
case Full(result) =>
Full(rs :+ result)
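
Both unzip variants thus hand the callback paths that are relative to the computed common prefix. A small sketch of the effect, with entry names invented for illustration:

    import java.nio.file.Path

    // With truncateCommonPrefix = true, the shared leading directory is stripped:
    val commonPrefix = Path.of("my-dataset")                    // computed via PathUtils.commonPrefix
    val entry        = Path.of("my-dataset/color/1/header.wkw") // a ZipEntry name
    val callbackPath = commonPrefix.relativize(entry)           // color/1/header.wkw

    // With truncateCommonPrefix = false, the prefix is the empty path and
    // relativize leaves entry names unchanged.
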
@@ -22,7 +22,7 @@
import com.scalableminds.webknossos.datastore.services._
import com.scalableminds.webknossos.datastore.services.mesh.{MeshFileService, MeshMappingHelper}
import com.scalableminds.webknossos.datastore.services.segmentindex.SegmentIndexFileService
import com.scalableminds.webknossos.datastore.services.uploading._
-import com.scalableminds.webknossos.datastore.storage.DataVaultService
+import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptorService}
import com.scalableminds.util.tools.Box.tryo
import com.scalableminds.util.tools.{Box, Empty, Failure, Full}
import com.scalableminds.webknossos.datastore.services.connectome.{
@@ -56,6 +56,7 @@ class DataSourceController @Inject()(
agglomerateService: AgglomerateService,
storageUsageService: DSUsedStorageService,
datasetErrorLoggingService: DSDatasetErrorLoggingService,
+remoteSourceDescriptorService: RemoteSourceDescriptorService,
exploreRemoteLayerService: ExploreRemoteLayerService,
uploadService: UploadService,
composeService: ComposeService,
@@ -385,7 +386,9 @@
for {
dataSource <- dataSourceRepository.get(DataSourceId(datasetDirectoryName, organizationId)).toFox ?~> Messages(
"dataSource.notFound") ~> NOT_FOUND
-_ <- dataSourceService.updateDataSource(request.body.copy(id = dataSource.id), expectExisting = true)
+_ <- dataSourceService.updateDataSource(request.body.copy(id = dataSource.id),
+       expectExisting = true,
+       preventNewPaths = true)
} yield Ok
}
}
@@ -395,6 +398,9 @@
Action.async(validateJson[DataSource]) { implicit request =>
accessTokenService.validateAccessFromTokenContext(UserAccessRequest.administrateDataSources) {
for {
+_ <- Fox.fromBool(
+       request.body.allExplicitPaths
+         .forall(remoteSourceDescriptorService.pathIsAllowedToAddDirectly)) ?~> "dataSource.add.pathsNotAllowed"
reservedAdditionalInfo <- dsRemoteWebknossosClient.reserveDataSourceUpload(
ReserveUploadInformation(
uploadId = "", // Set by core backend
@@ -410,7 +416,9 @@
)
) ?~> "dataset.upload.validation.failed"
datasourceId = DataSourceId(reservedAdditionalInfo.directoryName, organizationId)
-_ <- dataSourceService.updateDataSource(request.body.copy(id = datasourceId), expectExisting = false)
+_ <- dataSourceService.updateDataSource(request.body.copy(id = datasourceId),
+       expectExisting = false,
+       preventNewPaths = false)
uploadedDatasetId <- dsRemoteWebknossosClient.reportUpload(datasourceId,
0L,
needsConversion = false,
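
Net effect of the controller changes: update keeps an existing datasource's path set frozen (preventNewPaths = true), while add accepts a datasource only if every explicit path passes pathIsAllowedToAddDirectly. That predicate lives in RemoteSourceDescriptorService and is not part of this diff; the following self-contained sketch only illustrates the kind of rule the new error message describes, with an invented internal-storage prefix:

    import java.nio.file.Path

    object PathCheckSketch {
      // Assumption for illustration only: prefix of WEBKNOSSOS's own object storage.
      private val internalStoragePrefix = "s3://webknossos-managed/"

      def pathAllowedSketch(rawPath: String, datasetDir: Path): Boolean =
        if (rawPath.contains("://"))                 // remote URI, e.g. s3:// or https://
          !rawPath.startsWith(internalStoragePrefix) // must not target the internal object storage
        else                                         // local path: must not leave the dataset directory
          datasetDir.resolve(rawPath).normalize().startsWith(datasetDir.normalize())
    }

    // PathCheckSketch.pathAllowedSketch("../other-dataset/color/1", Path.of("/data/org1/ds1")) == false
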
@@ -1,6 +1,6 @@
package com.scalableminds.webknossos.datastore.controllers

-import java.nio.file.{Files, Path, Paths}
+import java.nio.file.{Files, Path}
import com.google.inject.Inject
import com.scalableminds.util.tools.{Fox, FoxImplicits}
import com.scalableminds.webknossos.datastore.DataStoreConfig
@@ -30,7 +30,7 @@ class ExportsController @Inject()(webknossosClient: DSRemoteWebknossosClient,
extends Controller
with FoxImplicits {

-private val dataBaseDir: Path = Paths.get(config.Datastore.baseDirectory)
+private val dataBaseDir: Path = Path.of(config.Datastore.baseDirectory)

override def allowRemoteOrigin: Boolean = true

@@ -8,7 +8,7 @@
import org.apache.commons.lang3.builder.HashCodeBuilder

import java.nio.ByteBuffer
import java.nio.channels.{AsynchronousFileChannel, CompletionHandler}
-import java.nio.file.{Files, Path, Paths, StandardOpenOption}
+import java.nio.file.{Files, Path, StandardOpenOption}
import java.util.stream.Collectors
import scala.concurrent.{ExecutionContext, Promise}
import scala.jdk.CollectionConverters._
@@ -96,7 +96,7 @@ class FileSystemDataVault extends DataVault {
for {
_ <- Fox.fromBool(uri.getScheme == null || uri.getScheme == DataVaultService.schemeFile) ?~> "trying to read from FileSystemDataVault, but uri scheme is not file or null"
_ <- Fox.fromBool(uri.getHost == null || uri.getHost.isEmpty) ?~> s"trying to read from FileSystemDataVault, but hostname ${uri.getHost} is non-empty"
-localPath = Paths.get(uri.getPath)
+localPath = Path.of(uri.getPath)
_ <- Fox.fromBool(localPath.isAbsolute) ?~> "trying to read from FileSystemDataVault, but hostname is non-empty"
} yield localPath
}
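
For clarity, what the three guards above accept and reject (URIs invented for illustration):

    import java.net.URI
    import java.nio.file.Path

    // Accepted: file scheme or no scheme, no host, absolute path.
    Path.of(new URI("file:///data/org1/ds1/color/1").getPath) // ok: file scheme
    Path.of(new URI("/data/org1/ds1/color/1").getPath)        // ok: scheme is null

    // Rejected: s3://bucket/x (scheme is not file), file://host/x (non-empty
    // host), and relative paths such as color/1 (the isAbsolute check fails).
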
@@ -275,6 +275,9 @@ trait DataLayerLike {
case _ => None
}

+def allExplicitPaths: Seq[String] =
+  magsOpt.map(_.flatMap(_.path)).orElse(wkwResolutionsOpt.map(_.flatMap(_.path))).getOrElse(Seq.empty) ++
+    attachments.map(_.allAttachments.map(_.path.toString)).getOrElse(Seq.empty)
}

object DataLayerLike {
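
allExplicitPaths gathers only the paths spelled out in datasource-properties.json: per-mag paths (falling back to the legacy wkwResolutions paths) plus attachment paths. Mags stored implicitly next to the properties file contribute nothing, which is what makes the set comparison in assertNoNewPaths below meaningful. A simplified, self-contained analog:

    // Stand-in types for illustration; the real trait reads magsOpt,
    // wkwResolutionsOpt and attachments as shown above.
    final case class MagEntry(mag: String, path: Option[String])
    final case class LayerSketch(mags: Seq[MagEntry], attachmentPaths: Seq[String]) {
      def allExplicitPaths: Seq[String] = mags.flatMap(_.path) ++ attachmentPaths
    }

    val layer = LayerSketch(
      mags = Seq(MagEntry("1-1-1", Some("s3://bucket/ds/color/1")), MagEntry("2-2-1", None)),
      attachmentPaths = Seq("./meshes/meshfile.hdf5")
    )
    // layer.allExplicitPaths == Seq("s3://bucket/ds/color/1", "./meshes/meshfile.hdf5")
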
@@ -65,6 +65,7 @@ package object datasource {

def withUpdatedId(newId: DataSourceId): GenericDataSource[T] = copy(id = newId)

+def allExplicitPaths: Seq[String] = dataLayers.flatMap(_.allExplicitPaths)
}

object GenericDataSource {
@@ -1,6 +1,6 @@
package com.scalableminds.webknossos.datastore.services

-import java.nio.file.Paths
+import java.nio.file.Path
import com.scalableminds.webknossos.datastore.DataStoreConfig
import com.scalableminds.webknossos.datastore.services.mapping.AgglomerateService
import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService
@@ -23,7 +23,7 @@
agglomerateService: AgglomerateService)(implicit ec: ExecutionContext) {

val binaryDataService: BinaryDataService = new BinaryDataService(
-Paths.get(config.Datastore.baseDirectory),
+Path.of(config.Datastore.baseDirectory),
Some(agglomerateService),
Some(remoteSourceDescriptorService),
Some(chunkCacheService.sharedChunkContentsCache),
@@ -10,7 +10,7 @@
import com.scalableminds.util.tools.Box.tryo
import org.apache.commons.io.FileUtils
import play.api.libs.json.{Json, OFormat}

-import java.nio.file.{Files, Path, Paths}
+import java.nio.file.{Files, Path}
import javax.inject.Inject
import scala.concurrent.ExecutionContext

@@ -30,7 +30,7 @@ class DSUsedStorageService @Inject()(config: DataStoreConfig)(implicit ec: Execu
extends FoxImplicits
with LazyLogging {

-private val baseDir: Path = Paths.get(config.Datastore.baseDirectory)
+private val baseDir: Path = Path.of(config.Datastore.baseDirectory)

private def noSymlinksFilter(p: Path) = !Files.isSymbolicLink(p)

@@ -22,7 +22,7 @@
import play.api.libs.json.Json

import java.io.{File, FileWriter}
import java.net.URI
-import java.nio.file.{Files, Path, Paths}
+import java.nio.file.{Files, Path}
import scala.concurrent.ExecutionContext
import scala.concurrent.duration._
import scala.io.Source
@@ -46,10 +46,10 @@ class DataSourceService @Inject()(

override protected def tickerInitialDelay: FiniteDuration = config.Datastore.WatchFileSystem.initialDelay

-val dataBaseDir: Path = Paths.get(config.Datastore.baseDirectory)
+val dataBaseDir: Path = Path.of(config.Datastore.baseDirectory)

-private val propertiesFileName = Paths.get(GenericDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON)
-private val logFileName = Paths.get("datasource-properties-backups.log")
+private val propertiesFileName = Path.of(GenericDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON)
+private val logFileName = Path.of("datasource-properties-backups.log")

private var inboxCheckVerboseCounter = 0

@@ -138,7 +138,7 @@
if (isRemote) {
MagPathInfo(dataLayer.name, mag.mag, magURI.toString, magURI.toString, hasLocalData = false)
} else {
-val magPath = Paths.get(magURI)
+val magPath = Path.of(magURI)
val realPath = magPath.toRealPath()
// Does this dataset have local data, i.e. the data that is referenced by the mag path is within the dataset directory
val isLocal = realPath.startsWith(datasetPath.toAbsolutePath)
@@ -269,20 +269,38 @@
}
}

-def updateDataSource(dataSource: DataSource, expectExisting: Boolean): Fox[Unit] = {
+def updateDataSource(dataSource: DataSource, expectExisting: Boolean, preventNewPaths: Boolean): Fox[Unit] = {
val organizationDir = dataBaseDir.resolve(dataSource.id.organizationId)
val dataSourcePath = organizationDir.resolve(dataSource.id.directoryName)
for {
_ <- validateDataSource(dataSource, organizationDir).toFox
propertiesFile = dataSourcePath.resolve(propertiesFileName)
_ <- Fox.runIf(!expectExisting)(ensureDirectoryBox(dataSourcePath).toFox)
_ <- Fox.runIf(!expectExisting)(Fox.fromBool(!Files.exists(propertiesFile))) ?~> "dataSource.alreadyPresent"
+_ <- Fox.runIf(expectExisting && preventNewPaths)(assertNoNewPaths(dataSourcePath, dataSource)) ?~> "dataSource.update.newExplicitPaths"
_ <- Fox.runIf(expectExisting)(backupPreviousProperties(dataSourcePath).toFox) ?~> "Could not update datasource-properties.json"
_ <- JsonHelper.writeToFile(propertiesFile, dataSource).toFox ?~> "Could not update datasource-properties.json"
_ <- dataSourceRepository.updateDataSource(dataSource)
} yield ()
}

+private def assertNoNewPaths(dataSourcePath: Path, newDataSource: DataSource): Fox[Unit] = {
+  val propertiesPath = dataSourcePath.resolve(propertiesFileName)
+  if (Files.exists(propertiesPath)) {
+    Fox
+      .runOptional(newDataSource.toUsable) { newUsableDataSource =>
+        Fox.runOptional(dataSourceFromDir(dataSourcePath, newDataSource.id.organizationId).toUsable) {
+          oldUsableDataSource =>
+            val oldPaths = oldUsableDataSource.allExplicitPaths.toSet
+            Fox.fromBool(newUsableDataSource.allExplicitPaths.forall(oldPaths.contains))
+        }
+      }
+      .map(_ => ())
+  } else {
+    Fox.successful(())
+  }
+}

private def backupPreviousProperties(dataSourcePath: Path): Box[Unit] = {
val propertiesFile = dataSourcePath.resolve(propertiesFileName)
val previousContentOrEmpty = if (Files.exists(propertiesFile)) {
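
At its core, assertNoNewPaths is a set-containment check: every explicit path of the incoming datasource must already occur in the persisted one, so deleting mags or attachments remains legal while any genuinely new path fails with dataSource.update.newExplicitPaths and points users to the compose functionality. In isolation:

    // Containment, not equality: removals pass, additions fail.
    val oldPaths = Set("s3://bucket/ds/color/1", "./meshes/meshfile.hdf5")
    val shrunk   = Seq("s3://bucket/ds/color/1")                    // a path was removed: allowed
    val extended = Seq("s3://bucket/ds/color/1", "/elsewhere/data") // a new path appeared: rejected
    assert(shrunk.forall(oldPaths.contains))
    assert(!extended.forall(oldPaths.contains))

Note that the guard only runs when a properties file already exists and both the old and the new datasource parse as usable; otherwise it passes silently.
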
@@ -19,7 +19,7 @@
import org.apache.commons.io.FilenameUtils
import play.api.i18n.{Messages, MessagesProvider}
import play.api.libs.json.{Json, OFormat}

-import java.nio.file.Paths
+import java.nio.file.Path
import javax.inject.Inject
import scala.collection.mutable.ListBuffer
import scala.concurrent.ExecutionContext
@@ -89,7 +89,7 @@ class ConnectomeFileService @Inject()(config: DataStoreConfig,
extends FoxImplicits
with LazyLogging {

-private val dataBaseDir = Paths.get(config.Datastore.baseDirectory)
+private val dataBaseDir = Path.of(config.Datastore.baseDirectory)
private val localConnectomesDir = "connectomes"
private val hdf5ConnectomeFileExtension = "hdf5"

@@ -8,13 +8,13 @@
import com.scalableminds.webknossos.datastore.services.connectome.SynapticPartne
import com.scalableminds.webknossos.datastore.storage.{CachedHdf5File, Hdf5FileCache}
import com.scalableminds.webknossos.datastore.DataStoreConfig

-import java.nio.file.Paths
+import java.nio.file.Path
import javax.inject.Inject
import scala.concurrent.ExecutionContext

class Hdf5ConnectomeFileService @Inject()(config: DataStoreConfig) extends FoxImplicits with ConnectomeFileUtils {

-private val dataBaseDir = Paths.get(config.Datastore.baseDirectory)
+private val dataBaseDir = Path.of(config.Datastore.baseDirectory)

private lazy val fileHandleCache = new Hdf5FileCache(30)

@@ -21,7 +21,7 @@
import com.scalableminds.webknossos.datastore.storage.{AgglomerateFileKey, Remot
import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.io.FilenameUtils

-import java.nio.file.Paths
+import java.nio.file.Path
import javax.inject.Inject
import scala.concurrent.ExecutionContext
import scala.concurrent.duration.DurationInt
@@ -34,7 +34,7 @@ class AgglomerateService @Inject()(config: DataStoreConfig,
with FoxImplicits {
private val localAgglomeratesDir = "agglomerates"
private val hdf5AgglomerateFileExtension = "hdf5"
-private val dataBaseDir = Paths.get(config.Datastore.baseDirectory)
+private val dataBaseDir = Path.of(config.Datastore.baseDirectory)

private val agglomerateFileKeyCache
: AlfuCache[(DataSourceId, String, String), AgglomerateFileKey] = AlfuCache() // dataSourceId, layerName, mappingName → AgglomerateFileKey