Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,31 +1,68 @@
package org.broadinstitute.dsde.firecloud.utils

import org.broadinstitute.dsde.firecloud.utils.TsvFormatterBenchmark.Inputs
import org.broadinstitute.dsde.firecloud.model.{FlexibleModelSchema, ModelSchema}
import org.broadinstitute.dsde.firecloud.utils.TsvFormatterBenchmark.EntityData
import org.broadinstitute.dsde.rawls.model._
import org.openjdk.jmh.annotations.{Benchmark, Scope, State}
import org.openjdk.jmh.infra.Blackhole

object TsvFormatterBenchmark {

@State(Scope.Thread)
class Inputs {
val inputNoTab = "foo"
val inputWithTab = "foo\tbar"
class EntityData {
  // Entity type shared by all fixture entities below.
  val entityType: String = "sample"

  // Schema handed to the formatter as its ModelSchema argument.
  val model: ModelSchema = FlexibleModelSchema

  // Column headers: the id column first, then one header per attribute.
  val headers: List[String] = List("sample_id", "col1", "col2", "fourth", "last")

  // Three fixture entities covering string, boolean, numeric, entity-reference
  // and value-list attributes, including one string value containing a tab.
  val entities: Seq[Entity] = {
    val col1Name = AttributeName.withDefaultNS("col1")
    val col2Name = AttributeName.withDefaultNS("col2")
    val fourthName = AttributeName.withDefaultNS("fourth")
    val lastName = AttributeName.withDefaultNS("last")

    val plainEntity = Entity(
      "1",
      entityType,
      Map(
        col1Name -> AttributeString("foo"),
        col2Name -> AttributeBoolean(true),
        fourthName -> AttributeNumber(42),
        lastName -> AttributeString("gs://some-bucket/somefile.ext")
      )
    )

    val referenceEntity = Entity(
      "0005",
      entityType,
      Map(
        col1Name -> AttributeString("bar"),
        col2Name -> AttributeBoolean(false),
        fourthName -> AttributeNumber(98.765),
        lastName -> AttributeEntityReference("targetType", "targetName")
      )
    )

    val listEntity = Entity(
      "789",
      entityType,
      Map(
        col1Name -> AttributeString("baz\tqux"),
        col2Name -> AttributeBoolean(true),
        fourthName -> AttributeNumber(-123.45),
        lastName -> AttributeValueList(
          Seq(
            AttributeString("gs://some-bucket/somefile1.ext"),
            AttributeString("gs://some-bucket/somefile2.ext"),
            AttributeString("gs://some-bucket/somefile3.ext")
          )
        )
      )
    )

    Seq(plainEntity, referenceEntity, listEntity)
  }
}

}

class TsvFormatterBenchmark {

@Benchmark
// Benchmarks TSVFormatter.tsvSafeString on the tab-free input.
def tsvSafeStringNoTab(blackHole: Blackhole, inputs: Inputs): String = {
  val safeValue = TSVFormatter.tsvSafeString(inputs.inputNoTab)
  // hand the value to the Blackhole in addition to returning it
  blackHole.consume(safeValue)
  safeValue
}

@Benchmark
def tsvSafeStringWithTab(blackHole: Blackhole, inputs: Inputs): String = {
val result = TSVFormatter.tsvSafeString(inputs.inputWithTab)
// Benchmarks TSVFormatter.makeEntityRows over the fixture entities in EntityData.
def makeEntityRows(blackHole: Blackhole, entityData: EntityData): List[List[String]] = {
  val rows: List[List[String]] = TSVFormatter.makeEntityRows(
    entityType = entityData.entityType,
    entities = entityData.entities,
    headers = entityData.headers
  )(entityData.model)
  // hand the rows to the Blackhole in addition to returning them
  blackHole.consume(rows)
  rows
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I deleted the two example benchmarks and replaced them with a benchmark that is actually useful for the current case.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ case class ExportEntitiesByTypeArguments(
workspaceNamespace: String,
workspaceName: String,
entityType: String,
attributeNames: Option[IndexedSeq[String]],
attributeNames: Option[List[String]],
model: Option[String]
)

Expand Down Expand Up @@ -80,7 +80,7 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
workspaceNamespace: String,
workspaceName: String,
entityType: String,
attributeNames: Option[IndexedSeq[String]],
attributeNames: Option[List[String]],
model: Option[String],
argSystem: ActorSystem
)(implicit protected val executionContext: ExecutionContext)
Expand Down Expand Up @@ -213,7 +213,7 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,

private def streamSingularType(entityQueries: Seq[EntityQuery],
metadata: EntityTypeMetadata,
entityHeaders: IndexedSeq[String]
entityHeaders: List[String]
): Future[File] = {
val tempEntityFile: File = File.newTemporaryFile(prefix = entityType)
val entitySink: Sink[ByteString, Future[IOResult]] = FileIO.toPath(tempEntityFile.path)
Expand Down Expand Up @@ -275,9 +275,9 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
val membershipSink: Sink[ByteString, Future[IOResult]] = FileIO.toPath(tempMembershipFile.path)

// Headers
val entityHeaders: IndexedSeq[String] =
val entityHeaders: List[String] =
TSVFormatter.makeEntityHeaders(entityType, metadata.attributeNames, attributeNames)
val membershipHeaders: IndexedSeq[String] = TSVFormatter.makeMembershipHeaders(entityType)
val membershipHeaders: List[String] = TSVFormatter.makeMembershipHeaders(entityType)

// Run the Split Entity Flow that pipes entities through the two flows to the two file sinks
// Result of this will be a tuple of Future[IOResult] that represents the success or failure of
Expand Down Expand Up @@ -432,7 +432,7 @@ class ExportEntitiesByTypeActor(rawlsDAO: RawlsDAO,
logger.info(s"completed pairing; result is ${pairs.length} rows")

// TSV headers
val entityHeaders: IndexedSeq[String] = IndexedSeq(s"entity:${entityType}_id", read1Name, read2Name)
val entityHeaders: List[String] = List(s"entity:${entityType}_id", read1Name, read2Name)

// transform the matched pairs into entities
val entities: List[Entity] = pairs.map {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package org.broadinstitute.dsde.firecloud.utils

import com.google.common.annotations.VisibleForTesting
import org.broadinstitute.dsde.rawls.model._
import org.broadinstitute.dsde.firecloud.model._
import org.broadinstitute.dsde.firecloud.service.TsvTypes

import scala.collection.LinearSeq

object TSVFormatter {

// for serializing entity references
Expand All @@ -12,11 +15,12 @@ object TSVFormatter {
/**
* Generate file content from headers and rows.
*
* @param headers IndexedSeq of header string values
* @param rows IndexedSeq of rows, each row an IndexedSeq of string values
* @param headers List of header string values
* @param rows List of rows, each row a List of string values
* @return Headers and rows combined.
*/
def exportToString(headers: IndexedSeq[String], rows: IndexedSeq[IndexedSeq[String]]): String = {
@VisibleForTesting
def exportToString(headers: List[String], rows: List[List[String]]): String = {
val headerString: String = headers.mkString("\t") + "\n"
val rowsString: String = rows.map(_.mkString("\t")).mkString("\n")
headerString + rowsString + "\n"
Expand All @@ -40,30 +44,20 @@ object TSVFormatter {
* Generate a row of values in the same order as the headers.
*
* @param entity The Entity object to extract data from
* @param headerValues List of ordered header values to determine order of values
* @return IndexedSeq of ordered data fields
* @param headerAttributes ordered header values to determine order of values
* @return ordered data fields
*/
private def makeRow(entity: Entity, headerValues: IndexedSeq[String]): IndexedSeq[String] = {
val rowMap: Map[Int, String] = entity.attributes map { case (attributeName, attribute) =>
val columnPosition = headerValues.indexOf(AttributeName.toDelimitedName(attributeName))
val cellValue = tsvSafeAttribute(attribute)
columnPosition -> cellValue
}
// If there are entities that don't have a value for which there is a known header, that will
// be missing in the row. Fill up those positions with empty strings in that case.
val completedRowMap: IndexedSeq[(Int, String)] =
IndexedSeq.range(1, headerValues.size).map { i =>
(i, rowMap.getOrElse(i, ""))
private def makeRow(entity: Entity, headerAttributes: List[AttributeName]): List[String] = {
  // first column of the TSV is always the entity name
  val nameCell = tsvSafeString(entity.name)
  // Remainder of columns are attributes of the entity, in header order, or "" if
  // not found on this entity. The first header is skipped because the entity name
  // fills that column; drop(1) is used instead of tail so that an empty header
  // list yields a name-only row rather than throwing.
  val attributeCells = headerAttributes.drop(1).map { colname =>
    entity.attributes.get(colname).fold("")(attrValue => tsvSafeAttribute(attrValue))
  }
  nameCell :: attributeCells
}

// This rowMap manipulation:
// 1. sorts the position-value map by the key
// 2. converts it to a seq of tuples
// 3. pulls out the second element of the tuple (column value)
// 4. resulting in a seq of the column values sorted by the column position
entity.name +: completedRowMap.sortBy(_._1).map(_._2).toIndexedSeq
}

/**
* Given an Attribute, creates a string that is safe to output into a TSV as a cell value.
* - if the input attribute contains a tab character, then double-quote it
Expand Down Expand Up @@ -99,11 +93,11 @@ object TSVFormatter {
* Generate a header for a membership file.
*
* @param entityType The EntityType
* @return IndexedSeq of header Strings
* @return ordered header Strings
*/
def makeMembershipHeaders(entityType: String)(implicit modelSchema: ModelSchema): IndexedSeq[String] =
IndexedSeq[String](s"${TsvTypes.MEMBERSHIP}:${entityType}_id",
modelSchema.getCollectionMemberType(entityType).get.getOrElse(entityType.replace("_set", ""))
def makeMembershipHeaders(entityType: String)(implicit modelSchema: ModelSchema): List[String] = {
  // first header: "<membership prefix>:<entityType>_id"
  val idHeader = s"${TsvTypes.MEMBERSHIP}:${entityType}_id"
  // second header: the member type reported by the schema, or the entity type
  // with "_set" removed when the schema reports none
  val memberHeader =
    modelSchema.getCollectionMemberType(entityType).get.getOrElse(entityType.replace("_set", ""))
  List[String](idHeader, memberHeader)
}

/**
Expand All @@ -115,9 +109,9 @@ object TSVFormatter {
*/
def makeMembershipRows(entityType: String, entities: Seq[Entity])(implicit
modelSchema: ModelSchema
): Seq[IndexedSeq[String]] = {
): List[List[String]] = {
val memberPlural = pluralizeMemberType(memberTypeFromEntityType(entityType, modelSchema), modelSchema)
entities
entities.toList
.filter {
_.entityType == entityType
}
Expand All @@ -130,10 +124,10 @@ object TSVFormatter {
}
.flatMap {
case (_, AttributeEntityReference(`entityType`, entityName)) =>
Seq(IndexedSeq[String](entity.name, entityName))
List(List[String](entity.name, entityName))
case (_, AttributeEntityReferenceList(refs)) =>
refs.map(ref => IndexedSeq[String](entity.name, ref.entityName))
case _ => Seq.empty
refs.toList.map(ref => List[String](entity.name, ref.entityName))
case _ => List.empty
}
}
}
Expand All @@ -146,9 +140,9 @@ object TSVFormatter {
* @param requestedHeaders Which, if any, columns were requested. If none, return allHeaders (subject to sanitization)
* @return Entity name as first column header, followed by matching entity attribute labels
*/
def makeEntityHeaders(entityType: String, allHeaders: Seq[String], requestedHeaders: Option[IndexedSeq[String]])(
implicit modelSchema: ModelSchema
): IndexedSeq[String] = {
def makeEntityHeaders(entityType: String, allHeaders: Seq[String], requestedHeaders: Option[List[String]])(implicit
modelSchema: ModelSchema
): List[String] = {
// will throw exception if firecloud model was requested and the entity type
val memberPlural = pluralizeMemberType(memberTypeFromEntityType(entityType, modelSchema), modelSchema)

Expand Down Expand Up @@ -179,7 +173,7 @@ object TSVFormatter {
s"${TsvTypes.UPDATE}:${entityType}_id"
case _ => s"${TsvTypes.ENTITY}:${entityType}_id"
}
(entityHeader +: requestedHeadersSansId.getOrElse(filteredAllHeaders)).toIndexedSeq
(entityHeader +: requestedHeadersSansId.getOrElse(filteredAllHeaders)).toList
}

/**
Expand All @@ -190,9 +184,9 @@ object TSVFormatter {
* @param headers The universe of available column headers
* @return Ordered list of rows, each row entry value ordered by its corresponding header position
*/
def makeEntityRows(entityType: String, entities: Seq[Entity], headers: IndexedSeq[String])(implicit
def makeEntityRows(entityType: String, entities: Seq[Entity], headers: List[String])(implicit
modelSchema: ModelSchema
): IndexedSeq[IndexedSeq[String]] = {
): List[List[String]] = {
// if we have a set entity, we need to filter out the attribute array of the members so that we only
// have top-level attributes to construct columns from.
val filteredEntities = if (modelSchema.isCollectionType(entityType)) {
Expand All @@ -201,11 +195,14 @@ object TSVFormatter {
} else {
entities
}

// headers as AttributeNames
val headerAttributes: List[AttributeName] = headers.map(AttributeName.fromDelimitedName)

// Turn them into rows
filteredEntities
filteredEntities.toList
.filter(_.entityType == entityType)
.map(entity => makeRow(entity, headers))
.toIndexedSeq
.map(entity => makeRow(entity, headerAttributes))
}

def memberTypeFromEntityType(entityType: String, modelSchema: ModelSchema): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ trait CookieAuthedApiService extends Directives with RequestBuilding with LazyLo
post {
formFields(Symbol("FCtoken"), Symbol("attributeNames").?, Symbol("model").?) {
(tokenValue, attributeNamesString, modelString) =>
val attributeNames = attributeNamesString.map(_.split(",").toIndexedSeq)
val attributeNames = attributeNamesString.map(_.split(",").toList)
val userInfo = dummyUserInfo(tokenValue)
val exportArgs = ExportEntitiesByTypeArguments(userInfo,
workspaceNamespace,
Expand All @@ -50,7 +50,7 @@ trait CookieAuthedApiService extends Directives with RequestBuilding with LazyLo
get {
cookie("FCtoken") { tokenCookie =>
parameters(Symbol("attributeNames").?, Symbol("model").?) { (attributeNamesString, modelString) =>
val attributeNames = attributeNamesString.map(_.split(",").toIndexedSeq)
val attributeNames = attributeNamesString.map(_.split(",").toList)
val userInfo = dummyUserInfo(tokenCookie.value)
val exportArgs = ExportEntitiesByTypeArguments(userInfo,
workspaceNamespace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ trait ExportEntitiesApiService
requireUserInfo() { userInfo =>
get {
parameters(Symbol("attributeNames").?, Symbol("model").?) { (attributeNamesString, modelString) =>
val attributeNames = attributeNamesString.map(_.split(",").toIndexedSeq)
val attributeNames = attributeNamesString.map(_.split(",").toList)
val exportArgs = ExportEntitiesByTypeArguments(userInfo,
workspaceNamespace,
workspaceName,
Expand All @@ -50,7 +50,7 @@ trait ExportEntitiesApiService
} ~
post {
formFields(Symbol("attributeNames").?, Symbol("model").?) { (attributeNamesString, modelString) =>
val attributeNames = attributeNamesString.map(_.split(",").toIndexedSeq)
val attributeNames = attributeNamesString.map(_.split(",").toList)
val model = if (modelString.nonEmpty && StringUtils.isBlank(modelString.get)) None else modelString
val exportArgs = ExportEntitiesByTypeArguments(userInfo,
workspaceNamespace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with
)
results.head should be("entity:sample_id")

val results2 = testEntityDataSet("sample", sampleList, Option(IndexedSeq.empty))
val results2 = testEntityDataSet("sample", sampleList, Option(List.empty))
results2 should contain theSameElementsAs Seq("entity:sample_id",
"sample_type",
"header_1",
Expand All @@ -92,7 +92,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with
)
results2.head should be("entity:sample_id")

val results3 = testEntityDataSet("sample", sampleList, Option(IndexedSeq("")))
val results3 = testEntityDataSet("sample", sampleList, Option(List("")))
results3 should contain theSameElementsAs Seq("entity:sample_id",
"sample_type",
"header_1",
Expand All @@ -102,10 +102,10 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with
results3.head should be("entity:sample_id")

Seq(
IndexedSeq("header_2", "does_not_exist", "header_1"),
IndexedSeq("header_2", "sample_id", "header_1"),
IndexedSeq("header_1", "header_2"),
IndexedSeq("header_1")
List("header_2", "does_not_exist", "header_1"),
List("header_2", "sample_id", "header_1"),
List("header_1", "header_2"),
List("header_1")
).foreach { requestedHeaders =>
val resultsWithSpecificHeaders =
testEntityDataSet("sample", sampleList, Option(requestedHeaders), TsvTypes.UPDATE)
Expand All @@ -115,7 +115,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with

testEntityDataSet("sample",
sampleList,
Option(IndexedSeq("participant"))
Option(List("participant"))
) should contain theSameElementsInOrderAs Seq("entity:sample_id", "participant")

}
Expand Down Expand Up @@ -287,7 +287,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with

private def testEntityDataSet(entityType: String,
entities: List[Entity],
requestedHeaders: Option[IndexedSeq[String]],
requestedHeaders: Option[List[String]],
tsvType: TsvType = TsvTypes.ENTITY
) = {

Expand Down Expand Up @@ -323,7 +323,7 @@ class TSVFormatterSpec extends AnyFreeSpec with ScalaFutures with Matchers with
): Unit = {
val tsvHeaders = TSVFormatter.makeMembershipHeaders(entityType)
val tsvRows = TSVFormatter.makeMembershipRows(entityType, entities)
val tsv = TSVFormatter.exportToString(tsvHeaders, tsvRows.toIndexedSeq)
val tsv = TSVFormatter.exportToString(tsvHeaders, tsvRows)
tsv shouldNot be(empty)

val lines: List[String] = Source.fromString(tsv).getLines().toList
Expand Down
Loading