Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package org.broadinstitute.dsde.firecloud.utils

import org.broadinstitute.dsde.firecloud.model.{EntityUpdateDefinition, FlexibleModelSchema, ModelSchema}
import org.broadinstitute.dsde.firecloud.service.TSVFileSupport
import org.broadinstitute.dsde.firecloud.utils.TsvFileSupportBenchmark.TsvData
import org.openjdk.jmh.annotations.{Benchmark, Scope, State}
import org.openjdk.jmh.infra.Blackhole

object TsvFileSupportBenchmark {
@State(Scope.Thread)
class TsvData {
val entityType: String = "sample"
val memberTypeOpt: Option[String] = None
// representation of the inbound TSV row
val row: Seq[String] = Seq(
"0005",
"foo",
"\"some\tquoted\tvalue\"",
"42",
"true",
"-123.456",
"gs://some-bucket/somefile.ext",
"""{"entityType":"targetType", "entityName":"targetName"}""",
"[1,2,3,4,5]"
)
val colInfo: Seq[(String, Option[String])] = Seq(
("sample_id", None),
("string", None),
("quotedstring", None),
("int", None),
("boolean", None),
("double", None),
("file", None),
("reference", None),
("array", None)
)
val modelSchema: ModelSchema = FlexibleModelSchema
val deleteEmptyValues: Boolean = false
}
}

class TsvFileSupportBenchmark {

@Benchmark
def makeEntityRows(blackHole: Blackhole, tsvData: TsvData): EntityUpdateDefinition = {

val result: EntityUpdateDefinition = TsvFileSupportHarness.setAttributesOnEntity(tsvData.entityType,
tsvData.memberTypeOpt,
tsvData.row,
tsvData.colInfo,
tsvData.modelSchema,
tsvData.deleteEmptyValues
)
blackHole.consume(result)
result
}

}

// helper object to get access to TSVFileSupport trait
object TsvFileSupportHarness extends TSVFileSupport {}
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success, Try}

/**
* The different types of tsv import/export formats
*/
* The different types of tsv import/export formats
*/
object TsvTypes {
sealed trait TsvType
case object ENTITY extends TsvType {
Expand Down Expand Up @@ -179,31 +179,64 @@ trait TSVFileSupport {
Creates an AttributeValue whose implementation is more closely tied to the value of the input.
*/
def stringToTypedAttribute(value: String): Attribute =
Try(java.lang.Integer.parseInt(value)) match {
case Success(intValue) => AttributeNumber(intValue)
case Failure(_) =>
Try(java.lang.Double.parseDouble(value)) match {
// because we represent AttributeNumber as a BigDecimal, and BigDecimal has no concept of infinity or NaN,
// if we find infinite/NaN numbers here, don't save them as AttributeNumber; instead let them fall through
// to AttributeString.
case Success(doubleValue)
if !Double.NegativeInfinity.equals(doubleValue)
&& !Double.PositiveInfinity.equals(doubleValue)
&& !Double.NaN.equals(doubleValue)
&& !matchesLiteral(value) =>
AttributeNumber(doubleValue)
case _ =>
Try(BooleanUtils.toBoolean(value.toLowerCase, "true", "false")) match {
case Success(booleanValue) => AttributeBoolean(booleanValue)
case Failure(_) =>
Try(value.parseJson.convertTo[AttributeEntityReference]) match {
case Success(ref) => ref
case Failure(_) => AttributeString(value)
}
}
}
if (value.length > 1 && value.startsWith("\"") && value.endsWith("\"")) {
// if this value starts and ends with a quote, it should always be treated as a string
AttributeString(value.substring(1, value.length - 1))
} else {
// else, inspect the value to find an appropriate datatype
toAttribute(value)
}

// the `toAttribute` method checks the first non-whitespace character of inbound TSV cells.
private val possibleNumbers = "0123456789+-." // possible first characters for a number
private val possibleBooleans = "tfTF" // possible first characters for a boolean
private val possibleReferences = "{" // possible first characters for an entity reference
private val possibleNonStrings = possibleNumbers + possibleBooleans + possibleReferences

private def toAttribute(value: String): Attribute = {
val trimmed = value.trim
// empty string
if (trimmed.isEmpty) {
AttributeString(value)
} else {
// find the first character of the inbound cell
val firstChar = trimmed.charAt(0)

// using the first character, try to convert the cell to a number, boolean, or reference
val maybeNonString: Option[Attribute] = firstChar match {
// first char doesn't match any known starters; this is a string
case _ if !possibleNonStrings.contains(firstChar) => Some(AttributeString(value))
// could this be a number?
case _ if possibleNumbers.contains(firstChar) =>
Try(java.lang.Integer.parseInt(value)) match {
case Success(intValue) => Some(AttributeNumber(intValue))
case Failure(_) =>
Try(java.lang.Double.parseDouble(value)) match {
// because we represent AttributeNumber as a BigDecimal, and BigDecimal has no concept of infinity or NaN,
// if we find infinite/NaN numbers here, don't save them as AttributeNumber; instead let them fall through
// to AttributeString.
case Success(doubleValue)
if !Double.NegativeInfinity.equals(doubleValue)
&& !Double.PositiveInfinity.equals(doubleValue)
&& !Double.NaN.equals(doubleValue)
&& !matchesLiteral(value) =>
Some(AttributeNumber(doubleValue))
case _ => None
}
}
// could this be a boolean?
case _ if possibleBooleans.contains(firstChar) =>
Try(BooleanUtils.toBoolean(value.toLowerCase, "true", "false")).toOption.map(AttributeBoolean)
// could this be a boolean?
case _ if possibleReferences.contains(firstChar) =>
Try(value.parseJson.convertTo[AttributeEntityReference]).toOption
}

// if we didn't find one of the other attributes, it's a string.
maybeNonString.getOrElse(AttributeString(value))
}
}

def checkForJson(value: String): Attribute =
Try(value.parseJson) match {
case Success(_: JsObject) => AttributeValueRawJson(value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class TSVFileSupportSpec extends AnyFreeSpec with TSVFileSupport {
Double.MinPositiveValue.toString -> AttributeNumber(Double.MinPositiveValue),
Double.MaxValue.toString -> AttributeNumber(Double.MaxValue)
)
val stringTestCases = List("", "string", "true525600", ",")
val stringTestCases = List("", "string", "true525600", ",", "\"")
val referenceTestCases = Map(
"""{"entityType":"targetType","entityName":"targetName"}""" -> AttributeEntityReference("targetType",
"targetName"
Expand Down
Loading