Skip to content

Commit 27fd209

Browse files
committed
expanded on all comments and made all steps of unsupported data source examples clearer and more reproducible
1 parent 7817357 commit 27fd209

File tree

9 files changed

+218
-110
lines changed

9 files changed

+218
-110
lines changed

examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/exposed/compatibilityLayer.kt

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import org.jetbrains.exposed.v1.core.Table
99
import org.jetbrains.exposed.v1.jdbc.Query
1010
import org.jetbrains.kotlinx.dataframe.AnyFrame
1111
import org.jetbrains.kotlinx.dataframe.DataFrame
12+
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
1213
import org.jetbrains.kotlinx.dataframe.api.convertTo
1314
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
1415
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
@@ -30,8 +31,9 @@ inline fun <reified T : Any> Iterable<ResultRow>.convertToDataFrame(): DataFrame
3031
convertToDataFrame().convertTo<T>()
3132

3233
/**
33-
* Retrieves all columns of any [Iterable][Iterable]`<`[ResultRow][ResultRow]`>`, like [Query][Query],
34-
* from Exposed row by row and converts the resulting [Map] into a [DataFrame].
34+
* Retrieves all columns of an [Iterable][Iterable]`<`[ResultRow][ResultRow]`>` from Exposed, like [Query][Query],
35+
* row by row and converts the resulting [Map] of lists into a [DataFrame] by calling
36+
* [Map.toDataFrame].
3537
*/
3638
@JvmName("convertToAnyFrame")
3739
fun Iterable<ResultRow>.convertToDataFrame(): AnyFrame {
@@ -62,26 +64,44 @@ val Expression<*>.readableName: String
6264
/**
6365
* Creates a [DataFrameSchema] from the declared [Table] instance.
6466
*
67+
* This is not needed for conversion, but it can be useful to create a DataFrame [@DataSchema][DataSchema] instance.
68+
*
6569
* @param columnNameToAccessor Optional [MutableMap] which will be filled with entries mapping
6670
* the SQL column name to the accessor name from the [Table].
6771
* This can be used to define a [NameNormalizer] later.
72+
* @see toDataFrameSchemaWithNameNormalizer
6873
*/
6974
@Suppress("UNCHECKED_CAST")
7075
fun Table.toDataFrameSchema(columnNameToAccessor: MutableMap<String, String> = mutableMapOf()): DataFrameSchema {
76+
// we use reflection to go over all `Column<*>` properties in the Table object
7177
val columns = this::class.memberProperties
7278
.filter { it.returnType.isSubtypeOf(typeOf<Column<*>>()) }
7379
.associate { prop ->
7480
prop as KProperty1<Table, Column<*>>
7581

76-
// retrieve the actual column name
82+
// retrieve the SQL column name
7783
val columnName = prop.get(this).name
78-
// store the actual column name together with the accessor name in the map
84+
// store the SQL column name together with the accessor name in the map
7985
columnNameToAccessor[columnName] = prop.name
8086

8187
// get the column type from `val a: Column<Type>`
8288
val type = prop.returnType.arguments.first().type!!
8389

90+
// and we add the name and column schema type to the `columns` map :)
8491
columnName to ColumnSchema.Value(type)
8592
}
8693
return DataFrameSchemaImpl(columns)
8794
}
95+
96+
/**
97+
* Creates a [DataFrameSchema] from the declared [Table] instance with a [NameNormalizer] to
98+
* convert the SQL column names to the corresponding Kotlin property names.
99+
*
100+
* This is not needed for conversion, but it can be useful to create a DataFrame [@DataSchema][DataSchema] instance.
101+
*
102+
* @see toDataFrameSchema
103+
*/
104+
fun Table.toDataFrameSchemaWithNameNormalizer(): Pair<DataFrameSchema, NameNormalizer> {
    // Filled by `toDataFrameSchema` with "SQL column name -> Table accessor name" entries.
    val columnNameToAccessor = mutableMapOf<String, String>()

    // The map has to be handed to `toDataFrameSchema`; if it is omitted, a fresh default map
    // is filled instead and the normalizer below would return every name unchanged.
    val schema = toDataFrameSchema(columnNameToAccessor)

    // Names that have no entry in the map are passed through as-is.
    return Pair(schema, NameNormalizer { columnNameToAccessor[it] ?: it })
}

examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/exposed/main.kt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,16 @@ fun main() {
5858

5959
// or make plots using Kandy! It's all up to you
6060

61-
// writing a DataFrame back into an SQL database with Exposed can also be done!
61+
// writing a DataFrame back into an SQL database with Exposed can also be done easily!
6262
transaction(db) {
6363
addLogger(StdOutSqlLogger)
6464

6565
// first delete the original contents
6666
Customers.deleteAll()
6767

68-
// batch insert our rows back into the SQL database
68+
// batch-insert our dataframe back into the SQL database as a sequence of rows
6969
Customers.batchInsert(df.asSequence()) { dfRow ->
70+
// we simply go over each value in the row and put it in the right place in the Exposed statement
7071
for (column in Customers.columns) {
7172
this[column as Column<Any?>] = dfRow[column.name]
7273
}

examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/exposed/tables.kt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@ object Customers : Table() {
5151
* This is what we created the [toDataFrameSchema] function for!
5252
*/
5353
fun main() {
54-
val columnNameToAccessor = mutableMapOf<String, String>()
55-
val schema = Customers.toDataFrameSchema(columnNameToAccessor)
54+
val (schema, nameNormalizer) = Customers.toDataFrameSchemaWithNameNormalizer()
5655

5756
// checking whether the schema is converted correctly.
5857
// schema.print()
@@ -62,7 +61,7 @@ fun main() {
6261
// while keeping the correct column names
6362
schema.generateDataClasses(
6463
name = "CustomersDf",
65-
nameNormalizer = NameNormalizer { columnNameToAccessor[it] ?: it },
64+
nameNormalizer = nameNormalizer,
6665
).print()
6766
}
6867

examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/kotlinSpark/typedDataset.kt

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,14 @@ import org.jetbrains.kotlinx.dataframe.api.toDataFrame
1616
import org.jetbrains.kotlinx.dataframe.api.toList
1717
import org.jetbrains.kotlinx.spark.api.withSpark
1818

19-
@DataSchema
20-
data class Name(val firstName: String, val lastName: String)
21-
22-
@DataSchema
23-
data class Person(
24-
val name: Name,
25-
val age: Int,
26-
val city: String?,
27-
val weight: Int?,
28-
val isHappy: Boolean,
29-
)
30-
3119
/**
3220
* With the Kotlin Spark API, normal Kotlin data classes are supported,
3321
* meaning we can reuse the same class for Spark and DataFrame!
3422
*
3523
* Also, since we use an actual class to define the schema, we need no type conversion!
3624
*
25+
* See [Person] and [Name] for an example.
26+
*
3727
* NOTE: You will likely need to run this function with Java 8 or 11 for it to work correctly.
3828
* Use the `runKotlinSparkTypedDataset` Gradle task to do so.
3929
*/
@@ -74,3 +64,15 @@ fun main() = withSpark {
7464
sparkDatasetAgain.printSchema()
7565
sparkDatasetAgain.show()
7666
}
67+
68+
@DataSchema
69+
data class Name(val firstName: String, val lastName: String)
70+
71+
@DataSchema
72+
data class Person(
73+
val name: Name,
74+
val age: Int,
75+
val city: String?,
76+
val weight: Int?,
77+
val isHappy: Boolean,
78+
)

0 commit comments

Comments
 (0)