From 7a0a377734293655e3ee07b23b41e566b2210f6c Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Wed, 11 Jun 2025 19:42:43 +0400 Subject: [PATCH 1/2] add nulls parse option in readExcel --- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 38 +++++++++++++++--- .../kotlinx/dataframe/io/XlsxTest.kt | 18 +++++++++ .../src/test/resources/withNulls.xlsx | Bin 0 -> 3494 bytes 3 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 dataframe-excel/src/test/resources/withNulls.xlsx diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index 3f39c57837..c266512755 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -98,6 +98,8 @@ private fun setWorkbookTempDirectory() { * when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE], * ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc. * for unstructured data. + * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true). + * These cells are ignored when inferring the column’s type. */ public fun DataFrame.Companion.readExcel( url: URL, @@ -108,6 +110,7 @@ public fun DataFrame.Companion.readExcel( rowsCount: Int? 
= null, nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE, firstRowIsHeader: Boolean = true, + parseEmptyAsNull: Boolean = true, ): AnyFrame { setWorkbookTempDirectory() val wb = WorkbookFactory.create(url.openStream()) @@ -121,6 +124,7 @@ public fun DataFrame.Companion.readExcel( rowsCount, nameRepairStrategy, firstRowIsHeader, + parseEmptyAsNull ) } } @@ -138,6 +142,8 @@ public fun DataFrame.Companion.readExcel( * when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE], * ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc. * for unstructured data. + * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true). + * These cells are ignored when inferring the column’s type. */ public fun DataFrame.Companion.readExcel( file: File, @@ -148,6 +154,7 @@ public fun DataFrame.Companion.readExcel( rowsCount: Int? = null, nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE, firstRowIsHeader: Boolean = true, + parseEmptyAsNull: Boolean = true, ): AnyFrame { setWorkbookTempDirectory() @Suppress("ktlint:standard:comment-wrapping") @@ -162,6 +169,7 @@ public fun DataFrame.Companion.readExcel( rowsCount, nameRepairStrategy, firstRowIsHeader, + parseEmptyAsNull ) } } @@ -179,6 +187,8 @@ public fun DataFrame.Companion.readExcel( * when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE], * ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc. * for unstructured data. + * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true). + * These cells are ignored when inferring the column’s type. */ public fun DataFrame.Companion.readExcel( fileOrUrl: String, @@ -189,6 +199,7 @@ public fun DataFrame.Companion.readExcel( rowsCount: Int? 
= null, nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE, firstRowIsHeader: Boolean = true, + parseEmptyAsNull: Boolean = true, ): AnyFrame = readExcel( asUrl(fileOrUrl), @@ -199,6 +210,7 @@ public fun DataFrame.Companion.readExcel( rowsCount, nameRepairStrategy, firstRowIsHeader, + parseEmptyAsNull ) /** @@ -214,6 +226,8 @@ public fun DataFrame.Companion.readExcel( * when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE], * ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc. * for unstructured data. + * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true). + * These cells are ignored when inferring the column’s type. */ public fun DataFrame.Companion.readExcel( inputStream: InputStream, @@ -224,6 +238,7 @@ public fun DataFrame.Companion.readExcel( rowsCount: Int? = null, nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE, firstRowIsHeader: Boolean = true, + parseEmptyAsNull: Boolean = true, ): AnyFrame { setWorkbookTempDirectory() val wb = WorkbookFactory.create(inputStream) @@ -237,6 +252,7 @@ public fun DataFrame.Companion.readExcel( rowsCount, nameRepairStrategy, firstRowIsHeader, + parseEmptyAsNull ) } } @@ -255,6 +271,8 @@ public fun DataFrame.Companion.readExcel( * when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE], * ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc. * for unstructured data. + * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true). + * These cells are ignored when inferring the column’s type. */ public fun DataFrame.Companion.readExcel( wb: Workbook, @@ -265,11 +283,12 @@ public fun DataFrame.Companion.readExcel( rowsCount: Int? 
= null, nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE, firstRowIsHeader: Boolean = true, + parseEmptyAsNull: Boolean = true, ): AnyFrame { val sheet: Sheet = sheetName ?.let { wb.getSheet(it) ?: error("Sheet with name $sheetName not found") } ?: wb.getSheetAt(0) - return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy, firstRowIsHeader) + return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy, firstRowIsHeader, parseEmptyAsNull) } /** @@ -312,6 +331,7 @@ public fun DataFrame.Companion.readExcel( rowsCount: Int? = null, nameRepairStrategy: NameRepairStrategy = NameRepairStrategy.CHECK_UNIQUE, firstRowIsHeader: Boolean = true, + parseEmptyAsNull: Boolean = true, ): AnyFrame { val columnIndexes: Iterable<Int> = when { columns != null -> getColumnIndices(columns) @@ -364,12 +384,18 @@ public fun DataFrame.Companion.readExcel( ) columnNameCounters[nameFromCell] = columnNameCounters.getOrDefault(nameFromCell, 0) + 1 // increase the counter for specific column name - val getCellValue: (Cell?) -> Any? = when { - formattingOptions != null && index in formattingOptions.columnIndices -> { cell: Cell? -> - formattingOptions.formatter.formatCellValue(cell) + val getCellValue: (Cell?) -> Any? = { cell -> + if (cell == null) { + null + } else { + val rawValue: Any? = if (formattingOptions != null && index in formattingOptions.columnIndices) { + formattingOptions.formatter.formatCellValue(cell) + } else { + cell.cellValue(sheet.sheetName) + } + if (parseEmptyAsNull && rawValue is String && rawValue.isEmpty()) null + else rawValue } - - else -> { cell -> cell.cellValue(sheet.sheetName) } } val values: List<Any?> = valueRowsRange.map { val row: Row? 
= sheet.getRow(it) diff --git a/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt b/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt index faeb91d608..bb03e37e21 100644 --- a/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt +++ b/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt @@ -6,11 +6,14 @@ import kotlinx.datetime.LocalDateTime import org.apache.poi.ss.usermodel.WorkbookFactory import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.convert import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.toColumn +import org.jetbrains.kotlinx.dataframe.api.toInt import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize import org.jetbrains.kotlinx.dataframe.size +import org.jetbrains.kotlinx.dataframe.type import org.junit.Test import java.net.URL import java.nio.file.Files @@ -53,6 +56,7 @@ class XlsxTest { "Sheet1", columns = "A:C", stringColumns = StringColumns("A:C"), + parseEmptyAsNull = false, ) df shouldBe dataFrameOf("col1", "col2", "C")("1", "", "3") } @@ -216,6 +220,7 @@ class XlsxTest { firstRowIsHeader = false, skipRows = 2, rowsCount = 1, + parseEmptyAsNull = false, ) df shouldBe dataFrameOf( @@ -224,4 +229,17 @@ class XlsxTest { "Field 3: ", "", "TEAM 1", "", "", "", "", "Staff Code:", "Staff 1", "", ) } + + @Test + fun `read columns with nulls`() { + val df = DataFrame.readExcel( + testResource("withNulls.xlsx"), + ).convert("age").toInt() + df shouldBe dataFrameOf( + "name" to listOf("Alice", null, "Bob"), + "age" to listOf(23, 27, null), + ) + df["name"].type shouldBe typeOf<String?>() + df["age"].type shouldBe typeOf<Int?>() + } } diff --git a/dataframe-excel/src/test/resources/withNulls.xlsx b/dataframe-excel/src/test/resources/withNulls.xlsx 
new file mode 100644 index 0000000000000000000000000000000000000000..25f3506a2eae2c15a7514caadd027bb1515367e7 GIT binary patch literal 3494 zcmai12T)Vn76n4+NJo^Wh)O4fgsOBK0RnYf&~>`u7LRX&vD5wS}Ma-d_P(_$QcsK3tf&{K=ADDbXm5 z$^E_kMaPS#A05iw%gwd5xB}{*1vE%3OE^td69T2I=KM`;ldTZ^$6#pYri(9|?f9uB zAFTEjsZD=AK>I)_T}Md7M{6K9y$z>#E7V9DML*#UtKyRQ%%av?=~Og3#b{nxQt3A| zymyw+%$IKnMM@QaTsvJl!u+1rd&~waGQy5G?%+4-FVhx-x)bW%ZIzM3{H#azv2KL( z4)ln1YUT?hxrsg4j}zqLKq?e%Fh3-`r@`{kd zzv=@yI4XaH;)6QEtX?j$^GgP?7o;4*z6}#kqyl-gGj_Jzf~_ZhH9g(h!JgdvL3U65 zy6jhK9XY8zr6!y$_v&WD72@n4DgvBPu2y= zu@kHhw~>gKz#Ps45e%fTSM+=d{li5>z0)U$Ye?jXaXTEU_Q4|&FI;xyga^ES1EYA>%DnYN4}4U;$`@S38-u9gcL)QI{p zySKP>kgl|6A=dzhe>bt5^!m$D(2LZ$F_x*23n40Yi)UEc64u9#)j(tP8+)FKcVV?E z=bzlAFUR7uq#UgEu3CBb2UIJSpG%!TlDjerg)yHvg-LRuLA_g@Fpb3UMoKxZ5xZAU zb>V3RnSu;uudVb8_XW}bC}E%sh`%?%q0*)K6H4rq7aXwm1|C>9Pgz?xH-M)4(PSnH zO?8%nPBf8*=QMS?6N4FUFKDVC$Qe}VL~E7ks*k3>nrwr_U(?q9)usC#p(9YBO-n(+77nNJ0chI-~aPGc3m~zBT8*8Dk z@lM#Or%`by1s;9|>*lZVnL4uWr5`?pjJ2y6<^J}GDQM3c2qjdj1Si0N|I3SQ8Rh&@`Dv;5jm+OBFXTIt}Z4t>rY90I13sl)BSdl3Vl_{*r- zcy{D_1w*1JB$$p1WUG8g9Zg$UUkgH%#6O2TdIi(}a$d$_AT(~edhiD73mWoa^zc}& z%EQW)10rIHw8w5+JeX4Tl_)G2JdR;9M?o(+yG? 
z8f_a|vxrlND`o&#PNdo$18+4_@z(l#EV(II`k-Y!ooqc&4oIAbGse*qfb)2y=AOC( z>{LrN!3)DOevoI0A8jU>SE#QQT+fk81W$hWd&(V}ux}bW^y)_VwFbJgA#^7ex;cH| z%YJInrYU*#Vn#`bsE2qf*dG7vuNhxM8m|_@N7);F!%YZ@Vb#8QoR{93nIu{?$xMW~ z3@txe1!uFeD1D~_DsZdqJL0p&g0Mfbfpx0_oG%*nFGLKh4~B8B`s;l=ZY|Jcw23PVgZD+a1Z#daSw+xtpL?{M1g2;Y>A0Q zbAE!HnY*+xu`Ypc22;}UvfHfp^*Lh|E@G^%ATgzrTdag|gX+a2cTK-5FBN>a?O8?j zZhq1)A6r}O7>7 zKF&nH@a$EiTg7az$MDbI`L=a668scxyG7pLC~Xk3$*Zl4*Y=vqxc~Ltx8mE&xrT2i zt+3?$YTH(4H~-Hx$G*xgf9rVqsMccT>=nYqk3Lte&cNQ>;-vl`MV@W*W-ntsTPqweBs>fg7 z--lGNAKe>5aJj2t0O_i7>s}7(8&)TmlUxhpI-c`&WQ`qkp79iN6;GILwl{B#({itB z67q}-h=jFv>59)#57%8k`^g7pJ{wP~H zM2&2i78>X#V2|koe6};vzF@wS4a5r2u7oA zy{5_KtXa>L$Gz`#0P)dqRPVqR;Vu8zy8Acb*&Ktvg0wjjYLqN8K%Jtu3ss@6n)FT@j63_Gc@#xDpkjV)#al4y11!l4X-UukAQi zwBh3iZVV=mvxD}fgxZqKG0qy|)_tGCjBRNZ#!hq-d(g+DnvC_hTXLg(b;LV{X0Lq= z6`Va`SvwQZdQ};DDINdBo%P=FH_;7L{%up<@reDVU<(su)`pY);K$^V?C%2NO@h_8 z$(o}v{~Ky(f^|+x*4)1`n{j1zIwfWyY|7l~yClm_jNs&6rA_tJk%cvC-0%3phNm6Pwg7Hk61Wnc*2^Y4c2Q`ayyhSt$ zP-17tQd;Rjm-*F~_%9>V}q(O91@) zp?f7-5>+|5waILphrLR39h0M#+Ea<;@+ILF-DTa&gybyADt>!ueg<%SUcE@fxo-~P zvaRYkO{;8shzcE-BHA^{vg{b+&_kg~o5e4gN>+cRWJt$a_i@jSLY(#w+<9#7VZs+;BbcSb6PP4_FY_tBG4*)9Yf4wCy7}X`A7UbF zsR1GjT-S9p*&=Ym&pV=~k*4DQX{omjA7J(XOa1To4})!h`1c0eUI_3(fFIn6Ym}e< z#U1WW+;$s)(*P{ePCTG&wc9Z5&fcD{z-$Ga$WGuvzhrOwlHKXs!wnd0!0_FPb_x*x zHHvrduq~p1fC9sBCt|>xJNz7lyHmFX1`roOU3bEn{zvMsGO{~)yYYag1GHx+LMeZe rC@24o67Ak+n;ZZ!fcD&pe8!)L_`hZ~K!E9hHyJ69IwjNa$CdvBqpl{y literal 0 HcmV?d00001 From c9f417739ad158b50986ac76f7ff459e464b61cf Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Thu, 12 Jun 2025 13:53:15 +0400 Subject: [PATCH 2/2] readExcel fixes --- dataframe-excel/api/dataframe-excel.api | 24 ++++++++--------- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 26 ++++++++++++++----- .../kotlinx/dataframe/io/XlsxTest.kt | 3 +-- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/dataframe-excel/api/dataframe-excel.api b/dataframe-excel/api/dataframe-excel.api index 23f3097161..72e26e45fa 100644 --- 
a/dataframe-excel/api/dataframe-excel.api +++ b/dataframe-excel/api/dataframe-excel.api @@ -39,13 +39,13 @@ public final class org/jetbrains/kotlinx/dataframe/io/WorkBookType : java/lang/E public final class org/jetbrains/kotlinx/dataframe/io/XlsxKt { public static final synthetic fun readExcel (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Sheet;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;ILjava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readExcel (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Sheet;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;ILjava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readExcel (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Sheet;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;ILjava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final synthetic fun readExcel (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Workbook;Ljava/lang/String;ILjava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readExcel (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Workbook;Ljava/lang/String;ILjava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readExcel 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Workbook;Ljava/lang/String;ILjava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readExcel$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Sheet;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;ILjava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readExcel$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Sheet;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;ILjava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readExcel$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Sheet;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;ILjava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readExcel$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Workbook;Ljava/lang/String;ILjava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readExcel$default 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Workbook;Ljava/lang/String;ILjava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readExcel$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/apache/poi/ss/usermodel/Workbook;Ljava/lang/String;ILjava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final synthetic fun readExcel-CWg63oo (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final synthetic fun readExcel-CWg63oo (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final synthetic fun readExcel-CWg63oo (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; @@ -54,14 +54,14 @@ public final class org/jetbrains/kotlinx/dataframe/io/XlsxKt { public static synthetic fun readExcel-CWg63oo$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public 
static synthetic fun readExcel-CWg63oo$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readExcel-CWg63oo$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readExcel-ssqQo1E (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readExcel-ssqQo1E (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readExcel-ssqQo1E (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readExcel-ssqQo1E (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readExcel-ssqQo1E$default 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readExcel-ssqQo1E$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readExcel-ssqQo1E$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readExcel-ssqQo1E$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readExcel-Q2e6U8A (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readExcel-Q2e6U8A (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readExcel-Q2e6U8A 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readExcel-Q2e6U8A (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readExcel-Q2e6U8A$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readExcel-Q2e6U8A$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readExcel-Q2e6U8A$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readExcel-Q2e6U8A$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/Integer;Lorg/jetbrains/kotlinx/dataframe/io/NameRepairStrategy;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun toFormattingOptions-lk1XfQA (Ljava/lang/String;Lorg/apache/poi/ss/usermodel/DataFormatter;)Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions; public 
static synthetic fun toFormattingOptions-lk1XfQA$default (Ljava/lang/String;Lorg/apache/poi/ss/usermodel/DataFormatter;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/FormattingOptions; public static final fun writeExcel (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;Lkotlin/jvm/functions/Function2;Ljava/lang/String;ZLorg/jetbrains/kotlinx/dataframe/io/WorkBookType;Z)V diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index c266512755..2e38e0d379 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -124,7 +124,7 @@ public fun DataFrame.Companion.readExcel( rowsCount, nameRepairStrategy, firstRowIsHeader, - parseEmptyAsNull + parseEmptyAsNull, ) } } @@ -169,7 +169,7 @@ public fun DataFrame.Companion.readExcel( rowsCount, nameRepairStrategy, firstRowIsHeader, - parseEmptyAsNull + parseEmptyAsNull, ) } } @@ -210,7 +210,7 @@ public fun DataFrame.Companion.readExcel( rowsCount, nameRepairStrategy, firstRowIsHeader, - parseEmptyAsNull + parseEmptyAsNull, ) /** @@ -252,7 +252,7 @@ public fun DataFrame.Companion.readExcel( rowsCount, nameRepairStrategy, firstRowIsHeader, - parseEmptyAsNull + parseEmptyAsNull, ) } } @@ -288,7 +288,16 @@ public fun DataFrame.Companion.readExcel( val sheet: Sheet = sheetName ?.let { wb.getSheet(it) ?: error("Sheet with name $sheetName not found") } ?: wb.getSheetAt(0) - return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy, firstRowIsHeader, parseEmptyAsNull) + return readExcel( + sheet, + columns, + formattingOptions, + skipRows, + rowsCount, + nameRepairStrategy, + firstRowIsHeader, + parseEmptyAsNull, + ) } /** @@ -393,8 +402,11 @@ public fun DataFrame.Companion.readExcel( } else { cell.cellValue(sheet.sheetName) } - if (parseEmptyAsNull && rawValue is 
String && rawValue.isEmpty()) null - else rawValue + if (parseEmptyAsNull && rawValue is String && rawValue.isEmpty()) { + null + } else { + rawValue + } } } val values: List<Any?> = valueRowsRange.map { diff --git a/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt b/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt index bb03e37e21..95571f6a00 100644 --- a/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt +++ b/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt @@ -56,9 +56,8 @@ class XlsxTest { "Sheet1", columns = "A:C", stringColumns = StringColumns("A:C"), - parseEmptyAsNull = false, ) - df shouldBe dataFrameOf("col1", "col2", "C")("1", "", "3") + df shouldBe dataFrameOf("col1", "col2", "C")("1", null, "3") } @Test