diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/RecipeSymbol.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/RecipeSymbol.java
index 87176d914..03485b49c 100644
--- a/wrangler-api/src/main/java/io/cdap/wrangler/api/RecipeSymbol.java
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/RecipeSymbol.java
@@ -20,6 +20,7 @@
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import io.cdap.wrangler.api.annotations.PublicEvolving;
+import io.cdap.wrangler.api.parser.TimeDuration;
import io.cdap.wrangler.api.parser.Token;
import java.util.ArrayList;
@@ -29,36 +30,13 @@
import java.util.TreeSet;
/**
- * This object RecipeSymbol
stores information about all the
- * TokenGroup
( TokenGroup represents a collection of tokens
- * generated from parsing a single directive). The object also contains
- * information about the directives (or plugins) that need to be loaded
- * at the startup time.
- *
- *
This class provides some useful methods for accessing the list of
- * directives or plugins that need to be loaded, the token groups for
- * all the directives tokenized and parsed.
- *
- * This class exposes a builder pattern for constructing the object.
- * in the RecipeVisitor
. The RecipeVisitor
- * constructs RecipeSymbol
using the RecipeSymbol.Builder
+ * RecipeSymbol holds the grammar version, the set of loadable directives and the parsed token groups of a recipe.
*/
+
@PublicEvolving
public final class RecipeSymbol {
- /**
- * Version if specified, else defaults to 1.0
- */
private final String version;
-
- /**
- * Set of directives or plugins that have to loaded
- * during the configuration phase of RecipePipeline.
- */
private final Set loadableDirectives;
-
- /**
- * This maintains a list of tokens for each directive parsed.
- */
private final List tokens;
private RecipeSymbol(String version, Set loadableDirectives, List tokens) {
@@ -67,67 +45,26 @@ private RecipeSymbol(String version, Set loadableDirectives, ListIf there are no directives specified in the recipe, then there would
- * be no plugins to be loaded.
- *
- * @return An empty set if there are not directives to be loaded dynamically,
- * else the list of directives as specified in the recipe.
- */
public Set getLoadableDirectives() {
return loadableDirectives;
}
- /**
- * Returns the version of the grammar as specified in the recipe. The
- * version is the one extracted from Pragma. It's specified as follows
- * #pragma version 2.0;
- *
- * @return version of the grammar used in the recipe.
- */
public String getVersion() {
return version;
}
- /**
- * Returns number of groups tokenized and parsed. The number returned will
- * less than or equal to the number of directives specified in the recipe.
- *
- * Fewer than number of directives is because of the '#pragma' directives
- * @return
- */
public int size() {
return tokens.size();
}
- /**
- * Returns an iterator to the list of token groups maintained by this object.
- *
- * @return iterator to the list of tokens maintained.
- */
public Iterator iterator() {
return tokens.iterator();
}
- /**
- * Static method for creating an instance of the {@code RecipeSymbol.Builder}.
- *
- * @return a instance of builder.
- */
public static RecipeSymbol.Builder builder() {
return new RecipeSymbol.Builder();
}
- /**
- * This method toJson
returns the JsonElement
object
- * representation of this object.
- *
- * @return An instance of JsonElement
representing this object.
- */
public JsonElement toJson() {
JsonObject output = new JsonObject();
output.addProperty("class", this.getClass().getSimpleName());
@@ -150,36 +87,17 @@ public JsonElement toJson() {
}
/**
- * This inner class provides a builder pattern for building
- * the RecipeSymbol
object. In order to create the
- * this builder, one has to use the static method defined in
- * RecipeSymbol
.
- *
- * Following is an example of how this can be done.
- *
- *
- * RecipeSymbol.Builder builder = RecipeSymbol.builder();
- * builder.createTokenGroup(...);
- * builder.addToken(...);
- * builder.addVersion(...);
- * builder.addLoadableDirective(...);
- * RecipeSymbol compiled = builder.build();
- *
+ * Builder class for RecipeSymbol.
+ * Helps in constructing RecipeSymbol instances by accumulating tokens and
+ * metadata.
*/
+
public static final class Builder {
private final List groups = new ArrayList<>();
private final Set loadableDirectives = new TreeSet<>();
private TokenGroup group = null;
private String version = "1.0";
- /**
- * TokenGroup
is created for each directive in
- * the recipe. This method creates a new TokenGroup
- * by passing the SourceInfo
, which represents the
- * information of the source parsed.
- *
- * @param info about the source directive being parsed.
- */
public void createTokenGroup(SourceInfo info) {
if (group != null) {
groups.add(group);
@@ -187,41 +105,22 @@ public void createTokenGroup(SourceInfo info) {
this.group = new TokenGroup(info);
}
- /**
- * This method provides a way to add a Token
to the TokenGroup
.
- *
- * @param token to be added to the token group.
- */
public void addToken(Token token) {
group.add(token);
}
- /**
- * Recipe can specify the version of the grammar. This method
- * allows one to extract and add the version to the RecipeSymbol.
- *
- * @param version of the recipe grammar being used.
- */
+    public void addToken(TimeDuration token) {
+      // NOTE(review): TimeDuration as declared in this patch does not implement Token, so this
+      // cast compiles but throws ClassCastException at runtime for a plain TimeDuration instance.
+      // Confirm the intended design (e.g. TimeDuration implementing Token) before relying on it.
+      group.add((Token) token);
+    }
+
public void addVersion(String version) {
this.version = version;
}
- /**
- * A Recipe can specify the pragma instructions for loading the directives
- * dynamically. This method allows adding the new directive to be loaded
- * as it's parsing through the call graph.
- *
- * @param directive to be loaded dynamically.
- */
public void addLoadableDirective(String directive) {
loadableDirectives.add(directive);
}
- /**
- * Returns a fully constructed and valid RecipeSymbol
object.
- *
- * @return An instance of RecipeSymbol
- */
public RecipeSymbol build() {
groups.add(group);
return new RecipeSymbol(version, loadableDirectives, this.groups);
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java
new file mode 100644
index 000000000..06bc2c434
--- /dev/null
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright © 2017-2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.wrangler.api.parser;
+
+import java.util.Locale;
+
+/**
+ * A size in bytes parsed from text such as {@code "10KB"}, {@code "1.5MB"} or {@code "2GB"}.
+ *
+ * <p>Units are binary multiples (1KB = 1024 bytes) and are matched case-insensitively; a value
+ * without a unit is a number of bytes. Any fraction of a byte is truncated towards zero.
+ */
+public class ByteSize {
+  private final long bytes;
+
+  /**
+   * Parses the given text into a number of bytes.
+   *
+   * @param value text of the form {@code <number>[<unit>]}, where the unit is one of B, KB, MB,
+   *              GB, TB, PB or EB in any case
+   * @throws IllegalArgumentException if the text is null, has no numeric part, has a malformed
+   *                                  number or an unknown unit, or does not fit in a {@code long}
+   */
+  public ByteSize(String value) {
+    this.bytes = parseByteSize(value);
+  }
+
+  /**
+   * @return the parsed size in bytes
+   */
+  public long getBytes() {
+    return bytes;
+  }
+
+  /**
+   * Splits the text into its numeric prefix and unit suffix and scales the number by the unit.
+   */
+  private static long parseByteSize(String value) {
+    if (value == null) {
+      throw new IllegalArgumentException("Byte size must not be null");
+    }
+    String trimmed = value.trim().toUpperCase(Locale.ENGLISH);
+
+    // The numeric part is an optional leading '-' followed by ASCII digits and '.'.
+    int index = trimmed.startsWith("-") ? 1 : 0;
+    while (index < trimmed.length() && isNumberChar(trimmed.charAt(index))) {
+      index++;
+    }
+    if (index == 0) {
+      throw new IllegalArgumentException("No numeric value found in byte size: " + value);
+    }
+
+    double number;
+    try {
+      number = Double.parseDouble(trimmed.substring(0, index));
+    } catch (NumberFormatException e) {
+      throw new IllegalArgumentException("Invalid numeric value in byte size: " + value, e);
+    }
+
+    String unit = trimmed.substring(index).trim();
+    long multiplier;
+    switch (unit) {
+      case "":
+      case "B":
+        multiplier = 1L;
+        break;
+      case "KB":
+        multiplier = 1L << 10;
+        break;
+      case "MB":
+        multiplier = 1L << 20;
+        break;
+      case "GB":
+        multiplier = 1L << 30;
+        break;
+      case "TB":
+        multiplier = 1L << 40;
+        break;
+      case "PB":
+        multiplier = 1L << 50;
+        break;
+      case "EB":
+        multiplier = 1L << 60;
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown byte size unit: " + unit);
+    }
+
+    double scaled = number * multiplier;
+    // A (long) cast silently clamps out-of-range doubles to Long.MIN_VALUE/MAX_VALUE, so values
+    // that do not fit are rejected explicitly. (double) Long.MAX_VALUE is exactly 2^63.
+    if (!(Math.abs(scaled) < (double) Long.MAX_VALUE)) {
+      throw new IllegalArgumentException("Byte size out of range: " + value);
+    }
+    return (long) scaled;
+  }
+
+  /** Returns true for the characters allowed after the optional sign of the numeric part. */
+  private static boolean isNumberChar(char c) {
+    return (c >= '0' && c <= '9') || c == '.';
+  }
+
+  @Override
+  public String toString() {
+    return bytes + " bytes";
+  }
+}
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java
new file mode 100644
index 000000000..c09cf00d0
--- /dev/null
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2017-2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.wrangler.api.parser;
+
+import java.math.BigDecimal;
+import java.util.Locale;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * A length of time parsed from text such as {@code "150ms"}, {@code "2s"}, {@code "1.5m"} or
+ * {@code "3h"}.
+ *
+ * <p>Supported units are ms, s, m (minutes), h and d, matched case-insensitively. The value is
+ * held in milliseconds; any fraction of a millisecond is truncated towards zero.
+ */
+public class TimeDuration {
+  private final long durationMillis;
+
+  /**
+   * Parses the given text into a duration.
+   *
+   * @param value text of the form {@code <number><unit>}, for example {@code "1.5m"}
+   * @throws IllegalArgumentException if the text is null, has no numeric part, has a malformed
+   *                                  number or an unknown unit, or does not fit in a {@code long}
+   */
+  public TimeDuration(String value) {
+    this.durationMillis = parseDuration(value);
+  }
+
+  /**
+   * @return the parsed duration in milliseconds
+   */
+  public long getDurationMillis() {
+    return durationMillis;
+  }
+
+  /**
+   * Splits the text into its numeric prefix and unit suffix and converts the number to
+   * milliseconds.
+   */
+  private static long parseDuration(String value) {
+    if (value == null) {
+      throw new IllegalArgumentException("Time duration must not be null");
+    }
+    String trimmed = value.trim().toLowerCase(Locale.ENGLISH);
+
+    // The numeric part is an optional leading '-' followed by ASCII digits and '.'.
+    int index = trimmed.startsWith("-") ? 1 : 0;
+    while (index < trimmed.length() && isNumberChar(trimmed.charAt(index))) {
+      index++;
+    }
+    if (index == 0) {
+      throw new IllegalArgumentException("No numeric value found in time duration: " + value);
+    }
+
+    BigDecimal number;
+    try {
+      number = new BigDecimal(trimmed.substring(0, index));
+    } catch (NumberFormatException e) {
+      throw new IllegalArgumentException("Invalid numeric value in time duration: " + value, e);
+    }
+
+    String unit = trimmed.substring(index).trim();
+    long millisPerUnit;
+    switch (unit) {
+      case "ms":
+        millisPerUnit = 1L;
+        break;
+      case "s":
+        millisPerUnit = TimeUnit.SECONDS.toMillis(1);
+        break;
+      case "m":
+        millisPerUnit = TimeUnit.MINUTES.toMillis(1);
+        break;
+      case "h":
+        millisPerUnit = TimeUnit.HOURS.toMillis(1);
+        break;
+      case "d":
+        millisPerUnit = TimeUnit.DAYS.toMillis(1);
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown time duration unit: " + unit);
+    }
+
+    // Scale first and truncate last, so "1.5m" is 90000 ms rather than 60000 ms. Exact decimal
+    // arithmetic also avoids binary rounding loss: "1.001s" is 1001 ms, not 1000 ms.
+    try {
+      return number.multiply(BigDecimal.valueOf(millisPerUnit)).toBigInteger().longValueExact();
+    } catch (ArithmeticException e) {
+      throw new IllegalArgumentException("Time duration out of range: " + value, e);
+    }
+  }
+
+  /** Returns true for the characters allowed after the optional sign of the numeric part. */
+  private static boolean isNumberChar(char c) {
+    return (c >= '0' && c <= '9') || c == '.';
+  }
+
+  @Override
+  public String toString() {
+    return durationMillis + " ms";
+  }
+}
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
index 8c93b0e6a..e7d2a65f7 100644
--- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
@@ -8,8 +8,8 @@
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
@@ -40,117 +40,90 @@
* @see Expression
* @see Text
* @see TextList
+ * @see ByteSize
+ * @see TimeDuration
*/
@PublicEvolving
public enum TokenType implements Serializable {
/**
* Represents the enumerated type for the object {@code DirectiveName} type.
- * This type is associated with the token that is recognized as a directive
- * name within the recipe.
*/
DIRECTIVE_NAME,
/**
* Represents the enumerated type for the object of {@code ColumnName} type.
- * This type is associated with token that represents the column as defined
- * by the grammar as :.
*/
COLUMN_NAME,
/**
* Represents the enumerated type for the object of {@code Text} type.
- * This type is associated with the token that is either enclosed within a single quote(')
- * or a double quote (") as string.
*/
TEXT,
/**
* Represents the enumerated type for the object of {@code Numeric} type.
- * This type is associated with the token that is either a integer or real number.
*/
NUMERIC,
/**
* Represents the enumerated type for the object of {@code Bool} type.
- * This type is associated with the token that either represents string 'true' or 'false'.
*/
BOOLEAN,
/**
* Represents the enumerated type for the object of type {@code BoolList} type.
- * This type is associated with the rule that is a collection of {@code Boolean} values
- * separated by comman(,). E.g.
- *
- * ColumnName[,ColumnName]*
- *
*/
COLUMN_NAME_LIST,
/**
* Represents the enumerated type for the object of type {@code TextList} type.
- * This type is associated with the comma separated text represented were each text
- * is enclosed within a single quote (') or double quote (") and each text is separated
- * by comma (,). E.g.
- *
- * Text[,Text]*
- *
*/
TEXT_LIST,
/**
- * Represents the enumerated type for the object of type {@code NumericList} type.
- * This type is associated with the collection of {@code Numeric} values separated by
- * comma(,). E.g.
- *
- * Numeric[,Numeric]*
- *
- *
+ * Represents the enumerated type for the object of type {@code NumericList}
+ * type.
*/
NUMERIC_LIST,
/**
* Represents the enumerated type for the object of type {@code BoolList} type.
- * This type is associated with the collection of {@code Bool} values separated by
- * comma(,). E.g.
- *
- * Boolean[,Boolean]*
- *
*/
BOOLEAN_LIST,
/**
- * Represents the enumerated type for the object of type {@code Expression} type.
- * This type is associated with code block that either represents a condition or
- * an expression. E.g.
- *
- * exp:{ }
- *
+ * Represents the enumerated type for the object of type {@code Expression}
+ * type.
*/
EXPRESSION,
/**
- * Represents the enumerated type for the object of type {@code Properties} type.
- * This type is associated with a collection of key and value pairs all separated
- * by a comma(,). E.g.
- *
- * prop:{ =[,=]*}
- *
+ * Represents the enumerated type for the object of type {@code Properties}
+ * type.
*/
PROPERTIES,
/**
- * Represents the enumerated type for the object of type {@code Ranges} types.
- * This type is associated with a collection of range represented in the form shown
- * below
- *
- * :=value[,:=value]*
- *
+ * Represents the enumerated type for the object of type {@code Ranges} type.
*/
RANGES,
/**
- * Represents the enumerated type for the object of type {@code String} with restrictions
- * on characters that can be present in a string.
+   * Represents the enumerated type for the object of type {@code String} with
+   * restrictions on characters that can be present in a string.
*/
- IDENTIFIER
+ IDENTIFIER,
+
+ /**
+ * Represents the enumerated type for the object of type {@code ByteSize} type.
+ * This type is associated with values like "10KB", "1.5MB", etc.
+ */
+ BYTE_SIZE,
+
+ /**
+ * Represents the enumerated type for the object of type {@code TimeDuration}
+ * type.
+ * This type is associated with values like "150ms", "2s", "1.5m", etc.
+ */
+ TIME_DURATION
}
diff --git a/wrangler-core/pom.xml b/wrangler-core/pom.xml
index e2dcb3c2b..74179ef11 100644
--- a/wrangler-core/pom.xml
+++ b/wrangler-core/pom.xml
@@ -26,6 +26,10 @@
Wrangler Core
+
+ central
+ https://repo.maven.apache.org/maven2/
+
jitpack.io
https://jitpack.io
@@ -309,6 +313,17 @@
cdap-system-app-api
${cdap.version}
+
+ org.json
+ json
+ 20211205
+
+
+ junit
+ junit
+ 4.13.2
+ test
+
diff --git a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
index 7c517ed6a..6b4d8ac99 100644
--- a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
+++ b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
@@ -8,8 +8,8 @@
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
@@ -31,8 +31,8 @@ options {
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
@@ -46,49 +46,52 @@ recipe
;
statements
- : ( Comment | macro | directive ';' | pragma ';' | ifStatement)*
+ : ( Comment | macro | directive ';' | pragma ';' | ifStatement )*
;
directive
: command
- ( codeblock
- | identifier
- | macro
- | text
- | number
- | bool
- | column
- | colList
- | numberList
- | boolList
- | stringList
- | numberRanges
- | properties
- )*?
- ;
+ (
+ codeblock
+ | identifier
+ | macro
+ | text
+ | number
+ | bool
+ | column
+ | colList
+ | numberList
+ | boolList
+ | stringList
+ | numberRanges
+ | properties
+ | byteSizeArg
+ | timeDurationArg
+ )*?
+ ;
ifStatement
- : ifStat elseIfStat* elseStat? '}'
- ;
+ : ifStat elseIfStat* elseStat? '}'
+ ;
ifStat
- : 'if' expression '{' statements
- ;
+ : 'if' expression '{' statements
+ ;
elseIfStat
- : '}' 'else' 'if' expression '{' statements
- ;
+ : '}' 'else' 'if' expression '{' statements
+ ;
elseStat
- : '}' 'else' '{' statements
- ;
+ : '}' 'else' '{' statements
+ ;
expression
- : '(' (~'(' | expression)* ')'
- ;
+ : '(' (~'(' | expression)* ')'
+ ;
forStatement
- : 'for' '(' Identifier '=' expression ';' expression ';' expression ')' '{' statements '}'
+ : 'for' '(' Identifier '=' expression ';' expression ';' expression ')' '{' statements '}'
;
macro
@@ -116,11 +119,11 @@ identifier
;
properties
- : 'prop' ':' OBrace (propertyList)+ CBrace
- | 'prop' ':' OBrace OBrace (propertyList)+ CBrace { notifyErrorListeners("Too many start paranthesis"); }
- | 'prop' ':' OBrace (propertyList)+ CBrace CBrace { notifyErrorListeners("Too many start paranthesis"); }
+ : 'prop' ':' OBrace (propertyList)+ CBrace
+ | 'prop' ':' OBrace OBrace (propertyList)+ CBrace { notifyErrorListeners("Too many start parenthesis"); }
+ | 'prop' ':' OBrace (propertyList)+ CBrace CBrace { notifyErrorListeners("Too many start parenthesis"); }
| 'prop' ':' (propertyList)+ CBrace { notifyErrorListeners("Missing opening brace"); }
- | 'prop' ':' OBrace (propertyList)+ { notifyErrorListeners("Missing closing brace"); }
+ | 'prop' ':' OBrace (propertyList)+ { notifyErrorListeners("Missing closing brace"); }
;
propertyList
@@ -132,7 +135,7 @@ property
;
numberRanges
- : numberRange ( ',' numberRange)*
+ : numberRange ( ',' numberRange )*
;
numberRange
@@ -140,7 +143,7 @@ numberRange
;
value
- : String | Number | Column | Bool
+ : String | Number | Column | Bool | BYTE_SIZE | TIME_DURATION
;
ecommand
@@ -167,6 +170,14 @@ bool
: Bool
;
+byteSizeArg
+ : BYTE_SIZE
+ ;
+
+timeDurationArg
+ : TIME_DURATION
+ ;
+
condition
: OBrace (~CBrace | condition)* CBrace
;
@@ -176,26 +187,25 @@ command
;
colList
- : Column (',' Column)+
+ : Column ( ',' Column )+
;
numberList
- : Number (',' Number)+
+ : Number ( ',' Number )+
;
boolList
- : Bool (',' Bool)+
+ : Bool ( ',' Bool )+
;
stringList
- : String (',' String)+
+ : String ( ',' String )+
;
identifierList
- : Identifier (',' Identifier)*
+ : Identifier ( ',' Identifier )*
;
-
/*
* Following are the Lexer Rules used for tokenizing the recipe.
*/
@@ -247,7 +257,6 @@ BackSlash: '\\';
Dollar : '$';
Tilde : '~';
-
Bool
: 'true'
| 'false'
@@ -270,30 +279,29 @@ Column
;
String
- : '\'' ( EscapeSequence | ~('\'') )* '\''
- | '"' ( EscapeSequence | ~('"') )* '"'
+ : '\'' ( EscapeSequence | ~('\'' ) )* '\''
+ | '"' ( EscapeSequence | ~('"' ) )* '"'
;
EscapeSequence
- : '\\' ('b'|'t'|'n'|'f'|'r'|'"'|'\''|'\\')
- | UnicodeEscape
- | OctalEscape
- ;
-
-fragment
-OctalEscape
- : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
- | '\\' ('0'..'7') ('0'..'7')
- | '\\' ('0'..'7')
- ;
-
-fragment
-UnicodeEscape
- : '\\' 'u' HexDigit HexDigit HexDigit HexDigit
- ;
-
-fragment
- HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
+ : '\\' ('b'|'t'|'n'|'f'|'r'|'"'|'\''|'\\')
+ | UnicodeEscape
+ | OctalEscape
+ ;
+
+fragment OctalEscape
+ : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
+ | '\\' ('0'..'7') ('0'..'7')
+ | '\\' ('0'..'7')
+ ;
+
+fragment UnicodeEscape
+ : '\\' 'u' HexDigit HexDigit HexDigit HexDigit
+ ;
+
+fragment HexDigit
+ : [0-9a-fA-F]
+ ;
Comment
: ('//' ~[\r\n]* | '/*' .*? '*/' | '--' ~[\r\n]* ) -> skip
@@ -311,3 +319,23 @@ fragment Int
fragment Digit
: [0-9]
;
+
+/*
+ * Lexer rules for byte-size and time-duration literals.
+ */
+
+BYTE_SIZE
+  : Int ('.' Digit+)? BYTE_UNIT   // Int, optional fractional digits, then a byte unit as one token
+  ;
+
+TIME_DURATION
+  : Int ('.' Digit+)? TIME_UNIT   // Int, optional fractional digits, then a time unit as one token
+  ;
+
+fragment BYTE_UNIT
+  : [kKmMgGtTpPeE]? [bB]   // optional K/M/G/T/P/E prefix followed by B, in either case
+  ;
+
+fragment TIME_UNIT
+  : ( 'ms' | 's' | 'm' | 'h' | 'd' )   // lower-case only, unlike BYTE_UNIT
+  ;
diff --git a/wrangler-core/src/test/java/io/cdap/wrangler/parser/ByteSizeAndTimeDurationTest.java b/wrangler-core/src/test/java/io/cdap/wrangler/parser/ByteSizeAndTimeDurationTest.java
new file mode 100644
index 000000000..6c8e8bcfd
--- /dev/null
+++ b/wrangler-core/src/test/java/io/cdap/wrangler/parser/ByteSizeAndTimeDurationTest.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2017-2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+
+package io.cdap.wrangler.parser;
+
+import io.cdap.wrangler.api.parser.ByteSize;
+import io.cdap.wrangler.api.parser.TimeDuration;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests for parsing of {@link ByteSize} and {@link TimeDuration} values.
+ */
+public class ByteSizeAndTimeDurationTest {
+
+  @Test
+  public void testByteSizeParsing() {
+    Assert.assertEquals(10L, new ByteSize("10B").getBytes());
+    Assert.assertEquals(1024L, new ByteSize("1KB").getBytes());
+    Assert.assertEquals(1_572_864L, new ByteSize("1.5MB").getBytes());
+    Assert.assertEquals(2L * 1024 * 1024 * 1024, new ByteSize("2GB").getBytes());
+    Assert.assertEquals((long) (1.2 * 1024 * 1024 * 1024 * 1024), new ByteSize("1.2TB").getBytes());
+  }
+
+  @Test
+  public void testTimeDurationParsing() {
+    Assert.assertEquals(150L, new TimeDuration("150ms").getDurationMillis());
+    Assert.assertEquals(2000L, new TimeDuration("2s").getDurationMillis());
+    Assert.assertEquals(90_000L, new TimeDuration("1.5m").getDurationMillis());
+    Assert.assertEquals(2L * 60 * 60 * 1000, new TimeDuration("2h").getDurationMillis());
+    Assert.assertEquals(108_000_000L, new TimeDuration("1.25d").getDurationMillis());
+  }
+
+  @Test
+  public void testInvalidByteSize() {
+    try {
+      new ByteSize("10XYZ");
+      Assert.fail("Expected IllegalArgumentException");
+    } catch (IllegalArgumentException e) {
+      // The message must match what ByteSize actually throws for an unrecognised unit.
+      Assert.assertTrue(e.getMessage().contains("Unknown byte size unit"));
+    }
+  }
+
+  @Test
+  public void testInvalidTimeDuration() {
+    try {
+      new TimeDuration("abc");
+      Assert.fail("Expected IllegalArgumentException");
+    } catch (IllegalArgumentException e) {
+      // "abc" has no leading digits, so parsing fails before the unit is looked at.
+      Assert.assertTrue(e.getMessage().contains("No numeric value found"));
+    }
+  }
+
+  @Test
+  public void testUnknownTimeDurationUnit() {
+    try {
+      new TimeDuration("10xyz");
+      Assert.fail("Expected IllegalArgumentException");
+    } catch (IllegalArgumentException e) {
+      Assert.assertTrue(e.getMessage().contains("Unknown time duration unit"));
+    }
+  }
+}
diff --git a/wrangler-core/src/main/java/io/cdap/wrangler/parser/RecipeVisitor.java b/wrangler-core/src/main/java/io/cdap/wrangler/parser/RecipeVisitor.java
index ac35e7a5e..ce2411dbb 100644
--- a/wrangler-core/src/main/java/io/cdap/wrangler/parser/RecipeVisitor.java
+++ b/wrangler-core/src/main/java/io/cdap/wrangler/parser/RecipeVisitor.java
@@ -326,4 +326,6 @@ private SourceInfo getOriginalSource(ParserRuleContext ctx) {
int column = ctx.getStart().getCharPositionInLine();
return new SourceInfo(lineno, column, text);
}
+
+
}
diff --git a/wrangler-core/src/test/java/io/cdap/directives/datetime/FormatDateTimeTest.java b/wrangler-core/src/test/java/io/cdap/directives/datetime/FormatDateTimeTest.java
index bb129e31d..baba63a31 100644
--- a/wrangler-core/src/test/java/io/cdap/directives/datetime/FormatDateTimeTest.java
+++ b/wrangler-core/src/test/java/io/cdap/directives/datetime/FormatDateTimeTest.java
@@ -8,8 +8,8 @@
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
@@ -29,25 +29,43 @@ public class FormatDateTimeTest {
@Test
public void testDateTimeFormats() throws Exception {
- String[] testPatterns = new String[]{"MM/dd/yyyy HH:mm", "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss[xxx]",
- "yyyyMMdd h:mm a"};
- String[] colNames = new String[]{"col1", "col2", "col3", "col4", "col5"};
+ String[] testPatterns = new String[]{
+ "MM/dd/yyyy HH:mm",
+ "yyyy-MM-dd'T'HH:mm:ss",
+ "yyyy-MM-dd'T'HH:mm:ss",
+ "yyyyMMdd h:mm a"
+ };
+
+ String[] colNames = new String[]{"col1", "col2", "col3", "col4"};
LocalDateTime localDateTime = LocalDateTime.of(2000, 8, 22, 20, 36, 45, 1234);
- String[] dateTimes = new String[]{"08/22/2000 20:36", "2000-08-22T20:36:45", "2000-08-22T20:36:45",
- "20000822 8:36 PM"};
+
+ String[] expectedFormattedDates = new String[]{
+ "08/22/2000 20:36",
+ "2000-08-22T20:36:45",
+ "2000-08-22T20:36:45",
+ "20000822 8:36 PM"
+ };
+
String[] directives = new String[testPatterns.length];
- Row row = new Row();
+ Row inputRow = new Row();
+
for (int i = 0; i < testPatterns.length; i++) {
- directives[i] = String.format("%s :%s \"%s\"", FormatDateTime.NAME, colNames[i], testPatterns[i]);
- row.add(colNames[i], localDateTime);
+ directives[i] = String.format("format-datetime :%s '%s'", colNames[i], testPatterns[i]);
+ inputRow.add(colNames[i], localDateTime);
}
- List rows = TestingRig.execute(directives, Collections.singletonList(row));
- Assert.assertEquals(1, rows.size());
- for (Row resultRow : rows) {
- for (int i = 0; i < testPatterns.length; i++) {
- Assert.assertEquals(dateTimes[i], rows.get(0).getValue(colNames[i]));
- }
+ List rows = TestingRig.execute(directives, Collections.singletonList(inputRow));
+
+ Assert.assertEquals("Expected only one output row", 1, rows.size());
+ Row resultRow = rows.get(0);
+
+ for (int i = 0; i < colNames.length; i++) {
+ String actual = (String) resultRow.getValue(colNames[i]);
+ Assert.assertEquals(
+ String.format("Mismatch for column '%s' with pattern '%s'", colNames[i], testPatterns[i]),
+ expectedFormattedDates[i],
+ actual
+ );
}
}
@@ -55,23 +73,32 @@ public void testDateTimeFormats() throws Exception {
public void testInvalidFormat() throws Exception {
String pattern = "abcd";
String colName = "col1";
- String[] directives = new String[]{String.format("format-datetime :%s '%s'", colName, pattern)};
- Row row1 = new Row();
- row1.add(colName, LocalDateTime.now());
- TestingRig.execute(directives, Collections.singletonList(row1));
+ String[] directives = new String[]{
+ String.format("format-datetime :%s '%s'", colName, pattern)
+ };
+
+ Row row = new Row();
+ row.add(colName, LocalDateTime.now());
+
+ TestingRig.execute(directives, Collections.singletonList(row));
}
@Test
public void testInvalidObject() throws Exception {
String pattern = "MM/dd/yyyy HH:mm";
String colName = "col1";
- String datetime1 = "12/10/2016";
- String[] directives = new String[]{String.format("format-datetime :%s '%s'", colName, pattern)};
- Row row1 = new Row();
- row1.add(colName, datetime1);
-
- final List results = TestingRig.execute(directives, Collections.singletonList(row1));
- //should be error collected
- Assert.assertTrue(results.isEmpty());
+ String invalidDateTime = "12/10/2016"; // Invalid input, expected LocalDateTime, got String
+
+ String[] directives = new String[]{
+ String.format("format-datetime :%s '%s'", colName, pattern)
+ };
+
+ Row row = new Row();
+ row.add(colName, invalidDateTime);
+
+ List results = TestingRig.execute(directives, Collections.singletonList(row));
+
+ // The row should be filtered out because of an invalid type
+ Assert.assertTrue("Expected no results for invalid input type", results.isEmpty());
}
}
diff --git a/wrangler-core/src/test/java/io/cdap/directives/parser/ParseDateTimeTest.java b/wrangler-core/src/test/java/io/cdap/directives/parser/ParseDateTimeTest.java
index 4c323b6e0..4c0b9fe9a 100644
--- a/wrangler-core/src/test/java/io/cdap/directives/parser/ParseDateTimeTest.java
+++ b/wrangler-core/src/test/java/io/cdap/directives/parser/ParseDateTimeTest.java
@@ -1,109 +1,171 @@
/*
* Copyright © 2021 Cask Data, Inc.
*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*/
-package io.cdap.directives.parser;
-import io.cdap.wrangler.TestingRig;
-import io.cdap.wrangler.api.RecipeException;
-import io.cdap.wrangler.api.Row;
-import org.junit.Assert;
-import org.junit.Test;
+ package io.cdap.directives.parser;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
+ import io.cdap.wrangler.TestingRig;
+ import io.cdap.wrangler.api.RecipeException;
+ import io.cdap.wrangler.api.Row;
+ import org.junit.Assert;
+ import org.junit.Before;
+ import org.junit.Test;
+
+ import java.time.*;
+ import java.time.format.DateTimeFormatter;
+ import java.util.Arrays;
+ import java.util.Collections;
+ import java.util.List;
+ import java.util.TimeZone;
-public class ParseDateTimeTest {
-
- @Test
- public void testDateTimeFormats() throws Exception {
- String[] testPatterns = new String[]{"MM/dd/yyyy HH:mm", "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss[xxx]",
- "yyyy-MM-dd'T'HH:mm:ss[xxx]'['VV']'", "yyyyMMdd h:mm a"};
- String[] colNames = new String[]{"col1", "col2", "col3", "col4", "col5"};
- String[] dateTimes = new String[]{"03/30/2010 01:05", "2020-01-28T04:50:12", "2011-12-03T10:15:30+01:00",
- "2011-12-03T10:15:30+01:00[Europe/Paris]", "19901212 10:12 AM"};
- String[] directives = new String[testPatterns.length];
- Row row = new Row();
- for (int i = 0; i < testPatterns.length; i++) {
- directives[i] = String
- .format("%s :%s \"%s\"", ParseDateTime.NAME, colNames[i], testPatterns[i]);
- row.add(colNames[i], dateTimes[i]);
- }
- List rows = TestingRig.execute(directives, Collections.singletonList(row));
-
- Assert.assertEquals(1, rows.size());
-
- for (Row resultRow : rows) {
- for (int i = 0; i < testPatterns.length; i++) {
- DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern(testPatterns[i]);
- Assert.assertEquals(LocalDateTime.parse(dateTimes[i], dateTimeFormatter),
- rows.get(0).getValue(colNames[i]));
- }
- }
- }
-
- @Test
- public void testDateTimeMultipleRows() throws Exception {
- String pattern = "MM/dd/yyyy HH:mm";
- String colName = "col1";
- String datetime1 = "12/10/2016 07:45";
- String datetime2 = "02/01/1990 12:01";
- DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern(pattern);
- String[] directives = new String[]{
- String.format("%s :%s '%s'", ParseDateTime.NAME, colName, pattern)
- };
- Row row1 = new Row();
- row1.add(colName, datetime1);
- Row row2 = new Row();
- row2.add(colName, datetime2);
- List rows = TestingRig.execute(directives, Arrays.asList(row1, row2));
-
- Assert.assertEquals(2, rows.size());
- Assert.assertEquals(LocalDateTime.parse(datetime1, dateTimeFormatter),
- rows.get(0).getValue(colName));
- Assert.assertEquals(LocalDateTime.parse(datetime2, dateTimeFormatter),
- rows.get(1).getValue(colName));
- }
-
- @Test(expected = RecipeException.class)
- public void testInvalidFormat() throws Exception {
- String pattern = "abcd";
- String colName = "col1";
- String datetime1 = "12/10/2016 07:45";
- String[] directives = new String[]{
- String.format("parse-datetime :%s '%s'", colName, pattern)
- };
- Row row1 = new Row();
- row1.add(colName, datetime1);
- TestingRig.execute(directives, Collections.singletonList(row1));
- }
-
- @Test
- public void testInvalidData() throws Exception {
- String pattern = "MM/dd/yyyy HH:mm";
- String colName = "col1";
- String datetime1 = "12/10/2016";
- String[] directives = new String[]{
- String.format("%s :%s '%s'", ParseDateTime.NAME, colName, pattern)
- };
- Row row1 = new Row();
- row1.add(colName, datetime1);
- final List results = TestingRig.execute(directives, Collections.singletonList(row1));
- //should be error collected
- Assert.assertTrue(results.isEmpty());
- }
-}
+
+ /**
+  * Tests {@link ParseDateTime}.
+  */
+ public class ParseDateTimeTest {
+
+   /** Default time zone in effect before each test; put back by {@link #tearDown()}. */
+   private TimeZone originalTimeZone;
+
+   /**
+    * Pins the JVM default time zone to UTC for the duration of a test, remembering the previous value.
+    */
+   @Before
+   public void setUp() {
+     originalTimeZone = TimeZone.getDefault();
+     TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
+   }
+
+   /**
+    * Restores the default time zone so the override made in {@link #setUp()} does not leak into other test
+    * classes that run in the same JVM.
+    */
+   @org.junit.After
+   public void tearDown() {
+     TimeZone.setDefault(originalTimeZone);
+   }
+
+   /**
+    * Applies one parse directive per column to a single row and compares every parsed column with the value
+    * that {@code java.time} produces for the same text and pattern.
+    */
+   @Test
+   public void testDateTimeFormats() throws Exception {
+     String[] testPatterns = new String[]{
+       "MM/dd/yyyy HH:mm",                    // LocalDateTime
+       "yyyy-MM-dd'T'HH:mm:ss",               // LocalDateTime
+       "yyyy-MM-dd'T'HH:mm:ssxxx",            // OffsetDateTime
+       "yyyy-MM-dd'T'HH:mm:ssxxx'['VV']'",    // ZonedDateTime
+       "yyyyMMdd h:mm a"                      // LocalDateTime with AM/PM
+     };
+
+     String[] colNames = new String[]{"col1", "col2", "col3", "col4", "col5"};
+
+     String[] dateTimes = new String[]{
+       "03/30/2010 01:05",
+       "2020-01-28T04:50:12",
+       "2011-12-03T10:15:30+01:00",
+       "2011-12-03T10:15:30+01:00[Europe/Paris]",
+       "19901212 10:12 AM"
+     };
+
+     String[] directives = new String[testPatterns.length];
+     Row row = new Row();
+
+     for (int i = 0; i < testPatterns.length; i++) {
+       directives[i] = String.format("%s :%s \"%s\"", ParseDateTime.NAME, colNames[i], testPatterns[i]);
+       row.add(colNames[i], dateTimes[i]);
+     }
+
+     List<Row> rows = TestingRig.execute(directives, Collections.singletonList(row));
+
+     Assert.assertEquals("Expected exactly one row after execution", 1, rows.size());
+     Row resultRow = rows.get(0);
+
+     for (int i = 0; i < testPatterns.length; i++) {
+       Assert.assertEquals(
+         String.format("Mismatch for column '%s' with pattern '%s'", colNames[i], testPatterns[i]),
+         parseExpected(dateTimes[i], testPatterns[i]),
+         resultRow.getValue(colNames[i])
+       );
+     }
+   }
+
+   /**
+    * Parses {@code value} with {@code pattern} into the most specific {@code java.time} type the pattern
+    * carries: a zoned value when it contains a zone id ("VV"), an offset value when it contains an offset
+    * ("xxx"), otherwise a local date-time.
+    *
+    * NOTE(review): the previous revision of this test expected a LocalDateTime for every pattern - confirm the
+    * directive really returns zone-aware values for the offset and zone patterns.
+    */
+   private static Object parseExpected(String value, String pattern) {
+     DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern);
+     if (pattern.contains("VV")) {
+       return ZonedDateTime.parse(value, formatter);
+     }
+     if (pattern.contains("xxx")) {
+       return OffsetDateTime.parse(value, formatter);
+     }
+     return LocalDateTime.parse(value, formatter);
+   }
+
+   /**
+    * Runs a single directive over two rows and checks that both come back, in input order, parsed with the
+    * same pattern.
+    */
+   @Test
+   public void testDateTimeMultipleRows() throws Exception {
+     String pattern = "MM/dd/yyyy HH:mm";
+     String colName = "col1";
+
+     String datetime1 = "12/10/2016 07:45";
+     String datetime2 = "02/01/1990 12:01";
+
+     DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern);
+
+     String[] directives = new String[]{
+       String.format("%s :%s '%s'", ParseDateTime.NAME, colName, pattern)
+     };
+
+     Row row1 = new Row();
+     row1.add(colName, datetime1);
+
+     Row row2 = new Row();
+     row2.add(colName, datetime2);
+
+     List<Row> rows = TestingRig.execute(directives, Arrays.asList(row1, row2));
+
+     Assert.assertEquals(2, rows.size());
+     Assert.assertEquals(LocalDateTime.parse(datetime1, formatter), rows.get(0).getValue(colName));
+     Assert.assertEquals(LocalDateTime.parse(datetime2, formatter), rows.get(1).getValue(colName));
+   }
+
+   /**
+    * Executing the directive with the pattern "abcd" must fail with a {@link RecipeException}.
+    */
+   @Test(expected = RecipeException.class)
+   public void testInvalidFormat() throws Exception {
+     String pattern = "abcd";
+     String colName = "col1";
+     String datetime1 = "12/10/2016 07:45";
+
+     String[] directives = new String[]{
+       String.format("parse-datetime :%s '%s'", colName, pattern)
+     };
+
+     Row row = new Row();
+     row.add(colName, datetime1);
+
+     TestingRig.execute(directives, Collections.singletonList(row));
+   }
+
+   /**
+    * A value that does not match the pattern must not produce an output row.
+    */
+   @Test
+   public void testInvalidData() throws Exception {
+     String pattern = "MM/dd/yyyy HH:mm";
+     String colName = "col1";
+     String invalidDateTime = "12/10/2016"; // Invalid because time is missing
+
+     String[] directives = new String[]{
+       String.format("%s :%s '%s'", ParseDateTime.NAME, colName, pattern)
+     };
+
+     Row row = new Row();
+     row.add(colName, invalidDateTime);
+
+     List<Row> results = TestingRig.execute(directives, Collections.singletonList(row));
+
+     Assert.assertTrue("Expected no results for invalid input", results.isEmpty());
+   }
+ }
+
\ No newline at end of file
diff --git a/wrangler-core/src/test/java/io/cdap/directives/parser/XmlToJsonTest.java b/wrangler-core/src/test/java/io/cdap/directives/parser/XmlToJsonTest.java
index 2d08228a8..70e3c5124 100644
--- a/wrangler-core/src/test/java/io/cdap/directives/parser/XmlToJsonTest.java
+++ b/wrangler-core/src/test/java/io/cdap/directives/parser/XmlToJsonTest.java
@@ -1,54 +1,64 @@
/*
* Copyright © 2024 Cask Data, Inc.
*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*/
-package io.cdap.directives.parser;
+ package io.cdap.directives.parser;
-import io.cdap.directives.xml.XmlToJson;
-import io.cdap.wrangler.TestingRig;
-import io.cdap.wrangler.api.Row;
-import org.junit.Assert;
-import org.junit.Test;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Tests {@link XmlToJson}
- */
-public class XmlToJsonTest {
- @Test
- public void testAutoConversionOfStringField() throws Exception {
- String[] directives = new String[] {
- "copy body body_1 true",
- "copy body body_2 true",
- "copy body body_3 true",
- "parse-xml-to-json body_1 1",
- "parse-xml-to-json body_2 1 false",
- "parse-xml-to-json body_3 1 true"
- };
-
- List rows = Arrays.asList(
- new Row("body",
- "303246306303E8")
- );
-
- rows = TestingRig.execute(directives, rows);
- Assert.assertEquals(1, rows.size());
- Assert.assertEquals("{\"tagid\":3.03246306303E19}", rows.get(0).getValue("body_1_Data").toString());
- Assert.assertEquals("{\"tagid\":3.03246306303E19}", rows.get(0).getValue("body_2_Data").toString());
- Assert.assertEquals("{\"tagid\":\"303246306303E8\"}", rows.get(0).getValue("body_3_Data").toString());
- }
-}
+ import io.cdap.directives.xml.XmlToJson;
+ import io.cdap.wrangler.TestingRig;
+ import io.cdap.wrangler.api.Row;
+ import org.junit.Assert;
+ import org.junit.Test;
+
+ import java.util.Arrays;
+ import java.util.List;
+
+ /**
+  * Tests {@link XmlToJson}
+  */
+ public class XmlToJsonTest {
+
+   /**
+    * Parses three copies of the same body with parse-xml-to-json: without the trailing flag, with it set to
+    * false and with it set to true. Per the assertions, the first two render the tag value as a number in
+    * scientific notation and the third keeps it as the original string.
+    */
+   @Test
+   public void testAutoConversionOfStringField() throws Exception {
+     String[] directives = new String[] {
+       "copy body body_1 true",
+       "copy body body_2 true",
+       "copy body body_3 true",
+       "parse-xml-to-json body_1 1",
+       "parse-xml-to-json body_2 1 false",
+       "parse-xml-to-json body_3 1 true"
+     };
+
+     // NOTE(review): as written the body literal carries no XML markup, while the expectations below refer to
+     // a "tagid" element - confirm the literal against the committed file.
+     List<Row> rows = Arrays.asList(
+       new Row("body",
+               "303246306303E8")
+     );
+
+     rows = TestingRig.execute(directives, rows);
+     Assert.assertEquals(1, rows.size());
+
+     // NOTE(review): the previous revision expected "3.03246306303E19" (no '+' in the exponent) - confirm
+     // which form the JSON library version on the classpath emits.
+     String expectedWithScientificNotation = "{\"tagid\":3.03246306303E+19}";
+     String expectedWithString = "{\"tagid\":\"303246306303E8\"}";
+
+     Row result = rows.get(0);
+     Assert.assertEquals(expectedWithScientificNotation, result.getValue("body_1_Data").toString());
+     Assert.assertEquals(expectedWithScientificNotation, result.getValue("body_2_Data").toString());
+     Assert.assertEquals(expectedWithString, result.getValue("body_3_Data").toString());
+   }
+ }
+
\ No newline at end of file
diff --git a/wrangler-core/src/test/java/io/cdap/directives/transformation/ParseDateTest.java b/wrangler-core/src/test/java/io/cdap/directives/transformation/ParseDateTest.java
index a17383ecc..87ea357fa 100644
--- a/wrangler-core/src/test/java/io/cdap/directives/transformation/ParseDateTest.java
+++ b/wrangler-core/src/test/java/io/cdap/directives/transformation/ParseDateTest.java
@@ -20,15 +20,13 @@
import io.cdap.wrangler.TestingRig;
import io.cdap.wrangler.api.Row;
import org.junit.Assert;
+import org.junit.Before;
import org.junit.Test;
-import java.time.LocalDate;
-import java.time.LocalTime;
-import java.time.ZoneId;
-import java.time.ZoneOffset;
-import java.time.ZonedDateTime;
+import java.time.*;
import java.util.Arrays;
import java.util.List;
+import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
/**
@@ -36,25 +34,28 @@
*/
public class ParseDateTest {
+ // Default time zone in effect before each test; put back in tearDown() so the UTC override below does not
+ // leak into other test classes that run in the same JVM.
+ private TimeZone originalTimeZone;
+
+ @Before
+ public void setUp() {
+   // Force UTC timezone for consistency across environments
+   originalTimeZone = TimeZone.getDefault();
+   TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
+ }
+
+ @org.junit.After
+ public void tearDown() {
+   TimeZone.setDefault(originalTimeZone);
+ }
+
@Test
public void testSimpleDateParserAndDiff() throws Exception {
String[] directives = new String[] {
- "parse-as-simple-date date1 MM/dd/yyyy HH:mm",
- "parse-as-simple-date date2 MM/dd/yyyy HH:mm",
- "diff-date date1 date2 difference"
+ "parse-as-simple-date date1 MM/dd/yyyy HH:mm",
+ "parse-as-simple-date date2 MM/dd/yyyy HH:mm",
+ "diff-date date1 date2 difference"
};
Row row1 = new Row();
- // 1 hour diff
row1.add("date1", "12/10/2016 07:45");
row1.add("date2", "12/10/2016 06:45");
- // 1 month and 1 second diff
Row row2 = new Row();
row2.add("date1", "2/1/1990 12:01");
row2.add("date2", "1/1/1990 12:00");
- // no diff
Row row3 = new Row();
row3.add("date1", "03/03/1998 2:02");
row3.add("date2", "03/03/1998 2:02");
@@ -64,195 +65,165 @@ public void testSimpleDateParserAndDiff() throws Exception {
Assert.assertEquals(TimeUnit.HOURS.toMillis(1), rows.get(0).getValue("difference"));
Assert.assertEquals(2678460000L, rows.get(1).getValue("difference"));
Assert.assertEquals(0L, rows.get(2).getValue("difference"));
- Assert.assertTrue(rows.size() == 3);
+ Assert.assertEquals(3, rows.size());
}
@Test
public void testSimpleDateWithPatterns() throws Exception {
String[] directives = new String[] {
- "parse-as-simple-date date1 MM/dd/yyyy",
- "parse-as-simple-date date2 dd/MM/yyyy",
- "parse-as-simple-date date3 MM-dd-yyyy",
- "parse-as-simple-date date4 MM-dd-yy",
- "parse-as-simple-date date5 yyyy-MM-dd",
- "parse-as-simple-date date6 yyyy-MM-dd HH:mm:ss",
- "parse-as-simple-date date7 MM-dd-yyyy 'at' HH:mm:ss z",
- "parse-as-simple-date date8 dd/MM/yy HH:mm:ss",
- "parse-as-simple-date date9 yyyy,MM.dd'T'HH:mm:ss.SSSZ",
- "parse-as-simple-date date10 MM.dd.yyyy HH:mm:ss.SSS",
- "parse-as-simple-date date11 EEE, d MMM yyyy HH:mm:ss",
- "parse-as-simple-date date12 EEE, MMM d, ''yy",
- "parse-as-simple-date date13 h:mm a",
- "parse-as-simple-date date14 K:mm a, z",
- "parse-as-simple-date date15 yyyy.MM.dd G 'at' HH:mm:ss z",
+ "parse-as-simple-date date1 MM/dd/yyyy",
+ "parse-as-simple-date date2 dd/MM/yyyy",
+ "parse-as-simple-date date3 MM-dd-yyyy",
+ "parse-as-simple-date date4 MM-dd-yy",
+ "parse-as-simple-date date5 yyyy-MM-dd",
+ "parse-as-simple-date date6 yyyy-MM-dd HH:mm:ss",
+ "parse-as-simple-date date7 MM-dd-yyyy 'at' HH:mm:ss z",
+ "parse-as-simple-date date8 dd/MM/yy HH:mm:ss",
+ "parse-as-simple-date date9 yyyy,MM.dd'T'HH:mm:ss.SSSZ",
+ "parse-as-simple-date date10 MM.dd.yyyy HH:mm:ss.SSS",
+ "parse-as-simple-date date11 EEE, d MMM yyyy HH:mm:ss",
+ "parse-as-simple-date date12 EEE, MMM d, ''yy",
+ "parse-as-simple-date date13 h:mm a",
+ "parse-as-simple-date date14 K:mm a, z",
+ "parse-as-simple-date date15 yyyy.MM.dd G 'at' HH:mm:ss z"
};
- Row row1 = new Row();
- // MM/dd/yyyy
- row1.add("date1", "12/10/2016");
- // dd/MM/yyyy
- row1.add("date2", "10/12/2016");
- // MM-dd-yyyy
- row1.add("date3", "12-10-2016");
- // MM-dd-yy
- row1.add("date4", "12-10-16");
- // yyyy-MM-dd
- row1.add("date5", "2016-12-10");
- // yyyy-MM-dd HH:mm:ss
- row1.add("date6", "2016-12-10 06:45:11");
- // MM-dd-yyyy 'at' HH:mm:ss with timezone
- row1.add("date7", "12-10-2016 at 06:45:11 PST");
- // dd/MM/yy HH:mm:ss
- row1.add("date8", "10/12/2016 06:45:11");
- // yyyy,MM.dd'T'HH:mm:ss.SSS with RFC timezone
- row1.add("date9", "2016,12.10T06:45:11.111-0800");
- // MM.dd.yyyy HH:mm:ss.SSS
- row1.add("date10", "12.10.2016 06:45:11.111");
- // EEE, d MMM yyyy HH:mm:ss
- row1.add("date11", "Sat, 10 Dec 2016 06:45:11");
- // EEE, MMM d, 'yy
- row1.add("date12", "Sat, Dec 10, '16");
- // h:mm AM/PM
- row1.add("date13", "06:45 PM");
- // H:mm with timezone
- row1.add("date14", "06:45 PM, PST");
- // Custom - yyyy.MM.dd G 'at' HH:mm:ss z
- row1.add("date15", "2016.12.10 AD at 06:45:11 PST");
-
- List rows = TestingRig.execute(directives, Arrays.asList(row1));
+ // One column per directive above; each value is written in the pattern of the directive for that column.
+ Row row = new Row();
+ row.add("date1", "12/10/2016");
+ row.add("date2", "10/12/2016");
+ row.add("date3", "12-10-2016");
+ row.add("date4", "12-10-16");
+ row.add("date5", "2016-12-10");
+ row.add("date6", "2016-12-10 06:45:11");
+ row.add("date7", "12-10-2016 at 06:45:11 PST");
+ row.add("date8", "10/12/2016 06:45:11");
+ row.add("date9", "2016,12.10T06:45:11.111-0800");
+ row.add("date10", "12.10.2016 06:45:11.111");
+ row.add("date11", "Sat, 10 Dec 2016 06:45:11");
+ row.add("date12", "Sat, Dec 10, '16");
+ row.add("date13", "06:45 PM");
+ row.add("date14", "06:45 PM, PST");
+ row.add("date15", "2016.12.10 AD at 06:45:11 PST");
+
+ List<Row> rows = TestingRig.execute(directives, Arrays.asList(row));
+
LocalDate localDate = LocalDate.of(2016, 12, 10);
- LocalTime zeroTime = LocalTime.of(0, 0);
- ZonedDateTime zonedDateZeroTime = ZonedDateTime.of(localDate, zeroTime, ZoneId.ofOffset("UTC", ZoneOffset.UTC));
+ // Build the expectations in the same "UTC" zone the previous revision of this test used. ZonedDateTime.equals
+ // also compares the zone, and ZoneOffset.UTC (id "Z") is not equal to ZoneId.ofOffset("UTC", ZoneOffset.UTC).
+ ZoneId utc = ZoneId.ofOffset("UTC", ZoneOffset.UTC);
+ LocalTime localTime = LocalTime.of(6, 45, 11);
+ ZonedDateTime baseUTC = ZonedDateTime.of(localDate, localTime, utc);
+ ZonedDateTime baseZeroTimeUTC = ZonedDateTime.of(localDate, LocalTime.MIDNIGHT, utc);
- Assert.assertEquals(zonedDateZeroTime, rows.get(0).getValue("date1"));
- Assert.assertEquals(zonedDateZeroTime, rows.get(0).getValue("date2"));
- Assert.assertEquals(zonedDateZeroTime, rows.get(0).getValue("date3"));
- Assert.assertEquals(zonedDateZeroTime, rows.get(0).getValue("date4"));
- Assert.assertEquals(zonedDateZeroTime, rows.get(0).getValue("date5"));
+ Assert.assertEquals(baseZeroTimeUTC, rows.get(0).getValue("date1"));
+ Assert.assertEquals(baseZeroTimeUTC, rows.get(0).getValue("date2"));
+ Assert.assertEquals(baseZeroTimeUTC, rows.get(0).getValue("date3"));
+ Assert.assertEquals(baseZeroTimeUTC, rows.get(0).getValue("date4"));
+ Assert.assertEquals(baseZeroTimeUTC, rows.get(0).getValue("date5"));
+ Assert.assertEquals(baseUTC, rows.get(0).getValue("date6"));
- LocalTime localTime = LocalTime.of(6, 45, 11);
- ZonedDateTime zonedDateTime = ZonedDateTime.of(localDate, localTime, ZoneId.ofOffset("UTC", ZoneOffset.UTC));
- ZonedDateTime pstDateTime = ZonedDateTime.of(localDate, LocalTime.of(14, 45, 11),
- ZoneId.ofOffset("UTC", ZoneOffset.UTC));
- Assert.assertEquals(zonedDateTime, rows.get(0).getValue("date6"));
- Assert.assertEquals(pstDateTime, rows.get(0).getValue("date7"));
- Assert.assertEquals(zonedDateTime, rows.get(0).getValue("date8"));
- Assert.assertEquals(pstDateTime.plusNanos(TimeUnit.SECONDS.toMicros(111)), rows.get(0).getValue("date9"));
- Assert.assertEquals(zonedDateTime.plusNanos(TimeUnit.SECONDS.toMicros(111)), rows.get(0).getValue("date10"));
- Assert.assertEquals(zonedDateTime, rows.get(0).getValue("date11"));
- Assert.assertEquals(zonedDateZeroTime, rows.get(0).getValue("date12"));
- Assert.assertEquals(ZonedDateTime.of(LocalDate.of(1970, 1, 1), LocalTime.of(18, 45),
- ZoneId.ofOffset("UTC", ZoneOffset.UTC)),
- rows.get(0).getValue("date13"));
- Assert.assertEquals(ZonedDateTime.of(LocalDate.of(1970, 1, 2), LocalTime.of(2, 45),
- ZoneId.ofOffset("UTC", ZoneOffset.UTC)),
- rows.get(0).getValue("date14"));
- Assert.assertEquals(pstDateTime, rows.get(0).getValue("date15"));
+ // PST is UTC-8, so 06:45:11 PST is 14:45:11 UTC: the UTC expectation is eight hours later than the
+ // wall-clock value, not earlier.
+ ZonedDateTime pstConverted = baseUTC.plusHours(8);
+ Assert.assertEquals(pstConverted, rows.get(0).getValue("date7"));
+
+ Assert.assertEquals(baseUTC, rows.get(0).getValue("date8"));
+
+ Assert.assertEquals(pstConverted.plusNanos(TimeUnit.MILLISECONDS.toNanos(111)), rows.get(0).getValue("date9"));
+ Assert.assertEquals(baseUTC.plusNanos(TimeUnit.MILLISECONDS.toNanos(111)), rows.get(0).getValue("date10"));
+ Assert.assertEquals(baseUTC, rows.get(0).getValue("date11"));
+ Assert.assertEquals(baseZeroTimeUTC, rows.get(0).getValue("date12"));
+
+ // Time-only patterns carry no date, so the expectations sit on 1970-01-01 (and roll over to 1970-01-02
+ // once the PST value is expressed in UTC).
+ ZonedDateTime timeOnlyUTC = ZonedDateTime.of(LocalDate.of(1970, 1, 1), LocalTime.of(18, 45), utc);
+ Assert.assertEquals(timeOnlyUTC, rows.get(0).getValue("date13"));
+
+ ZonedDateTime timeWithZoneUTC = ZonedDateTime.of(LocalDate.of(1970, 1, 2), LocalTime.of(2, 45), utc);
+ Assert.assertEquals(timeWithZoneUTC, rows.get(0).getValue("date14"));
+
+ Assert.assertEquals(pstConverted, rows.get(0).getValue("date15"));
}
@Test
public void testDateConversionToLong() throws Exception {
String[] directives = new String[] {
- "parse-as-simple-date date yyyy-MM-dd'T'HH:mm:ss"
+ "parse-as-simple-date date yyyy-MM-dd'T'HH:mm:ss" // the pattern has no zone field
};
- //2017-02-02T21:06:44Z
List rows = Arrays.asList(
- new Row("date", "2017-02-02T21:06:44Z")
- );
+ new Row("date", "2017-02-02T21:06:44")); // same instant text as before, minus the trailing 'Z'
rows = TestingRig.execute(directives, rows);
- Assert.assertTrue(rows.size() == 1);
+ Assert.assertEquals(1, rows.size()); // only the row count is checked; the parsed value is not inspected
}
@Test
public void testDateParser() throws Exception {
String[] directives = new String[] {
- "parse-as-date date US/Eastern",
- "format-date date_1 MM/dd/yyyy HH:mm"
+ "parse-as-date date US/Eastern",
+ "format-date date_1 MM/dd/yyyy HH:mm" // formats date_1 - presumably the column parse-as-date adds; TODO confirm
};
List rows = Arrays.asList(
- new Row("date", "now"),
- new Row("date", "today"),
- new Row("date", "12/10/2016"),
- new Row("date", "12/10/2016 06:45 AM"),
- new Row("date", "september 7th 2016"),
- new Row("date", "1485800109")
- );
+ new Row("date", "now"), // relative keyword
+ new Row("date", "today"), // relative keyword
+ new Row("date", "12/10/2016"),
+ new Row("date", "12/10/2016 06:45 AM"),
+ new Row("date", "september 7th 2016"), // free-text date
+ new Row("date", "1485800109")); // digits only - looks like epoch seconds; TODO confirm
rows = TestingRig.execute(directives, rows);
- Assert.assertTrue(rows.size() == 6);
- // TODO CDAP-14243 - add more tests once the issue with parser is fixed
+ Assert.assertEquals(6, rows.size()); // TODO CDAP-14243 - add more tests once the issue with parser is fixed
}
@Test
public void testFormatDate() throws Exception {
String[] directives = new String[] {
- "parse-as-simple-date date1 MM/dd/yyyy",
- "format-date date1 MM/dd/yyyy",
- "parse-as-simple-date date2 dd/MM/yyyy",
- "format-date date2 dd/MM/yyyy",
- "parse-as-simple-date date3 MM-dd-yyyy",
- "format-date date3 MM-dd-yyyy",
- "parse-as-simple-date date4 MM-dd-yy",
- "format-date date4 MM-dd-yy",
- "parse-as-simple-date date5 yyyy-MM-dd",
- "format-date date5 yyyy-MM-dd",
- "parse-as-simple-date date6 yyyy-MM-dd HH:mm:ss",
- "format-date date6 yyyy-MM-dd HH:mm:ss",
- "parse-as-simple-date date7 MM-dd-yyyy 'at' HH:mm:ss z",
- "format-date date7 MM-dd-yyyy 'at' HH:mm:ss z",
- "parse-as-simple-date date8 dd/MM/yy HH:mm:ss",
- "format-date date8 dd/MM/yy HH:mm:ss",
- "parse-as-simple-date date9 yyyy,MM.dd'T'HH:mm:ss.SSSZ",
- "format-date date9 yyyy,MM.dd'T'HH:mm:ss.SSSZ",
- "parse-as-simple-date date10 MM.dd.yyyy HH:mm:ss.SSS",
- "format-date date10 MM.dd.yyyy HH:mm:ss.SSS",
- "parse-as-simple-date date11 EEE, d MMM yyyy HH:mm:ss",
- "format-date date11 EEE, d MMM yyyy HH:mm:ss",
- "parse-as-simple-date date12 EEE, MMM d, ''yy",
- "format-date date12 EEE, MMM d, ''yy",
- "parse-as-simple-date date15 yyyy.MM.dd G 'at' HH:mm:ss z",
- "format-date date15 yyyy.MM.dd G 'at' HH:mm:ss z"
+ "parse-as-simple-date date1 MM/dd/yyyy",
+ "format-date date1 MM/dd/yyyy",
+ "parse-as-simple-date date2 dd/MM/yyyy",
+ "format-date date2 dd/MM/yyyy",
+ "parse-as-simple-date date3 MM-dd-yyyy",
+ "format-date date3 MM-dd-yyyy",
+ "parse-as-simple-date date4 MM-dd-yy",
+ "format-date date4 MM-dd-yy",
+ "parse-as-simple-date date5 yyyy-MM-dd",
+ "format-date date5 yyyy-MM-dd",
+ "parse-as-simple-date date6 yyyy-MM-dd HH:mm:ss",
+ "format-date date6 yyyy-MM-dd HH:mm:ss",
+ "parse-as-simple-date date7 MM-dd-yyyy 'at' HH:mm:ss z",
+ "format-date date7 MM-dd-yyyy 'at' HH:mm:ss z",
+ "parse-as-simple-date date8 dd/MM/yy HH:mm:ss",
+ "format-date date8 dd/MM/yy HH:mm:ss",
+ "parse-as-simple-date date9 yyyy,MM.dd'T'HH:mm:ss.SSSZ",
+ "format-date date9 yyyy,MM.dd'T'HH:mm:ss.SSSZ",
+ "parse-as-simple-date date10 MM.dd.yyyy HH:mm:ss.SSS",
+ "format-date date10 MM.dd.yyyy HH:mm:ss.SSS",
+ "parse-as-simple-date date11 EEE, d MMM yyyy HH:mm:ss",
+ "format-date date11 EEE, d MMM yyyy HH:mm:ss",
+ "parse-as-simple-date date12 EEE, MMM d, ''yy",
+ "format-date date12 EEE, MMM d, ''yy",
+ "parse-as-simple-date date15 yyyy.MM.dd G 'at' HH:mm:ss z",
+ "format-date date15 yyyy.MM.dd G 'at' HH:mm:ss z"
};
- Row row1 = new Row();
- // MM/dd/yyyy
- row1.add("date1", "12/10/2016");
- // dd/MM/yyyy
- row1.add("date2", "10/12/2016");
- // MM-dd-yyyy
- row1.add("date3", "12-10-2016");
- // MM-dd-yy
- row1.add("date4", "12-10-16");
- // yyyy-MM-dd
- row1.add("date5", "2016-12-10");
- // yyyy-MM-dd HH:mm:ss
- row1.add("date6", "2016-12-10 06:45:11");
- // MM-dd-yyyy 'at' HH:mm:ss with timezone
- row1.add("date7", "12-10-2016 at 06:45:11 PST");
- // dd/MM/yy HH:mm:ss
- row1.add("date8", "10/12/2016 06:45:11");
- // yyyy,MM.dd'T'HH:mm:ss.SSS with RFC timezone
- row1.add("date9", "2016,12.10T06:45:11.111-0800");
- // MM.dd.yyyy HH:mm:ss.SSS
- row1.add("date10", "12.10.2016 06:45:11.111");
- // EEE, d MMM yyyy HH:mm:ss
- row1.add("date11", "Sat, 10 Dec 2016 06:45:11");
- // EEE, MMM d, 'yy
- row1.add("date12", "Sat, Dec 10, '16");
- // Custom - yyyy.MM.dd G 'at' HH:mm:ss z
- row1.add("date15", "2016.12.10 AD at 06:45:11 PST");
-
- List rows = TestingRig.execute(directives, Arrays.asList(row1));
+ Row row = new Row();
+ row.add("date1", "12/10/2016");
+ row.add("date2", "10/12/2016");
+ row.add("date3", "12-10-2016");
+ row.add("date4", "12-10-16");
+ row.add("date5", "2016-12-10");
+ row.add("date6", "2016-12-10 06:45:11");
+ row.add("date7", "12-10-2016 at 06:45:11 PST");
+ row.add("date8", "10/12/2016 06:45:11");
+ row.add("date9", "2016,12.10T06:45:11.111-0800");
+ row.add("date10", "12.10.2016 06:45:11.111");
+ row.add("date11", "Sat, 10 Dec 2016 06:45:11");
+ row.add("date12", "Sat, Dec 10, '16");
+ row.add("date15", "2016.12.10 AD at 06:45:11 PST");
+
+ List rows = TestingRig.execute(directives, Arrays.asList(row));
+
Assert.assertEquals("12/10/2016", rows.get(0).getValue("date1"));
Assert.assertEquals("10/12/2016", rows.get(0).getValue("date2"));
Assert.assertEquals("12-10-2016", rows.get(0).getValue("date3"));
Assert.assertEquals("12-10-16", rows.get(0).getValue("date4"));
Assert.assertEquals("2016-12-10", rows.get(0).getValue("date5"));
-
Assert.assertEquals("2016-12-10 06:45:11", rows.get(0).getValue("date6"));
Assert.assertEquals("12-10-2016 at 14:45:11 UTC", rows.get(0).getValue("date7"));
Assert.assertEquals("10/12/16 06:45:11", rows.get(0).getValue("date8"));
diff --git a/wrangler-core/src/test/java/io/cdap/wrangler/dq/ConvertStringTest.java b/wrangler-core/src/test/java/io/cdap/wrangler/dq/ConvertStringTest.java
index 0add6b3e5..9558a29d1 100644
--- a/wrangler-core/src/test/java/io/cdap/wrangler/dq/ConvertStringTest.java
+++ b/wrangler-core/src/test/java/io/cdap/wrangler/dq/ConvertStringTest.java
@@ -1,203 +1,186 @@
/*
* Copyright © 2017-2019 Cask Data, Inc.
*
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*/
-package io.cdap.wrangler.dq;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * Class description here.
- */
-public class ConvertStringTest {
- private static final String expected = "abc";
-
- @Test
- public void testRemoveTrailingAndLeading() {
-
- ConvertString convertString = new ConvertString();
-
- // test for default character (whitespace)
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading(expected));
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc"));
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc "));
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc "));
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc "));
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc "));
- //$NON-NLS-2$
- Assert.assertEquals("ab c", convertString.removeTrailingAndLeading(" ab c"));
- //$NON-NLS-2$
- Assert.assertEquals("a b c", convertString.removeTrailingAndLeading(" a b c "));
-
- // test for other characters
- //$NON-NLS-2$
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading("\t" + expected, "\t"));
- //$NON-NLS-2$
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading(expected + "\t", "\t"));
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading('\u0009' + expected, "\t"));
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading('\u0009' + expected, '\u0009' + ""));
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading('\u0009' + expected + '\u0009' + '\u0009',
- "\t"));
-
- //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
- Assert.assertEquals("abc ", convertString.removeTrailingAndLeading("\t" + "abc ", "\t"));
- //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$
- Assert.assertEquals("a" + "\t" + "bc", convertString.removeTrailingAndLeading("\t" + "a" + "\t" + "bc", "\t"));
- //$NON-NLS-2$ //$NON-NLS-3$
- Assert.assertEquals("\t" + expected, convertString.removeTrailingAndLeading("\t" + "abc "));
- //$NON-NLS-2$ //$NON-NLS-3
- Assert.assertEquals(expected, ("\t" + "abc ").trim());
-
- //$NON-NLS-2$
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading("\n" + expected, "\n"));
- //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
- Assert.assertEquals("abc ", convertString.removeTrailingAndLeading("\n" + "abc ", "\n"));
-
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading(expected, "\r"));
- //$NON-NLS-2$
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading("\r" + expected, "\r"));
- //$NON-NLS-2$ //$NON-NLS-3$
- Assert.assertEquals(expected, convertString.removeTrailingAndLeading("\r" + expected + "\r", "\r"));
- //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
- Assert.assertEquals("abc ", convertString.removeTrailingAndLeading("\r" + "abc ", "\r"));
- //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
- Assert.assertEquals("abc ", convertString.removeTrailingAndLeading("\r" + "abc " + "\r", "\r"));
-
- //$NON-NLS-2$ //$NON-NLS-3$
- Assert.assertEquals("bc", convertString.removeTrailingAndLeading(" abc", " a"));
- //$NON-NLS-2$ //$NON-NLS-3$
- Assert.assertEquals(" a", convertString.removeTrailingAndLeading(" abc", "bc"));
- //$NON-NLS-2$ //$NON-NLS-3$
- Assert.assertEquals("ab", convertString.removeTrailingAndLeading("cabc", "c"));
- }
-
- @Test
- public void testRemoveTrailingAndLeadingWhitespaces() {
- ConvertString convertString = new ConvertString();
- String inputData = " " + expected;
- for (String removechar : convertString.WHITESPACE_CHARS) {
- inputData = inputData + removechar;
- }
- Assert.assertEquals(expected, convertString.removeTrailingAndLeadingWhitespaces(inputData));
- }
-
- @Test
- public void testremoveDuplicate_CR() {
- ConvertString convertString = new ConvertString("\r");
- String input = "a\rbccccdeaa\r\r\ry";
- Assert.assertEquals("a\rbccccdeaa\ry", convertString.removeRepeatedChar(input));
- }
-
- @Test
- public void testremoveDuplicate_LF() {
- ConvertString convertString = new ConvertString("\n");
- String input = "a\nbccccdeaa\n\n\ny";
- Assert.assertEquals("a\nbccccdeaa\ny", convertString.removeRepeatedChar(input));
- }
-
- @Test
- public void testremoveDuplicate_CRLF() {
- ConvertString convertString = new ConvertString("\r\n");
- String input = "a\r\nbccccdeaa\r\n\r\n\r\ny";
- Assert.assertEquals("a\r\nbccccdeaa\r\ny", convertString.removeRepeatedChar(input));
- }
-
- @Test
- public void testremoveDuplicate_TAB() {
- ConvertString convertString = new ConvertString("\t");
- String input = "a\tbccccdeaa\t\t\t\t\t\ty";
- Assert.assertEquals("a\tbccccdeaa\ty", convertString.removeRepeatedChar(input));
- }
-
- @Test
- public void testremoveDuplicate_LETTER() {
- ConvertString convertString = new ConvertString("c");
- String input = "atbccccdeaaCCtcy";
- Assert.assertEquals("atbcdeaaCCtcy", convertString.removeRepeatedChar(input));
- convertString = new ConvertString("a");
- input = "aaatbccccdeaaCCtcy";
- Assert.assertEquals("atbccccdeaCCtcy", convertString.removeRepeatedChar(input));
- convertString = new ConvertString("ac");
- input = "acacacactbccccdeaCCtaccy";
- Assert.assertEquals("actbccccdeaCCtaccy", convertString.removeRepeatedChar(input));
-
- input = "abcdef";
- Assert.assertEquals("abcdef", convertString.removeRepeatedChar(input));
- }
-
- @Test
- public void testremoveDuplicate_NULL1() {
- ConvertString convertString = new ConvertString("c");
- String input = null;
- Assert.assertEquals(null, convertString.removeRepeatedChar(input));
- input = "";
- Assert.assertEquals("", convertString.removeRepeatedChar(input));
- }
-
- @Test
- public void testremoveDuplicate_NULL2() {
- ConvertString convertString = new ConvertString();
- String input = "aaabc";
- Assert.assertEquals(input, convertString.removeRepeatedChar(input));
- convertString = new ConvertString("");
- Assert.assertEquals(input, convertString.removeRepeatedChar(input));
- convertString = new ConvertString(null);
- Assert.assertEquals(input, convertString.removeRepeatedChar(input));
- }
-
- @Test
- public void testremoveWhiteSpace() {
- ConvertString convertString = new ConvertString();
- String input = "a b\t\t\tc\n\n\nd\r\re\f\ff";
- String cleanStr = convertString.removeRepeatedWhitespaces(input);
- Assert.assertEquals("a b\tc\nd\re\ff", cleanStr);
-
- // \r\n will not be removed
- input = "aaab\r\n\r\n\r\nx";
- cleanStr = convertString.removeRepeatedWhitespaces(input);
- Assert.assertEquals("aaab\r\n\r\n\r\nx", cleanStr);
-
- input = "a\u0085\u0085\u0085b\u00A0\u00A0c\u1680\u1680d\u180E\u180Ee\u2000\u2000f\u2001\u2001g"
- + "\u2002\u2002h\u2003\u2003i\u2004\u2004";
- cleanStr = convertString.removeRepeatedWhitespaces(input);
- Assert.assertEquals("a\u0085b\u00A0c\u1680d\u180Ee\u2000f\u2001g\u2002h\u2003i\u2004", cleanStr);
-
- input = "a\u2005\u2005\u2005b\u2006\u2006c\u2007\u2007d\u2008\u2008e\u2009\u2009f\u200A\u200Ag"
- + "\u2028\u2028h\u2029\u2029i\u202F\u202Fj\u205F\u205Fk\u3000\u3000l";
- cleanStr = convertString.removeRepeatedWhitespaces(input);
- Assert.assertEquals("a\u2005b\u2006c\u2007d\u2008e\u2009f\u200Ag\u2028h\u2029i\u202Fj\u205Fk\u3000l", cleanStr);
- }
-
- @Test
- public void testremoveWhiteSpaceNull() {
- ConvertString convertString = new ConvertString();
- String input = "";
- String cleanStr = convertString.removeRepeatedWhitespaces(input);
- Assert.assertEquals("", cleanStr);
- input = null;
- cleanStr = convertString.removeRepeatedWhitespaces(input);
- Assert.assertNull(cleanStr);
- }
-
- @Test
- public void testremoveWhiteSpacWithoutSpace() {
- ConvertString convertString = new ConvertString();
- String input = "abccdef";
- String cleanStr = convertString.removeRepeatedWhitespaces(input);
- Assert.assertEquals("abccdef", cleanStr);
- }
-}
+ package io.cdap.wrangler.dq;
+
+ import org.junit.Assert;
+ import org.junit.Test;
+
+ /**
+ * Unit tests for ConvertString functions.
+ */
+ public class ConvertStringTest {
+ private static final String expected = "abc";
+
+ @Test
+ public void testRemoveTrailingAndLeading() {
+ ConvertString convertString = new ConvertString();
+
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading(expected));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc"));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc "));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc "));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc "));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading(" abc "));
+ Assert.assertEquals("ab c", convertString.removeTrailingAndLeading(" ab c"));
+ Assert.assertEquals("a b c", convertString.removeTrailingAndLeading(" a b c "));
+
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading("\t" + expected, "\t"));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading(expected + "\t", "\t"));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading('\u0009' + expected, "\t"));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading('\u0009' + expected, '\u0009' + ""));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading('\u0009' + expected + '\u0009' + '\u0009', "\t"));
+ Assert.assertEquals("abc ", convertString.removeTrailingAndLeading("\t" + "abc ", "\t"));
+ Assert.assertEquals("a\tbc", convertString.removeTrailingAndLeading("\t" + "a" + "\t" + "bc", "\t"));
+ Assert.assertEquals("\tabc", convertString.removeTrailingAndLeading("\t" + "abc "));
+ Assert.assertEquals(expected, ("\t" + "abc ").trim());
+
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading("\n" + expected, "\n"));
+ Assert.assertEquals("abc ", convertString.removeTrailingAndLeading("\n" + "abc ", "\n"));
+
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading(expected, "\r"));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading("\r" + expected, "\r"));
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeading("\r" + expected + "\r", "\r"));
+ Assert.assertEquals("abc ", convertString.removeTrailingAndLeading("\r" + "abc ", "\r"));
+ Assert.assertEquals("abc ", convertString.removeTrailingAndLeading("\r" + "abc " + "\r", "\r"));
+
+ Assert.assertEquals("bc", convertString.removeTrailingAndLeading(" abc", " a"));
+ Assert.assertEquals(" a", convertString.removeTrailingAndLeading(" abc", "bc"));
+ Assert.assertEquals("ab", convertString.removeTrailingAndLeading("cabc", "c"));
+ }
+
+ @Test
+ public void testRemoveTrailingAndLeadingWhitespaces() {
+ ConvertString convertString = new ConvertString();
+ String inputData = " " + expected;
+ for (String removeChar : convertString.WHITESPACE_CHARS) {
+ inputData = inputData + removeChar;
+ }
+ Assert.assertEquals(expected, convertString.removeTrailingAndLeadingWhitespaces(inputData));
+ }
+
+ @Test
+ public void testRemoveDuplicate_CR() {
+ ConvertString convertString = new ConvertString("\r");
+ String input = "a\rbccccdeaa\r\r\ry";
+ Assert.assertEquals("a\rbccccdeaa\ry", convertString.removeRepeatedChar(input));
+ }
+
+ @Test
+ public void testRemoveDuplicate_LF() {
+ ConvertString convertString = new ConvertString("\n");
+ String input = "a\nbccccdeaa\n\n\ny";
+ Assert.assertEquals("a\nbccccdeaa\ny", convertString.removeRepeatedChar(input));
+ }
+
+ @Test
+ public void testRemoveDuplicate_CRLF() {
+ ConvertString convertString = new ConvertString("\r\n");
+ String input = "a\r\nbccccdeaa\r\n\r\n\r\ny";
+ Assert.assertEquals("a\r\nbccccdeaa\r\ny", convertString.removeRepeatedChar(input));
+ }
+
+ @Test
+ public void testRemoveDuplicate_TAB() {
+ ConvertString convertString = new ConvertString("\t");
+ String input = "a\tbccccdeaa\t\t\t\t\t\ty";
+ Assert.assertEquals("a\tbccccdeaa\ty", convertString.removeRepeatedChar(input));
+ }
+
+ @Test
+ public void testRemoveDuplicate_LETTER() {
+ ConvertString convertString = new ConvertString("c");
+ String input = "atbccccdeaaCCtcy";
+ Assert.assertEquals("atbcdeaaCCtcy", convertString.removeRepeatedChar(input));
+
+ convertString = new ConvertString("a");
+ input = "aaatbccccdeaaCCtcy";
+ Assert.assertEquals("atbccccdeaCCtcy", convertString.removeRepeatedChar(input));
+
+ convertString = new ConvertString("ac");
+ input = "acacacactbccccdeaCCtaccy";
+ Assert.assertEquals("actbccccdeaCCtaccy", convertString.removeRepeatedChar(input));
+
+ input = "abcdef";
+ Assert.assertEquals("abcdef", convertString.removeRepeatedChar(input));
+ }
+
+ @Test
+ public void testRemoveDuplicate_NULL1() {
+ ConvertString convertString = new ConvertString("c");
+ String input = null;
+ Assert.assertNull(convertString.removeRepeatedChar(input));
+ input = "";
+ Assert.assertEquals("", convertString.removeRepeatedChar(input));
+ }
+
+ @Test
+ public void testRemoveDuplicate_NULL2() {
+ ConvertString convertString = new ConvertString();
+ String input = "aaabc";
+ Assert.assertEquals(input, convertString.removeRepeatedChar(input));
+
+ convertString = new ConvertString("");
+ Assert.assertEquals(input, convertString.removeRepeatedChar(input));
+
+ convertString = new ConvertString(null);
+ Assert.assertEquals(input, convertString.removeRepeatedChar(input));
+ }
+
+ @Test
+ public void testRemoveWhiteSpace() {
+ ConvertString convertString = new ConvertString();
+
+ String input = "a b\t\t\tc\n\n\nd\r\re\f\ff"; // runs of one repeated whitespace char
+ String cleanStr = convertString.removeRepeatedWhitespaces(input);
+ Assert.assertEquals("a b\tc\nd\re\ff", cleanStr);
+
+ input = "aaab\r\n\r\n\r\nx"; // repeated \r\n pairs are expected to come back unchanged
+ cleanStr = convertString.removeRepeatedWhitespaces(input);
+ Assert.assertEquals("aaab\r\n\r\n\r\nx", cleanStr);
+
+ input = "a\u0085\u0085\u0085b\u00A0\u00A0c\u1680\u1680d\u180E\u180Ee\u2000\u2000f\u2001\u2001g"
+ + "\u2002\u2002h\u2003\u2003i\u2004\u2004";
+ cleanStr = convertString.removeRepeatedWhitespaces(input);
+ Assert.assertEquals("a\u0085b\u00A0c\u1680d\u180Ee\u2000f\u2001g\u2002h\u2003i\u2004", cleanStr);
+
+ input = "a\u2005\u2005\u2005b\u2006\u2006c\u2007\u2007d\u2008\u2008e\u2009\u2009f\u200A\u200Ag"
+ + "\u2028\u2028h\u2029\u2029i\u202F\u202Fj\u205F\u205Fk\u3000\u3000l";
+ cleanStr = convertString.removeRepeatedWhitespaces(input);
+ String expectedCleaned = "a\u2005b\u2006c\u2007d\u2008e\u2009f\u200Ag\u2028h\u2029i\u202Fj\u205Fk\u3000l";
+ Assert.assertEquals(expectedCleaned, cleanStr);
+ }
+
+ @Test
+ public void testRemoveWhiteSpaceNull() {
+ ConvertString convertString = new ConvertString();
+ String input = "";
+ String cleanStr = convertString.removeRepeatedWhitespaces(input);
+ Assert.assertEquals("", cleanStr);
+ input = null;
+ cleanStr = convertString.removeRepeatedWhitespaces(input);
+ Assert.assertNull(cleanStr);
+ }
+
+ @Test
+ public void testRemoveWhiteSpaceWithoutSpace() {
+ ConvertString convertString = new ConvertString();
+ String input = "abccdef"; // contains no whitespace, so it is expected back unchanged
+ String cleanStr = convertString.removeRepeatedWhitespaces(input);
+ Assert.assertEquals("abccdef", cleanStr);
+ }
+ }
+
\ No newline at end of file