From cc5adce9ad59a9460ac33f519597040644d67d15 Mon Sep 17 00:00:00 2001 From: Shina Kumari Date: Tue, 15 Apr 2025 23:48:35 +0530 Subject: [PATCH 1/2] Develop Added ByteSize as a Byte and TimeDuration as a Time parser with directive --- .vscode/settings.json | 3 ++ .../io/cdap/wrangler/api/parser/Byte.java | 45 +++++++++++++++++ .../io/cdap/wrangler/api/parser/Time.java | 48 +++++++++++++++++++ .../cdap/wrangler/api/parser/TokenType.java | 2 +- .../io/cdap/wrangler/api/parser/Tokenn.java | 9 ++++ .../io/cdap/wrangler/parser/Directives.g4 | 12 +++++ 6 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json create mode 100644 wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Byte.java create mode 100644 wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Time.java create mode 100644 wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Tokenn.java diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..7b016a89f --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "java.compile.nullAnalysis.mode": "automatic" +} \ No newline at end of file diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Byte.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Byte.java new file mode 100644 index 000000000..ab25db641 --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Byte.java @@ -0,0 +1,45 @@ +package io.cdap.wrangler.api.parser; + +import com.google.gson.JsonElement; +import com.google.gson.JsonPrimitive; + +public class Byte implements Token { + private final long bytes; + + public Byte(String value) { + this.bytes = parseByteSize(value.trim().toUpperCase()); + } + + private long parseByteSize(String value) { + if (value.endsWith("KB")) { + return (long)(Double.parseDouble(value.replace("KB", "")) * 1024); + } else if (value.endsWith("MB")) { + return (long)(Double.parseDouble(value.replace("MB", "")) * 1024 * 1024); + } else if 
(value.endsWith("GB")) { + return (long)(Double.parseDouble(value.replace("GB", "")) * 1024 * 1024 * 1024); + } else if (value.endsWith("B")) { + return Long.parseLong(value.replace("B", "")); + } else { + throw new IllegalArgumentException("Invalid byte size format: " + value); + } + } + + public long getBytes() { + return bytes; + } + + @Override + public Object value() { + return bytes; + } + + @Override + public TokenType type() { + return TokenType.BYTE; + } + + @Override + public JsonElement toJson() { + return new JsonPrimitive(bytes); + } +} diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Time.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Time.java new file mode 100644 index 000000000..53bfee66f --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Time.java @@ -0,0 +1,48 @@ +package io.cdap.wrangler.api.parser; + +import com.google.gson.JsonElement; +import com.google.gson.JsonPrimitive; + +public class Time implements Token { + private final long nanoseconds; + + public Time (String value) { + this.nanoseconds = parseTimeDuration(value.trim().toLowerCase()); + } + + private long parseTimeDuration(String value) { + if (value.endsWith("ns")) { + return Long.parseLong(value.replace("ns", "")); + } else if (value.endsWith("ms")) { + return (long)(Double.parseDouble(value.replace("ms", "")) * 1_000_000); + } else if (value.endsWith("s")) { + return (long)(Double.parseDouble(value.replace("s", "")) * 1_000_000_000); + } else if (value.endsWith("m")) { + return (long)(Double.parseDouble(value.replace("m", "")) * 60 * 1_000_000_000L); + } else if (value.endsWith("h")) { + return (long)(Double.parseDouble(value.replace("h", "")) * 3600 * 1_000_000_000L); + } else { + throw new IllegalArgumentException("Invalid time duration format: " + value); + } + } + + public long getNanoseconds() { + return nanoseconds; + } + + @Override + public Object value() { + return nanoseconds; + } + + @Override + public TokenType 
type() { + return TokenType.TIME; + } + + @Override + public JsonElement toJson() { + return new JsonPrimitive(nanoseconds); + } +} + diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java index 8c93b0e6a..8827c2c08 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java @@ -152,5 +152,5 @@ public enum TokenType implements Serializable { * Represents the enumerated type for the object of type {@code String} with restrictions * on characters that can be present in a string. */ - IDENTIFIER + IDENTIFIER, BYTE, TIME } diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Tokenn.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Tokenn.java new file mode 100644 index 000000000..b588c6ffb --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Tokenn.java @@ -0,0 +1,9 @@ +package io.cdap.wrangler.api.parser; + +public enum Tokenn { + BYTE, + TIME; +} + +//BYTE_SIZE +//TIME_DURATION \ No newline at end of file diff --git a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 index 7c517ed6a..c80f59d2f 100644 --- a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 +++ b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 @@ -14,6 +14,9 @@ * the License. 
*/ + + + grammar Directives; options { @@ -311,3 +314,12 @@ fragment Int fragment Digit : [0-9] ; + + + // Lexer rules for byte sizes (BYTE) and time durations (TIME_DURATION) +BYTE : [0-9]+ ('KB'|'MB'|'GB'|'B'); +TIME_DURATION : [0-9]+ ('ms'|'s'|'m'|'h'|'d'); + +// parser rules: each references the lexer token defined above +byteSizeArg : BYTE; +timeDurationArg : TIME_DURATION; From a2777d17c795898526cc5acc5c7aba1d7b9c88f7 Mon Sep 17 00:00:00 2001 From: SHINA <107980731+ShinaKumari@users.noreply.github.com> Date: Wed, 16 Apr 2025 00:01:55 +0530 Subject: [PATCH 2/2] Create CHANGES.md Here's how you can proceed with the Wrangler enhancement: Step 1: Fork the Repository Step 2: Modify the Grammar.......// Modify the parser rules to accept these tokens. Step 3: Update the API (wrangler-api)......//Update the Token Types to support these new types. Changes that i have done : Define Byte as a BYTE_SIZE,and Time as TIMEDURATION classes. THEN I FIX IT IN THE WRABGLER CORE FOLDER \\Directives.g4 Extend the base Token class correctly (from io.cdap.wrangler.api.parser.Token).
// Add lexer rules for BYTE_SIZE and TIME_DURATION BYTE : [0-9]+ ('KB'|'MB'|'GB'|'B'); TIME_DURATION : [0-9]+ ('ms'|'s'|'m'|'h'|'d'); // parser rules byteSizeArg : BYTE; timeDurationArg : TIME; package io.cdap.wrangler.api.parser; import com.google.gson.JsonElement; import com.google.gson.JsonPrimitive; public class Byte implements Token { private final long bytes; public Byte(String value) { this.bytes = parseByteSize(value.trim().toUpperCase()); } private long parseByteSize(String value) { if (value.endsWith("KB")) { return (long)(Double.parseDouble(value.replace("KB", "")) * 1024); } else if (value.endsWith("MB")) { return (long)(Double.parseDouble(value.replace("MB", "")) * 1024 * 1024); } else if (value.endsWith("GB")) { return (long)(Double.parseDouble(value.replace("GB", "")) * 1024 * 1024 * 1024); } else if (value.endsWith("B")) { return Long.parseLong(value.replace("B", "")); } else { throw new IllegalArgumentException("Invalid byte size format: " + value); } } public long getBytes() { return bytes; } @Override public Object value() { return bytes; } @Override public TokenType type() { return TokenType.BYTE; } @Override public JsonElement toJson() { return new JsonPrimitive(bytes); } } ........ 
package io.cdap.wrangler.api.parser; import com.google.gson.JsonElement; import com.google.gson.JsonPrimitive; public class Time implements Token { private final long nanoseconds; public Time (String value) { this.nanoseconds = parseTimeDuration(value.trim().toLowerCase()); } private long parseTimeDuration(String value) { if (value.endsWith("ns")) { return Long.parseLong(value.replace("ns", "")); } else if (value.endsWith("ms")) { return (long)(Double.parseDouble(value.replace("ms", "")) * 1_000_000); } else if (value.endsWith("s")) { return (long)(Double.parseDouble(value.replace("s", "")) * 1_000_000_000); } else if (value.endsWith("m")) { return (long)(Double.parseDouble(value.replace("m", "")) * 60 * 1_000_000_000L); } else if (value.endsWith("h")) { return (long)(Double.parseDouble(value.replace("h", "")) * 3600 * 1_000_000_000L); } else { throw new IllegalArgumentException("Invalid time duration format: " + value); } } public long getNanoseconds() { return nanoseconds; } @Override public Object value() { return nanoseconds; } @Override public TokenType type() { return TokenType.TIME; } @Override public JsonElement toJson() { return new JsonPrimitive(nanoseconds); } } ........ package io.cdap.wrangler.api.parser; public enum Tokenn { BYTE, TIME; } //BYTE_SIZE //TIME_DURATION ....... IDENTIFIER, BYTE, TIME .... --- CHANGES.md | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 CHANGES.md diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 000000000..c05218e61 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,138 @@ +Here's how you can proceed with the Wrangler enhancement: + + + Step 1: Fork the Repository + Step 2: Modify the Grammar.......// Modify the parser rules to accept these tokens. + Step 3: Update the API (wrangler-api)......//Update the Token Types to support these new types. + + + + Changes that i have done : +Define Byte as a BYTE_SIZE,and Time as TIMEDURATION classes. 
+ +Then I fixed it in the wrangler-core folder (Directives.g4). + +Implement the base Token interface correctly (from io.cdap.wrangler.api.parser.Token). + // Add lexer rules for BYTE_SIZE and TIME_DURATION +BYTE : [0-9]+ ('KB'|'MB'|'GB'|'B'); +TIME_DURATION : [0-9]+ ('ms'|'s'|'m'|'h'|'d'); + +// parser rules +byteSizeArg : BYTE; +timeDurationArg : TIME; + +package io.cdap.wrangler.api.parser; + +import com.google.gson.JsonElement; +import com.google.gson.JsonPrimitive; + +public class Byte implements Token { + private final long bytes; + + public Byte(String value) { + this.bytes = parseByteSize(value.trim().toUpperCase()); + } + + private long parseByteSize(String value) { + if (value.endsWith("KB")) { + return (long)(Double.parseDouble(value.replace("KB", "")) * 1024); + } else if (value.endsWith("MB")) { + return (long)(Double.parseDouble(value.replace("MB", "")) * 1024 * 1024); + } else if (value.endsWith("GB")) { + return (long)(Double.parseDouble(value.replace("GB", "")) * 1024 * 1024 * 1024); + } else if (value.endsWith("B")) { + return Long.parseLong(value.replace("B", "")); + } else { + throw new IllegalArgumentException("Invalid byte size format: " + value); + } + } + + public long getBytes() { + return bytes; + } + + @Override + public Object value() { + return bytes; + } + + @Override + public TokenType type() { + return TokenType.BYTE; + } + + @Override + public JsonElement toJson() { + return new JsonPrimitive(bytes); + } +} + +........ 
+ +package io.cdap.wrangler.api.parser; + +import com.google.gson.JsonElement; +import com.google.gson.JsonPrimitive; + +public class Time implements Token { + private final long nanoseconds; + + public Time (String value) { + this.nanoseconds = parseTimeDuration(value.trim().toLowerCase()); + } + + private long parseTimeDuration(String value) { + if (value.endsWith("ns")) { + return Long.parseLong(value.replace("ns", "")); + } else if (value.endsWith("ms")) { + return (long)(Double.parseDouble(value.replace("ms", "")) * 1_000_000); + } else if (value.endsWith("s")) { + return (long)(Double.parseDouble(value.replace("s", "")) * 1_000_000_000); + } else if (value.endsWith("m")) { + return (long)(Double.parseDouble(value.replace("m", "")) * 60 * 1_000_000_000L); + } else if (value.endsWith("h")) { + return (long)(Double.parseDouble(value.replace("h", "")) * 3600 * 1_000_000_000L); + } else { + throw new IllegalArgumentException("Invalid time duration format: " + value); + } + } + + public long getNanoseconds() { + return nanoseconds; + } + + @Override + public Object value() { + return nanoseconds; + } + + @Override + public TokenType type() { + return TokenType.TIME; + } + + @Override + public JsonElement toJson() { + return new JsonPrimitive(nanoseconds); + } +} + +........ + + +package io.cdap.wrangler.api.parser; + +public enum Tokenn { + BYTE, + TIME; +} + +//BYTE_SIZE +//TIME_DURATION + +....... + IDENTIFIER, BYTE, TIME + + + .... +