diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 000000000..7b016a89f
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "java.compile.nullAnalysis.mode": "automatic"
+}
\ No newline at end of file
diff --git a/CHANGES.md b/CHANGES.md
new file mode 100644
index 000000000..c05218e61
--- /dev/null
+++ b/CHANGES.md
@@ -0,0 +1,138 @@
+Here's how you can proceed with the Wrangler enhancement:
+
+
+  Step 1: Fork the repository.
+  Step 2: Modify the grammar: update the parser rules to accept the new byte-size and time-duration tokens.
+  Step 3: Update the API (wrangler-api): update the token types to support these new types.
+
+
+
+  Changes that I have made:
+Defined the token classes Byte (for BYTE_SIZE values) and Time (for TIME_DURATION values).
+
+Then I updated the grammar in the wrangler-core module (Directives.g4).
+
+Both classes implement the base Token interface (io.cdap.wrangler.api.parser.Token).
+ // Add lexer rules for BYTE_SIZE and TIME_DURATION
+BYTE : [0-9]+ ('KB'|'MB'|'GB'|'B');
+TIME_DURATION : [0-9]+ ('ms'|'s'|'m'|'h'|'d');
+
+// parser rules
+byteSizeArg : BYTE;
+timeDurationArg : TIME;
+
+package io.cdap.wrangler.api.parser;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonPrimitive;
+
+public class Byte implements Token {
+    private final long bytes;
+
+    public Byte(String value) {
+        this.bytes = parseByteSize(value.trim().toUpperCase());
+    }
+
+    private long parseByteSize(String value) {
+        if (value.endsWith("KB")) {
+            return (long)(Double.parseDouble(value.replace("KB", "")) * 1024);
+        } else if (value.endsWith("MB")) {
+            return (long)(Double.parseDouble(value.replace("MB", "")) * 1024 * 1024);
+        } else if (value.endsWith("GB")) {
+            return (long)(Double.parseDouble(value.replace("GB", "")) * 1024 * 1024 * 1024);
+        } else if (value.endsWith("B")) {
+            return Long.parseLong(value.replace("B", ""));
+        } else {
+            throw new IllegalArgumentException("Invalid byte size format: " + value);
+        }
+    }
+
+    public long getBytes() {
+        return bytes;
+    }
+
+    @Override
+    public Object value() {
+        return bytes;
+    }
+
+    @Override
+    public TokenType type() {
+        return TokenType.BYTE;
+    }
+
+    @Override
+    public JsonElement toJson() {
+        return new JsonPrimitive(bytes);
+    }
+}
+
+........
+
+package io.cdap.wrangler.api.parser;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonPrimitive;
+
+public class Time implements Token {
+    private final long nanoseconds;
+
+    public Time (String value) {
+        this.nanoseconds = parseTimeDuration(value.trim().toLowerCase());
+    }
+
+    private long parseTimeDuration(String value) {
+        if (value.endsWith("ns")) {
+            return Long.parseLong(value.replace("ns", ""));
+        } else if (value.endsWith("ms")) {
+            return (long)(Double.parseDouble(value.replace("ms", "")) * 1_000_000);
+        } else if (value.endsWith("s")) {
+            return (long)(Double.parseDouble(value.replace("s", "")) * 1_000_000_000);
+        } else if (value.endsWith("m")) {
+            return (long)(Double.parseDouble(value.replace("m", "")) * 60 * 1_000_000_000L);
+        } else if (value.endsWith("h")) {
+            return (long)(Double.parseDouble(value.replace("h", "")) * 3600 * 1_000_000_000L);
+        } else {
+            throw new IllegalArgumentException("Invalid time duration format: " + value);
+        }
+    }
+
+    public long getNanoseconds() {
+        return nanoseconds;
+    }
+
+    @Override
+    public Object value() {
+        return nanoseconds;
+    }
+
+    @Override
+    public TokenType type() {
+        return TokenType.TIME;
+    }
+
+    @Override
+    public JsonElement toJson() {
+        return new JsonPrimitive(nanoseconds);
+    }
+}
+
+........
+
+
+package io.cdap.wrangler.api.parser;
+
+public enum Tokenn {
+    BYTE,
+    TIME;
+}
+
+//BYTE_SIZE
+//TIME_DURATION
+
+.......
+  IDENTIFIER, BYTE, TIME
+
+
+  ....
+
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Byte.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Byte.java
new file mode 100644
index 000000000..ab25db641
--- /dev/null
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Byte.java
@@ -0,0 +1,112 @@
+package io.cdap.wrangler.api.parser;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonPrimitive;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Token for a byte-size literal such as {@code 10KB} or {@code 1.5MB}, normalized to bytes.
+ *
+ * A literal is a non-negative decimal number followed by one of the units {@code B}, {@code KB},
+ * {@code MB} or {@code GB}, matched case-insensitively. Units are binary multiples, so {@code 1KB}
+ * is 1024 bytes. A fractional number of bytes is truncated toward zero.
+ */
+public class Byte implements Token {
+  /** Shape of an upper-cased literal: the number is group 1 and the unit is group 2. */
+  private static final Pattern FORMAT = Pattern.compile("(\\d+(?:\\.\\d+)?)\\s*(GB|MB|KB|B)");
+
+  private final long bytes;
+
+  /**
+   * Parses a byte-size literal.
+   *
+   * @param value literal to parse, for example {@code "512B"} or {@code "1.5mb"}
+   * @throws NullPointerException if {@code value} is {@code null}
+   * @throws IllegalArgumentException if {@code value} is not a number followed by a supported unit,
+   *     or if the resulting size does not fit in a {@code long}
+   */
+  public Byte(String value) {
+    Objects.requireNonNull(value, "value");
+    // Locale.ROOT keeps the case conversion independent of the JVM's default locale.
+    this.bytes = parseByteSize(value.trim().toUpperCase(Locale.ROOT));
+  }
+
+  /**
+   * Converts a trimmed, upper-cased literal into a number of bytes.
+   *
+   * @param value literal made of a number followed by a unit
+   * @return the size in bytes, truncated toward zero
+   * @throws IllegalArgumentException if the literal is malformed or out of range
+   */
+  private static long parseByteSize(String value) {
+    Matcher matcher = FORMAT.matcher(value);
+    if (!matcher.matches()) {
+      throw new IllegalArgumentException("Invalid byte size format: " + value);
+    }
+    BigDecimal scaled = new BigDecimal(matcher.group(1))
+      .multiply(BigDecimal.valueOf(bytesPerUnit(matcher.group(2))));
+    try {
+      // Exact decimal arithmetic: no double rounding, and overflow is reported instead of clamped.
+      return scaled.setScale(0, RoundingMode.DOWN).longValueExact();
+    } catch (ArithmeticException e) {
+      throw new IllegalArgumentException("Byte size out of range: " + value, e);
+    }
+  }
+
+  /**
+   * Returns the number of bytes in one {@code unit}.
+   *
+   * @param unit one of the units accepted by {@link #FORMAT}
+   * @return the unit's size in bytes
+   */
+  private static long bytesPerUnit(String unit) {
+    switch (unit) {
+      case "KB":
+        return 1024L;
+      case "MB":
+        return 1024L * 1024;
+      case "GB":
+        return 1024L * 1024 * 1024;
+      default:
+        // FORMAT admits only B, KB, MB and GB, so this is the plain-byte unit.
+        return 1L;
+    }
+  }
+
+  /**
+   * @return the size in bytes
+   */
+  public long getBytes() {
+    return bytes;
+  }
+
+  /**
+   * @return the size in bytes, boxed as a {@code Long}
+   */
+  @Override
+  public Object value() {
+    return bytes;
+  }
+
+  /**
+   * @return {@link TokenType#BYTE}
+   */
+  @Override
+  public TokenType type() {
+    return TokenType.BYTE;
+  }
+
+  /**
+   * @return the size in bytes as a JSON number
+   */
+  @Override
+  public JsonElement toJson() {
+    return new JsonPrimitive(bytes);
+  }
+}
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Time.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Time.java
new file mode 100644
index 000000000..53bfee66f
--- /dev/null
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Time.java
@@ -0,0 +1,118 @@
+package io.cdap.wrangler.api.parser;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonPrimitive;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Token for a time-duration literal such as {@code 150ms} or {@code 2.5h}, normalized to nanoseconds.
+ *
+ * A literal is a non-negative decimal number followed by one of the units {@code ns}, {@code ms},
+ * {@code s}, {@code m}, {@code h} or {@code d}, matched case-insensitively. A fractional number of
+ * nanoseconds is truncated toward zero.
+ */
+public class Time implements Token {
+  /** Shape of a lower-cased literal: the number is group 1 and the unit is group 2. */
+  private static final Pattern FORMAT = Pattern.compile("(\\d+(?:\\.\\d+)?)\\s*(ns|ms|s|m|h|d)");
+
+  private final long nanoseconds;
+
+  /**
+   * Parses a time-duration literal.
+   *
+   * @param value literal to parse, for example {@code "150ms"} or {@code "2.5H"}
+   * @throws NullPointerException if {@code value} is {@code null}
+   * @throws IllegalArgumentException if {@code value} is not a number followed by a supported unit,
+   *     or if the resulting duration does not fit in a {@code long}
+   */
+  public Time(String value) {
+    Objects.requireNonNull(value, "value");
+    // Locale.ROOT keeps the case conversion independent of the JVM's default locale.
+    this.nanoseconds = parseTimeDuration(value.trim().toLowerCase(Locale.ROOT));
+  }
+
+  /**
+   * Converts a trimmed, lower-cased literal into a number of nanoseconds.
+   *
+   * @param value literal made of a number followed by a unit
+   * @return the duration in nanoseconds, truncated toward zero
+   * @throws IllegalArgumentException if the literal is malformed or out of range
+   */
+  private static long parseTimeDuration(String value) {
+    Matcher matcher = FORMAT.matcher(value);
+    if (!matcher.matches()) {
+      throw new IllegalArgumentException("Invalid time duration format: " + value);
+    }
+    BigDecimal scaled = new BigDecimal(matcher.group(1))
+      .multiply(BigDecimal.valueOf(nanosPerUnit(matcher.group(2))));
+    try {
+      // Exact decimal arithmetic: no double rounding, and overflow is reported instead of clamped.
+      return scaled.setScale(0, RoundingMode.DOWN).longValueExact();
+    } catch (ArithmeticException e) {
+      throw new IllegalArgumentException("Time duration out of range: " + value, e);
+    }
+  }
+
+  /**
+   * Returns the number of nanoseconds in one {@code unit}.
+   *
+   * @param unit one of the units accepted by {@link #FORMAT}
+   * @return the unit's length in nanoseconds
+   */
+  private static long nanosPerUnit(String unit) {
+    switch (unit) {
+      case "ms":
+        return TimeUnit.MILLISECONDS.toNanos(1);
+      case "s":
+        return TimeUnit.SECONDS.toNanos(1);
+      case "m":
+        return TimeUnit.MINUTES.toNanos(1);
+      case "h":
+        return TimeUnit.HOURS.toNanos(1);
+      case "d":
+        // The TIME_DURATION lexer rule in Directives.g4 accepts 'd', so days must parse here too.
+        return TimeUnit.DAYS.toNanos(1);
+      default:
+        // FORMAT admits no other unit, so this is "ns".
+        return 1L;
+    }
+  }
+
+  /**
+   * @return the duration in nanoseconds
+   */
+  public long getNanoseconds() {
+    return nanoseconds;
+  }
+
+  /**
+   * @return the duration in nanoseconds, boxed as a {@code Long}
+   */
+  @Override
+  public Object value() {
+    return nanoseconds;
+  }
+
+  /**
+   * @return {@link TokenType#TIME}
+   */
+  @Override
+  public TokenType type() {
+    return TokenType.TIME;
+  }
+
+  /**
+   * @return the duration in nanoseconds as a JSON number
+   */
+  @Override
+  public JsonElement toJson() {
+    return new JsonPrimitive(nanoseconds);
+  }
+}
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
index 8c93b0e6a..8827c2c08 100644
--- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
@@ -152,5 +152,5 @@ public enum TokenType implements Serializable {
    * Represents the enumerated type for the object of type {@code String} with restrictions
    * on characters that can be present in a string.
*/ - IDENTIFIER + IDENTIFIER, BYTE, TIME } diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Tokenn.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Tokenn.java new file mode 100644 index 000000000..b588c6ffb --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Tokenn.java @@ -0,0 +1,9 @@ +package io.cdap.wrangler.api.parser; + +public enum Tokenn { + BYTE, + TIME; +} + +//BYTE_SIZE +//TIME_DURATION \ No newline at end of file diff --git a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 index 7c517ed6a..c80f59d2f 100644 --- a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 +++ b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 @@ -14,6 +14,9 @@ * the License. */ + + + grammar Directives; options { @@ -311,3 +314,12 @@ fragment Int fragment Digit : [0-9] ; + + + // Add lexer rules for BYTE_SIZE and TIME_DURATION +BYTE : [0-9]+ ('KB'|'MB'|'GB'|'B'); +TIME_DURATION : [0-9]+ ('ms'|'s'|'m'|'h'|'d'); + +// parser rules +byteSizeArg : BYTE; +timeDurationArg : TIME;