Skip to content

Develop #975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"java.compile.nullAnalysis.mode": "automatic"
}
138 changes: 138 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
Here's how you can proceed with the Wrangler enhancement:


Step 1: Fork the Repository
Step 2: Modify the grammar: update the parser rules to accept these tokens.
Step 3: Update the API (wrangler-api): update the token types to support these new types.



Changes that I have made:
Define Byte as the BYTE_SIZE class, and Time as the TIME_DURATION class.

Then I fixed it in the wrangler-core folder (Directives.g4).

Implement the base Token interface correctly (io.cdap.wrangler.api.parser.Token).
// Lexer rules for byte-size (BYTE) and time-duration (TIME_DURATION) literals:
// an unsigned integer immediately followed by a unit suffix.
BYTE : [0-9]+ ('KB'|'MB'|'GB'|'B');
TIME_DURATION : [0-9]+ ('ms'|'s'|'m'|'h'|'d');

// Parser rules. Each one wraps the lexer token defined above; timeDurationArg
// must name TIME_DURATION because no lexer rule called TIME is defined here.
byteSizeArg : BYTE;
timeDurationArg : TIME_DURATION;

package io.cdap.wrangler.api.parser;

import com.google.gson.JsonElement;
import com.google.gson.JsonPrimitive;

/**
 * Token for a byte-size literal such as {@code 10KB} or {@code 1.5MB}, normalized to bytes.
 *
 * <p>The text is a number followed by one of the units {@code B}, {@code KB}, {@code MB} or
 * {@code GB}, matched case-insensitively. Units are binary multiples, so {@code 1KB} is 1024
 * bytes. Fractional results are truncated towards zero.
 */
public class Byte implements Token {
  private static final long BYTES_PER_KB = 1024L;
  private static final long BYTES_PER_MB = 1024L * BYTES_PER_KB;
  private static final long BYTES_PER_GB = 1024L * BYTES_PER_MB;

  private final long bytes;

  /**
   * Parses a textual byte size.
   *
   * @param value number followed by a unit suffix, e.g. {@code "512B"} or {@code "2.5gb"}
   * @throws NullPointerException if {@code value} is null
   * @throws NumberFormatException if the part before the unit is not a valid number
   * @throws IllegalArgumentException if the unit is missing or unknown, or the size is
   *     negative or does not fit in a {@code long}
   */
  public Byte(String value) {
    if (value == null) {
      throw new NullPointerException("Byte size value must not be null");
    }
    // Locale.ROOT keeps the case folding independent of the JVM's default locale.
    this.bytes = parseByteSize(value.trim().toUpperCase(java.util.Locale.ROOT));
  }

  /** Converts an upper-cased, trimmed byte-size string into a number of bytes. */
  private static long parseByteSize(String value) {
    // Two-letter units are tested first because every unit ends in "B".
    if (value.endsWith("KB")) {
      return scale(stripSuffix(value, "KB"), BYTES_PER_KB, value);
    }
    if (value.endsWith("MB")) {
      return scale(stripSuffix(value, "MB"), BYTES_PER_MB, value);
    }
    if (value.endsWith("GB")) {
      return scale(stripSuffix(value, "GB"), BYTES_PER_GB, value);
    }
    if (value.endsWith("B")) {
      // Plain bytes are parsed as a whole number so large counts keep full long precision.
      long parsed = Long.parseLong(stripSuffix(value, "B"));
      if (parsed < 0) {
        throw new IllegalArgumentException("Byte size must not be negative: " + value);
      }
      return parsed;
    }
    throw new IllegalArgumentException("Invalid byte size format: " + value);
  }

  /** Removes only the trailing unit, leaving the rest of the text untouched. */
  private static String stripSuffix(String value, String suffix) {
    return value.substring(0, value.length() - suffix.length());
  }

  /** Multiplies the numeric part by the unit size, rejecting negative or overflowing results. */
  private static long scale(String number, long bytesPerUnit, String original) {
    double scaled = Double.parseDouble(number) * bytesPerUnit;
    if (scaled < 0) {
      throw new IllegalArgumentException("Byte size must not be negative: " + original);
    }
    // A double-to-long cast saturates silently, so the range is checked explicitly.
    if (Double.isNaN(scaled) || scaled >= (double) Long.MAX_VALUE) {
      throw new IllegalArgumentException("Byte size out of range: " + original);
    }
    return (long) scaled;
  }

  /** Returns the parsed size in bytes. */
  public long getBytes() {
    return bytes;
  }

  /** Returns the size in bytes, boxed as a {@code Long}. */
  @Override
  public Object value() {
    return bytes;
  }

  @Override
  public TokenType type() {
    return TokenType.BYTE;
  }

  /** Returns the size in bytes as a JSON number. */
  @Override
  public JsonElement toJson() {
    return new JsonPrimitive(bytes);
  }
}

........

package io.cdap.wrangler.api.parser;

import com.google.gson.JsonElement;
import com.google.gson.JsonPrimitive;

/**
 * Token for a time-duration literal such as {@code 150ms} or {@code 2.5h}, normalized to
 * nanoseconds.
 *
 * <p>The text is a number followed by one of the units {@code ns}, {@code ms}, {@code s},
 * {@code m} (minutes), {@code h} or {@code d} (days), matched case-insensitively. Fractional
 * results are truncated towards zero.
 */
public class Time implements Token {
  private static final long NANOS_PER_MILLISECOND = 1_000_000L;
  private static final long NANOS_PER_SECOND = 1_000_000_000L;
  private static final long NANOS_PER_MINUTE = 60L * NANOS_PER_SECOND;
  private static final long NANOS_PER_HOUR = 60L * NANOS_PER_MINUTE;
  private static final long NANOS_PER_DAY = 24L * NANOS_PER_HOUR;

  private final long nanoseconds;

  /**
   * Parses a textual time duration.
   *
   * @param value number followed by a unit suffix, e.g. {@code "250ms"} or {@code "1.5H"}
   * @throws NullPointerException if {@code value} is null
   * @throws NumberFormatException if the part before the unit is not a valid number
   * @throws IllegalArgumentException if the unit is missing or unknown, or the duration does
   *     not fit in a {@code long} number of nanoseconds
   */
  public Time(String value) {
    if (value == null) {
      throw new NullPointerException("Time duration value must not be null");
    }
    // Locale.ROOT keeps the case folding independent of the JVM's default locale.
    this.nanoseconds = parseTimeDuration(value.trim().toLowerCase(java.util.Locale.ROOT));
  }

  /** Converts a lower-cased, trimmed duration string into nanoseconds. */
  private static long parseTimeDuration(String value) {
    // "ns" and "ms" are tested before "s" because they also end in "s".
    if (value.endsWith("ns")) {
      // Nanoseconds are parsed as a whole number so large counts keep full long precision.
      return Long.parseLong(stripSuffix(value, "ns"));
    }
    if (value.endsWith("ms")) {
      return scale(stripSuffix(value, "ms"), NANOS_PER_MILLISECOND, value);
    }
    if (value.endsWith("s")) {
      return scale(stripSuffix(value, "s"), NANOS_PER_SECOND, value);
    }
    if (value.endsWith("m")) {
      return scale(stripSuffix(value, "m"), NANOS_PER_MINUTE, value);
    }
    if (value.endsWith("h")) {
      return scale(stripSuffix(value, "h"), NANOS_PER_HOUR, value);
    }
    if (value.endsWith("d")) {
      // The TIME_DURATION lexer rule accepts a 'd' suffix, so days must be understood here too.
      return scale(stripSuffix(value, "d"), NANOS_PER_DAY, value);
    }
    throw new IllegalArgumentException("Invalid time duration format: " + value);
  }

  /** Removes only the trailing unit, leaving the rest of the text untouched. */
  private static String stripSuffix(String value, String suffix) {
    return value.substring(0, value.length() - suffix.length());
  }

  /** Multiplies the numeric part by the unit length, rejecting results outside long range. */
  private static long scale(String number, long nanosPerUnit, String original) {
    double scaled = Double.parseDouble(number) * nanosPerUnit;
    // A double-to-long cast saturates silently, so the range is checked explicitly.
    if (Double.isNaN(scaled) || Math.abs(scaled) >= (double) Long.MAX_VALUE) {
      throw new IllegalArgumentException("Time duration out of range: " + original);
    }
    return (long) scaled;
  }

  /** Returns the parsed duration in nanoseconds. */
  public long getNanoseconds() {
    return nanoseconds;
  }

  /** Returns the duration in nanoseconds, boxed as a {@code Long}. */
  @Override
  public Object value() {
    return nanoseconds;
  }

  @Override
  public TokenType type() {
    return TokenType.TIME;
  }

  /** Returns the duration in nanoseconds as a JSON number. */
  @Override
  public JsonElement toJson() {
    return new JsonPrimitive(nanoseconds);
  }
}

........


package io.cdap.wrangler.api.parser;

/**
 * Enumerates two token kinds, {@code BYTE} and {@code TIME}.
 *
 * <p>NOTE(review): the name looks like a misspelling of {@code Token}, and the constants mirror
 * the {@code BYTE} and {@code TIME} entries listed for {@code TokenType}; confirm whether this
 * enum is still needed.
 */
public enum Tokenn {
  BYTE, TIME
}

//BYTE_SIZE
//TIME_DURATION

.......
IDENTIFIER, BYTE, TIME


....

45 changes: 45 additions & 0 deletions wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Byte.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package io.cdap.wrangler.api.parser;

import com.google.gson.JsonElement;
import com.google.gson.JsonPrimitive;

/**
 * Token for a byte-size literal such as {@code 10KB} or {@code 1.5MB}, normalized to bytes.
 *
 * <p>The text is a number followed by one of the units {@code B}, {@code KB}, {@code MB} or
 * {@code GB}, matched case-insensitively. Units are binary multiples, so {@code 1KB} is 1024
 * bytes. Fractional results are truncated towards zero.
 */
public class Byte implements Token {
  private static final long BYTES_PER_KB = 1024L;
  private static final long BYTES_PER_MB = 1024L * BYTES_PER_KB;
  private static final long BYTES_PER_GB = 1024L * BYTES_PER_MB;

  private final long bytes;

  /**
   * Parses a textual byte size.
   *
   * @param value number followed by a unit suffix, e.g. {@code "512B"} or {@code "2.5gb"}
   * @throws NullPointerException if {@code value} is null
   * @throws NumberFormatException if the part before the unit is not a valid number
   * @throws IllegalArgumentException if the unit is missing or unknown, or the size is
   *     negative or does not fit in a {@code long}
   */
  public Byte(String value) {
    if (value == null) {
      throw new NullPointerException("Byte size value must not be null");
    }
    // Locale.ROOT keeps the case folding independent of the JVM's default locale.
    this.bytes = parseByteSize(value.trim().toUpperCase(java.util.Locale.ROOT));
  }

  /** Converts an upper-cased, trimmed byte-size string into a number of bytes. */
  private static long parseByteSize(String value) {
    // Two-letter units are tested first because every unit ends in "B".
    if (value.endsWith("KB")) {
      return scale(stripSuffix(value, "KB"), BYTES_PER_KB, value);
    }
    if (value.endsWith("MB")) {
      return scale(stripSuffix(value, "MB"), BYTES_PER_MB, value);
    }
    if (value.endsWith("GB")) {
      return scale(stripSuffix(value, "GB"), BYTES_PER_GB, value);
    }
    if (value.endsWith("B")) {
      // Plain bytes are parsed as a whole number so large counts keep full long precision.
      long parsed = Long.parseLong(stripSuffix(value, "B"));
      if (parsed < 0) {
        throw new IllegalArgumentException("Byte size must not be negative: " + value);
      }
      return parsed;
    }
    throw new IllegalArgumentException("Invalid byte size format: " + value);
  }

  /** Removes only the trailing unit, leaving the rest of the text untouched. */
  private static String stripSuffix(String value, String suffix) {
    return value.substring(0, value.length() - suffix.length());
  }

  /** Multiplies the numeric part by the unit size, rejecting negative or overflowing results. */
  private static long scale(String number, long bytesPerUnit, String original) {
    double scaled = Double.parseDouble(number) * bytesPerUnit;
    if (scaled < 0) {
      throw new IllegalArgumentException("Byte size must not be negative: " + original);
    }
    // A double-to-long cast saturates silently, so the range is checked explicitly.
    if (Double.isNaN(scaled) || scaled >= (double) Long.MAX_VALUE) {
      throw new IllegalArgumentException("Byte size out of range: " + original);
    }
    return (long) scaled;
  }

  /** Returns the parsed size in bytes. */
  public long getBytes() {
    return bytes;
  }

  /** Returns the size in bytes, boxed as a {@code Long}. */
  @Override
  public Object value() {
    return bytes;
  }

  @Override
  public TokenType type() {
    return TokenType.BYTE;
  }

  /** Returns the size in bytes as a JSON number. */
  @Override
  public JsonElement toJson() {
    return new JsonPrimitive(bytes);
  }
}
48 changes: 48 additions & 0 deletions wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Time.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package io.cdap.wrangler.api.parser;

import com.google.gson.JsonElement;
import com.google.gson.JsonPrimitive;

/**
 * Token for a time-duration literal such as {@code 150ms} or {@code 2.5h}, normalized to
 * nanoseconds.
 *
 * <p>The text is a number followed by one of the units {@code ns}, {@code ms}, {@code s},
 * {@code m} (minutes), {@code h} or {@code d} (days), matched case-insensitively. Fractional
 * results are truncated towards zero.
 */
public class Time implements Token {
  private static final long NANOS_PER_MILLISECOND = 1_000_000L;
  private static final long NANOS_PER_SECOND = 1_000_000_000L;
  private static final long NANOS_PER_MINUTE = 60L * NANOS_PER_SECOND;
  private static final long NANOS_PER_HOUR = 60L * NANOS_PER_MINUTE;
  private static final long NANOS_PER_DAY = 24L * NANOS_PER_HOUR;

  private final long nanoseconds;

  /**
   * Parses a textual time duration.
   *
   * @param value number followed by a unit suffix, e.g. {@code "250ms"} or {@code "1.5H"}
   * @throws NullPointerException if {@code value} is null
   * @throws NumberFormatException if the part before the unit is not a valid number
   * @throws IllegalArgumentException if the unit is missing or unknown, or the duration does
   *     not fit in a {@code long} number of nanoseconds
   */
  public Time(String value) {
    if (value == null) {
      throw new NullPointerException("Time duration value must not be null");
    }
    // Locale.ROOT keeps the case folding independent of the JVM's default locale.
    this.nanoseconds = parseTimeDuration(value.trim().toLowerCase(java.util.Locale.ROOT));
  }

  /** Converts a lower-cased, trimmed duration string into nanoseconds. */
  private static long parseTimeDuration(String value) {
    // "ns" and "ms" are tested before "s" because they also end in "s".
    if (value.endsWith("ns")) {
      // Nanoseconds are parsed as a whole number so large counts keep full long precision.
      return Long.parseLong(stripSuffix(value, "ns"));
    }
    if (value.endsWith("ms")) {
      return scale(stripSuffix(value, "ms"), NANOS_PER_MILLISECOND, value);
    }
    if (value.endsWith("s")) {
      return scale(stripSuffix(value, "s"), NANOS_PER_SECOND, value);
    }
    if (value.endsWith("m")) {
      return scale(stripSuffix(value, "m"), NANOS_PER_MINUTE, value);
    }
    if (value.endsWith("h")) {
      return scale(stripSuffix(value, "h"), NANOS_PER_HOUR, value);
    }
    if (value.endsWith("d")) {
      // The TIME_DURATION lexer rule accepts a 'd' suffix, so days must be understood here too.
      return scale(stripSuffix(value, "d"), NANOS_PER_DAY, value);
    }
    throw new IllegalArgumentException("Invalid time duration format: " + value);
  }

  /** Removes only the trailing unit, leaving the rest of the text untouched. */
  private static String stripSuffix(String value, String suffix) {
    return value.substring(0, value.length() - suffix.length());
  }

  /** Multiplies the numeric part by the unit length, rejecting results outside long range. */
  private static long scale(String number, long nanosPerUnit, String original) {
    double scaled = Double.parseDouble(number) * nanosPerUnit;
    // A double-to-long cast saturates silently, so the range is checked explicitly.
    if (Double.isNaN(scaled) || Math.abs(scaled) >= (double) Long.MAX_VALUE) {
      throw new IllegalArgumentException("Time duration out of range: " + original);
    }
    return (long) scaled;
  }

  /** Returns the parsed duration in nanoseconds. */
  public long getNanoseconds() {
    return nanoseconds;
  }

  /** Returns the duration in nanoseconds, boxed as a {@code Long}. */
  @Override
  public Object value() {
    return nanoseconds;
  }

  @Override
  public TokenType type() {
    return TokenType.TIME;
  }

  /** Returns the duration in nanoseconds as a JSON number. */
  @Override
  public JsonElement toJson() {
    return new JsonPrimitive(nanoseconds);
  }
}

Original file line number Diff line number Diff line change
Expand Up @@ -152,5 +152,5 @@ public enum TokenType implements Serializable {
* Represents the enumerated type for the object of type {@code String} with restrictions
* on characters that can be present in a string.
*/
IDENTIFIER
IDENTIFIER, BYTE, TIME
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package io.cdap.wrangler.api.parser;

/**
 * Enumerates two token kinds, {@code BYTE} and {@code TIME}.
 *
 * <p>NOTE(review): the name looks like a misspelling of {@code Token}, and the constants mirror
 * the {@code BYTE} and {@code TIME} entries added to {@code TokenType}; confirm whether this
 * enum is still needed.
 */
public enum Tokenn {
  BYTE, TIME
}

//BYTE_SIZE
//TIME_DURATION
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
* the License.
*/




grammar Directives;

options {
Expand Down Expand Up @@ -311,3 +314,12 @@ fragment Int
fragment Digit
: [0-9]
;


// Lexer rules for byte-size (BYTE) and time-duration (TIME_DURATION) literals:
// an unsigned integer immediately followed by a unit suffix.
BYTE : [0-9]+ ('KB'|'MB'|'GB'|'B');
TIME_DURATION : [0-9]+ ('ms'|'s'|'m'|'h'|'d');

// Parser rules. Each one wraps the lexer token defined above; timeDurationArg
// must name TIME_DURATION because no lexer rule called TIME is defined here.
byteSizeArg : BYTE;
timeDurationArg : TIME_DURATION;