Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions presto-clp/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -140,5 +140,22 @@
<artifactId>commons-io</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-tests</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-main-base</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-expressions</artifactId>
</dependency>
</dependencies>
</project>

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.plugin.clp;

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlType;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

import static com.facebook.presto.common.type.VarcharType.VARCHAR;

public final class ClpFunctions
{
// Private no-op constructor: this class only exposes static functions and is not meant to be instantiated.
private ClpFunctions()
{
}

/**
 * SQL scalar {@code CLP_GET_INT}, declared as returning BIGINT for a VARCHAR JSON path.
 * <p>
 * The body does not read {@code jsonPath}; it yields a fixed zero.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @param jsonPath JSON path argument (unused by this body)
 * @return always {@code 0}
 */
@ScalarFunction(value = "CLP_GET_INT", deterministic = false)
@Description("Retrieves an integer value corresponding to the given JSON path.")
@SqlType(StandardTypes.BIGINT)
public static long clpGetInt(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
    final long placeholder = 0L;
    return placeholder;
}

/**
 * SQL scalar {@code CLP_GET_FLOAT}, declared as returning DOUBLE for a VARCHAR JSON path.
 * <p>
 * The body does not read {@code jsonPath}; it yields a fixed {@code 0.0}.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @param jsonPath JSON path argument (unused by this body)
 * @return always {@code 0.0}
 */
@ScalarFunction(value = "CLP_GET_FLOAT", deterministic = false)
@Description("Retrieves a floating point value corresponding to the given JSON path.")
@SqlType(StandardTypes.DOUBLE)
public static double clpGetFloat(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
    final double placeholder = 0.0d;
    return placeholder;
}

/**
 * SQL scalar {@code CLP_GET_BOOL}, declared as returning BOOLEAN for a VARCHAR JSON path.
 * <p>
 * The body does not read {@code jsonPath}; it yields a fixed {@code false}.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @param jsonPath JSON path argument (unused by this body)
 * @return always {@code false}
 */
@ScalarFunction(value = "CLP_GET_BOOL", deterministic = false)
@Description("Retrieves a boolean value corresponding to the given JSON path.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean clpGetBool(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
    final boolean placeholder = false;
    return placeholder;
}

/**
 * SQL scalar {@code CLP_GET_STRING}, declared as returning VARCHAR for a VARCHAR JSON path.
 * <p>
 * The body does not read {@code jsonPath}; it yields {@code Slices.EMPTY_SLICE}.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @param jsonPath JSON path argument (unused by this body)
 * @return always {@code Slices.EMPTY_SLICE}
 */
@ScalarFunction(value = "CLP_GET_STRING", deterministic = false)
@Description("Retrieves a string value corresponding to the given JSON path.")
@SqlType(StandardTypes.VARCHAR)
public static Slice clpGetString(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
    final Slice placeholder = Slices.EMPTY_SLICE;
    return placeholder;
}

/**
 * SQL scalar {@code CLP_GET_STRING_ARRAY}, declared as returning ARRAY(VARCHAR) for a VARCHAR JSON path.
 * <p>
 * The body does not read {@code jsonPath}; it builds a block from a fresh VARCHAR
 * block builder to which no entries are appended.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @param jsonPath JSON path argument (unused by this body)
 * @return the block produced by an untouched VARCHAR block builder
 */
@ScalarFunction(value = "CLP_GET_STRING_ARRAY", deterministic = false)
@Description("Retrieves an array value corresponding to the given JSON path and converts each element into a string.")
@SqlType("ARRAY(VARCHAR)")
public static Block clpGetStringArray(@SqlType(StandardTypes.VARCHAR) Slice jsonPath)
{
    // No block-builder status (null) and zero expected entries; nothing is written before build().
    return VARCHAR.createBlockBuilder(null, 0).build();
}

/**
 * SQL scalar {@code CLP_WILDCARD_STRING_COLUMN}, a zero-argument function declared as returning VARCHAR.
 * <p>
 * The body yields {@code Slices.EMPTY_SLICE} unconditionally.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @return always {@code Slices.EMPTY_SLICE}
 */
@ScalarFunction(value = "CLP_WILDCARD_STRING_COLUMN", deterministic = false)
@Description("Used in filter expressions to allow comparisons with any string column in the log record.")
@SqlType(StandardTypes.VARCHAR)
public static Slice clpWildcardStringColumn()
{
    final Slice placeholder = Slices.EMPTY_SLICE;
    return placeholder;
}

/**
 * SQL scalar {@code CLP_WILDCARD_INT_COLUMN}, a zero-argument function declared as returning BIGINT.
 * <p>
 * The body yields a fixed zero.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @return always {@code 0}
 */
@ScalarFunction(value = "CLP_WILDCARD_INT_COLUMN", deterministic = false)
@Description("Used in filter expressions to allow comparisons with any integer column in the log record.")
@SqlType(StandardTypes.BIGINT)
public static long clpWildcardIntColumn()
{
    final long placeholder = 0L;
    return placeholder;
}

/**
 * SQL scalar {@code CLP_WILDCARD_FLOAT_COLUMN}, a zero-argument function declared as returning DOUBLE.
 * <p>
 * The body yields a fixed {@code 0.0}.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @return always {@code 0.0}
 */
@ScalarFunction(value = "CLP_WILDCARD_FLOAT_COLUMN", deterministic = false)
@Description("Used in filter expressions to allow comparisons with any floating point column in the log record.")
@SqlType(StandardTypes.DOUBLE)
public static double clpWildcardFloatColumn()
{
    final double placeholder = 0.0d;
    return placeholder;
}

/**
 * SQL scalar {@code CLP_WILDCARD_BOOL_COLUMN}, a zero-argument function declared as returning BOOLEAN.
 * <p>
 * The body yields a fixed {@code false}.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @return always {@code false}
 */
@ScalarFunction(value = "CLP_WILDCARD_BOOL_COLUMN", deterministic = false)
@Description("Used in filter expressions to allow comparisons with any boolean column in the log record.")
@SqlType(StandardTypes.BOOLEAN)
public static boolean clpWildcardBoolColumn()
{
    final boolean placeholder = false;
    return placeholder;
}

/**
 * SQL scalar {@code CLP_GET_JSON_STRING}, a zero-argument function declared as returning VARCHAR.
 * <p>
 * Per its description it stands for the whole log record rendered as JSON, which is
 * why it takes no path argument. The body itself yields {@code Slices.EMPTY_SLICE}.
 * NOTE(review): presumably a placeholder that is rewritten during query optimization
 * before it would ever execute -- confirm against the plan optimizer.
 *
 * @return always {@code Slices.EMPTY_SLICE}
 */
@ScalarFunction(value = "CLP_GET_JSON_STRING", deterministic = false)
@Description("Converts an entire log record into a JSON string.")
@SqlType(StandardTypes.VARCHAR)
public static Slice clpGetJSONString()
{
    final Slice placeholder = Slices.EMPTY_SLICE;
    return placeholder;
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Document that these are placeholder implementations

All functions return hardcoded default values. While this is intentional since they're rewritten during query optimization, it would be helpful to add a class-level comment explaining this design choice.

+/**
+ * CLP-specific scalar functions that are rewritten during query optimization.
+ * These functions serve as placeholders and their actual logic is implemented
+ * through query plan rewriting in ClpPlanOptimizer and ClpFilterToKqlConverter.
+ */
 public final class ClpFunctions
 {
🤖 Prompt for AI Agents
In presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFunctions.java
between lines 33 and 112, add a class-level comment explaining that all the
scalar functions currently return hardcoded default values as placeholders
because their implementations are replaced during query optimization. This
documentation will clarify the intentional design choice for future maintainers
and reviewers.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Consider adding a parameter to CLP_GET_JSON_STRING

Unlike other CLP_GET functions, this one takes no parameters. Should it accept a JSON path parameter for consistency, or is it intended to return the entire record?


🌐 Web query:

What is the intended behavior of CLP_GET_JSON_STRING function in Presto CLP connector - should it take a JSON path parameter or return the entire log record?

💡 Result:

The intended behavior of the CLP_GET_JSON_STRING function in the Presto CLP connector is not documented in detail within the provided search results. However, based on best practices for similar JSON extraction functions in SQL engines and the available context around JSON path extraction and CLP encoding, the following evidence-supported conclusions can be drawn:

  • The typical approach for JSON extraction functions (such as JSONPATH in Apache Pinot or field mapping in the Presto Kafka connector) is to allow the user to specify a JSON path or field mapping[1][6][8]. The path points to the element or elements you want to extract from the JSON record rather than always returning the entire record.
  • For instance, the Presto Kafka connector's JSON decoder lets users map columns to “slash-separated list of field names to select a field from the JSON object”, which serves as a path to the desired property[1].
  • Similarly, other data platforms (like Pinot) provide functions such as JSONPATH(jsonField, 'jsonPath') where the second argument is a path expression that targets a specific field in the JSON structure[6].

Extracts the object value from jsonField based on 'jsonPath', the result type is inferred based on JSON value[6].

  • The CLP-specific stream ingestion feature in Pinot also makes it clear that fields are individually targeted and encoded/extracted based on configuration, not by returning the whole JSON record by default[8].

Given these conventions, the intended behavior of a function named CLP_GET_JSON_STRING in Presto’s CLP connector is most likely to accept a JSON path parameter and extract (or return) the value at that path from the log record—not just return the entire JSON log record. Requiring a path parameter also aligns with SQL and analytic platform norms for working with nested or semi-structured data.

If CLP_GET_JSON_STRING were designed to return the whole log record, there would be little benefit in calling it specifically, as fetching the entire record typically does not require a specialized function or parameterization.

In summary:
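CLP_GET_JSON_STRING should be expected to take a JSON path parameter and return only the requested field or value, not the entire log record, following standard design patterns for JSON extraction in Presto and related systems[1][6][8]. Note, however, that this conclusion is inferred from other systems rather than from the connector itself: the function's own @Description in this change reads "Converts an entire log record into a JSON string.", which indicates that the parameterless, whole-record form is intentional. If official documentation becomes available, it should be referenced for precise usage.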

Citations:


Add a JSON path parameter to CLP_GET_JSON_STRING for consistency

Based on common SQL connector patterns and Presto’s own JSON extraction functions, CLP_GET_JSON_STRING should accept a JSON-path argument rather than always returning the full record.

Please update accordingly:

• File: presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFunctions.java
Lines: 106–112

• Change the signature from:

public static Slice clpGetJSONString()

to:

public static Slice clpGetJSONString(
    @SqlType(StandardTypes.VARCHAR) Slice jsonPath)

• Adjust the annotations:

@ScalarFunction(value = "CLP_GET_JSON_STRING", deterministic = false)
@Description("Extracts the JSON string at the given path from a log record.")
@SqlType(StandardTypes.VARCHAR)

• Implement lookup of the specified path inside the record (or throw a clear error if the path is invalid).

This aligns with Apache Pinot's JSONPATH(jsonField, 'path') function and with the path-based JSON field mapping in Presto's Kafka connector, as well as other CLP-based extractors.

🤖 Prompt for AI Agents
In presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpFunctions.java
around lines 106 to 112, update the clpGetJSONString method to accept a Slice
parameter annotated with @SqlType(StandardTypes.VARCHAR) representing the JSON
path. Modify the annotations to reflect that the function extracts the JSON
string at the given path from a log record. Implement logic inside the method to
parse the log record and extract the JSON value at the specified path, throwing
a clear error if the path is invalid or not found.

}
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,16 @@ public class ClpMetadataFilterProvider
public ClpMetadataFilterProvider(ClpConfig config)
{
requireNonNull(config, "config is null");
String configPath = config.getMetadataFilterConfig();
if (configPath == null || configPath.isEmpty()) {
filterMap = ImmutableMap.of();
return;
}

ObjectMapper mapper = new ObjectMapper();
try {
filterMap = mapper.readValue(
new File(config.getMetadataFilterConfig()),
new File(configPath),
new TypeReference<Map<String, List<Filter>>>() {});
}
catch (IOException e) {
Expand Down
Loading
Loading