Skip to content

Commit 861dc52

Browse files
authored
fix: null pointer exception in event parser (#1488)
1 parent f3b04c2 commit 861dc52

File tree

10 files changed

+90
-19
lines changed

10 files changed

+90
-19
lines changed

src/data-pipeline/etl-common/src/main/java/software/aws/solution/clickstream/common/BaseEventParser.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.util.HashMap;
2828
import java.util.Iterator;
2929
import java.util.Map;
30+
import java.util.Optional;
3031

3132
import static software.aws.solution.clickstream.common.Util.deCodeUri;
3233
import static software.aws.solution.clickstream.common.Util.decompress;
@@ -149,11 +150,12 @@ protected void setPageViewUrl(final ClickstreamEvent clickstreamEvent, final Str
149150
return;
150151
}
151152
clickstreamEvent.setPageViewPageUrl(deCodeUri(url));
152-
UrlParseResult urlParseResult = Util.parseUrl(url);
153+
Optional<UrlParseResult> urlParseResultOpt = Util.parseUrl(url);
153154

154-
if (urlParseResult == null) {
155+
if (urlParseResultOpt.isEmpty()) {
155156
return;
156157
}
158+
UrlParseResult urlParseResult = urlParseResultOpt.get();
157159

158160
if (urlParseResult.getPath() != null) {
159161
clickstreamEvent.setPageViewPageUrlPath(urlParseResult.getPath());

src/data-pipeline/etl-common/src/main/java/software/aws/solution/clickstream/common/ClickstreamEventParser.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,13 @@
2323
import software.aws.solution.clickstream.common.ingest.*;
2424
import software.aws.solution.clickstream.common.model.*;
2525

26-
import java.sql.*;
27-
import java.time.*;
28-
import java.util.*;
26+
import java.sql.Timestamp;
27+
import java.time.Instant;
28+
import java.util.ArrayList;
29+
import java.util.HashMap;
30+
import java.util.List;
31+
import java.util.Map;
32+
import java.util.Optional;
2933
import java.util.stream.Collectors;
3034
import java.util.stream.Stream;
3135

@@ -398,8 +402,11 @@ private void setPageView(final Event ingestEvent, final ClickstreamEvent clickst
398402
clickstreamEvent.setPageViewLatestReferrerHost(ingestEvent.getAttributes().getLatestReferrerHost());
399403
if (ingestEvent.getAttributes().getLatestReferrer() != null
400404
&& clickstreamEvent.getPageViewLatestReferrerHost() == null) {
401-
UrlParseResult latestReferrerUrlParseResult = parseUrl(ingestEvent.getAttributes().getLatestReferrer());
402-
clickstreamEvent.setPageViewLatestReferrerHost(latestReferrerUrlParseResult.getHostName());
405+
406+
Optional<UrlParseResult> r = parseUrl(ingestEvent.getAttributes().getLatestReferrer());
407+
if (r.isPresent()) {
408+
clickstreamEvent.setPageViewLatestReferrerHost(r.get().getHostName());
409+
}
403410
}
404411
clickstreamEvent.setPageViewEntrances(ingestEvent.getAttributes().getEntrances());
405412
}

src/data-pipeline/etl-common/src/main/java/software/aws/solution/clickstream/common/Util.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.HashMap;
3939
import java.util.List;
4040
import java.util.Map;
41+
import java.util.Optional;
4142
import java.util.zip.GZIPInputStream;
4243

4344
@Slf4j
@@ -185,9 +186,9 @@ public static Map<String, List<String>> getUriParams(final URI uriObj) {
185186
return params;
186187
}
187188

188-
public static UrlParseResult parseUrl(final String url) {
189+
public static Optional<UrlParseResult> parseUrl(final String url) {
189190
if (url == null || url.isEmpty()) {
190-
return null;
191+
return Optional.empty();
191192
}
192193
String schemaUrl = url;
193194
if (!url.substring(0, Math.min(url.length(), 15)).contains("://")) {
@@ -210,7 +211,7 @@ public static UrlParseResult parseUrl(final String url) {
210211
}
211212
}
212213
log.debug("parseUrl(): result host: {}", result.getHostName());
213-
return result;
214+
return Optional.of(result);
214215
}
215216

216217
private static UrlParseResult extractFromUrl(final String schemaUrl) throws URISyntaxException {

src/data-pipeline/etl-common/src/main/java/software/aws/solution/clickstream/common/enrich/RuleBasedTrafficSourceHelper.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import java.util.HashMap;
3434
import java.util.List;
3535
import java.util.Map;
36+
import java.util.Optional;
3637

3738
import static software.aws.solution.clickstream.common.Util.getUriParams;
3839
import static software.aws.solution.clickstream.common.Util.objectToJsonString;
@@ -189,7 +190,10 @@ public CategoryTrafficSource parse(final String pageUrl, final String pageReferr
189190
String pageHostName = null;
190191
if (pageUrl != null && !pageUrl.isEmpty()) {
191192
trafficSourceUtm = getUtmSourceFromUrl(pageUrl);
192-
pageHostName = parseUrl(pageUrl).getHostName();
193+
Optional<UrlParseResult> r = parseUrl(pageUrl);
194+
if (r.isPresent()) {
195+
pageHostName = r.get().getHostName();
196+
}
193197
}
194198
CategoryTrafficSource result = parse(trafficSourceUtm, pageHostName, pageReferrer, latestReferrer, latestReferrerHost);
195199
categoryTrafficSourceCache.put(cachedKey, result);
@@ -339,7 +343,10 @@ public CategoryTrafficSource parse(final TrafficSourceUtm trafficSourceUtmInput,
339343

340344
String pageReferrerHost = null;
341345
if (pageReferrer != null && !pageReferrer.isEmpty()) {
342-
pageReferrerHost = parseUrl(pageReferrer).getHostName();
346+
Optional<UrlParseResult> r = parseUrl(pageReferrer);
347+
if (r.isPresent()) {
348+
pageReferrerHost = r.get().getHostName();
349+
}
343350
}
344351
boolean isInternalReferrer = pageHostName != null && pageHostName.equalsIgnoreCase(pageReferrerHost);
345352
boolean isInternalLatestReferrer = pageHostName != null && pageHostName.equalsIgnoreCase(latestReferrerHost);

src/data-pipeline/etl-common/src/main/java/software/aws/solution/clickstream/common/enrich/ts/rule/CategoryListEvaluator.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import java.util.HashSet;
3232
import java.util.List;
3333
import java.util.Map;
34+
import java.util.Optional;
3435
import java.util.regex.Matcher;
3536
import java.util.regex.Pattern;
3637

@@ -103,10 +104,16 @@ public SourceCategoryAndTerms evaluate(final String theReferrerUrl) {
103104
return categoryAndTerms;
104105
}
105106

106-
UrlParseResult r = Util.parseUrl(theReferrerUrl);
107-
String hostName = r.getHostName();
108-
String path = r.getPath();
109-
Map<String, List<String>> urlParams = r.getQueryParameters();
107+
Optional<UrlParseResult> r = Util.parseUrl(theReferrerUrl);
108+
String hostName = "";
109+
String path = "";
110+
Map<String, List<String>> urlParams = new HashMap<>();
111+
112+
if (r.isPresent()) {
113+
hostName = r.get().getHostName();
114+
path = r.get().getPath();
115+
urlParams = r.get().getQueryParameters();
116+
}
110117

111118
List<String> candidateUrls = getCandidateUrls(theReferrerUrl, hostName, path);
112119

src/data-pipeline/etl-common/src/test/java/software/aws/solution/clickstream/common/ClickstreamEventParserTest.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,4 +524,18 @@ void test_DeviceOperatingSystem_set_as_platform_web() throws IOException {
524524
Assertions.assertNull(event.getDeviceOperatingSystem());
525525
}
526526

527+
@Test
528+
void test_parse_line_with_empty_latest_referrer() throws IOException {
529+
// ./gradlew clean test --info --tests software.aws.solution.clickstream.common.ClickstreamEventParserTest.test_parse_line_with_empty_latest_referrer
530+
String line = resourceFileContent("/empty_latest_referrer.json");
531+
log.info(line);
532+
ClickstreamEventParser clickstreamEventParser = getClickstreamEventParser();
533+
String projectId = "test_project_id";
534+
String fileName = "empty_latest_referrer.json";
535+
536+
ParseRowResult rowResult = clickstreamEventParser.parseLineToDBRow(line, projectId, fileName);
537+
538+
Assertions.assertEquals(1, rowResult.getClickstreamEventList().size());
539+
Assertions.assertEquals(1, rowResult.getClickstreamUserList().size());
540+
}
527541
}

src/data-pipeline/etl-common/src/test/java/software/aws/solution/clickstream/common/util/UtilTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,13 +169,13 @@ void test_readTextFile() throws IOException {
169169
void test_parseUrl() {
170170
// ./gradlew clean test --info --tests software.aws.solution.clickstream.common.util.UtilTest.test_parseUrl
171171
String url = "https://www.example.com/abc/test?param1=value1&param2=value2";
172-
UrlParseResult result = Util.parseUrl(url);
172+
UrlParseResult result = Util.parseUrl(url).get();
173173
assertEquals("www.example.com", result.getHostName());
174174
assertEquals("param1=value1&param2=value2", result.getQueryString());
175175
assertEquals("/abc/test", result.getPath());
176176

177-
assertNull(Util.parseUrl(null));
178-
assertNull(Util.parseUrl(""));
177+
assertTrue(Util.parseUrl(null).isEmpty());
178+
assertTrue(Util.parseUrl("").isEmpty());
179179
assertNotNull(Util.parseUrl("https://www.example.com/"));
180180
assertNotNull(Util.parseUrl("https://www.example.com/?"));
181181
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"appId": "shop",
3+
"client_timestamp": "1719469182900",
4+
"compression": null,
5+
"data": "[{\"event_type\":\"_page_view\",\"event_id\":\"9029c8fd-ed51-4da0-a7ea-40aad4d3ad45\",\"device_id\":\"564ea695-00d1-4fdf-beb2-9c54682a4217\",\"unique_id\":\"c631fe47-bf74-4fcf-a3ff-0e5380250d10\",\"app_id\":\"shop\",\"timestamp\":1719466410498,\"host_name\":\"test.cloudfront.net\",\"locale\":\"en-US\",\"system_language\":\"en\",\"country_code\":\"US\",\"zone_offset\":28800000,\"make\":\"Gecko\",\"platform\":\"Web\",\"screen_height\":1440,\"screen_width\":2560,\"viewport_height\":1302,\"viewport_width\":1276,\"sdk_name\":\"aws-solution-clickstream-sdk\",\"sdk_version\":\"0.12.1\",\"user\":{\"_user_first_touch_timestamp\":{\"value\":1719466021378,\"set_timestamp\":1719466021378},\"_user_id\":{\"value\":\"58\",\"set_timestamp\":1719466046946}},\"attributes\":{\"_traffic_source_source\":\"amazon\",\"_traffic_source_medium\":\"cpc\",\"_traffic_source_campaign\":\"summer_promotion\",\"_traffic_source_campaign_id\":\"summer_promotion_01\",\"_traffic_source_term\":\"running_shoes\",\"_traffic_source_content\":\"banner_ad_1\",\"_traffic_source_clid\":\"amazon_ad_123\",\"_traffic_source_clid_platform\":\"amazon_ads\",\"_session_id\":\"80250d10-20240627-052701378\",\"_session_start_timestamp\":1719466021378,\"_session_duration\":389120,\"_session_number\":1,\"_page_title\":\"Retail Demo Store\",\"_page_url\":\"https://test.cloudfront.net/product/ecc45e4c-9249-4b06-9f99-aa068eebddf4?feature=home_product_recs\",\"_latest_referrer\":\"\",\"_page_referrer\":\"https://test.cloudfront.net/\",\"_page_referrer_title\":\"Retail Demo Store\",\"_entrances\":0,\"_previous_timestamp\":1719466409801,\"_engagement_time_msec\":697}}]",
6+
"date": "2024-06-27T06:19:45+00:00",
7+
"fakeIp": null,
8+
"ingest_time": 1719469185000,
9+
"ip": "72.21.198.67",
10+
"method": "POST",
11+
"path": "/collect",
12+
"platform": "Web",
13+
"rid": "0ae1d0c2989e6a7a5619097967d65a7e",
14+
"server_ingest_time": 1719469185000,
15+
"source_type": "http_server",
16+
"timestamp": "2024-06-27T06:19:45.547102438Z",
17+
"ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:126.0) Gecko/20100101 Firefox/126.0",
18+
"uri": "/collect?platform=Web&appId=shop&event_bundle_sequence_id=43&upload_timestamp=1719469182900&hashCode=366095c8"
19+
}

src/data-pipeline/spark-etl/src/test/java/software/aws/solution/clickstream/ClickstreamDataConverterV3Test.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ public void test_convert_data_v3() throws IOException {
5050
Assertions.assertEquals(expectedJson, dataJson);
5151
}
5252

53+
@Test
54+
public void test_empty_latest_referrer() throws IOException {
55+
// ./gradlew clean test --info --tests software.aws.solution.clickstream.ClickstreamDataConverterV3Test.test_empty_latest_referrer
56+
57+
String filePath = "/empty_latest_referrer.json";
58+
Dataset<Row> dataset = readJsonDataset(filePath);
59+
dataset = addFileName(dataset);
60+
System.setProperty(PROJECT_ID_PROP, "projectId1");
61+
Dataset<Row> result = converter.transform(dataset);
62+
Assertions.assertEquals(88, result.count());
63+
}
64+
5365
@Test
5466
public void test_convert_data_v3_invalid_data() throws IOException {
5567
// ./gradlew clean test --info --tests software.aws.solution.clickstream.ClickstreamDataConverterV3Test.test_convert_data_v3_invalid_data

src/data-pipeline/spark-etl/src/test/resources/empty_latest_referrer.json

Lines changed: 2 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)