Skip to content

Commit 431cfb1

Browse files
authored
Fix major mem leak in HTML parsing (#126)
* Refactor README.md to update download links and badges for different platforms * Refactor HTML and JSON parsers
1 parent b162f12 commit 431cfb1

File tree

6 files changed

+208
-175
lines changed

6 files changed

+208
-175
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,9 @@
1212

1313
<div align="center">
1414
Download<br/>
15-
<a href="https://github.com/locaal-ai/obs-urlsource/releases/download/0.3.6/obs-urlsource-0.3.6-windows-x64-Installer.exe"><img src="https://img.shields.io/badge/Windows-0078D6?style=for-the-badge" /></a>
16-
<a href="https://github.com/locaal-ai/obs-urlsource/releases/download/0.3.6/obs-urlsource-0.3.6-macos-universal.pkg"><img src="https://img.shields.io/badge/Mac-000000?style=for-the-badge&logo=Apple" /></a>
17-
<a href="https://github.com/locaal-ai/obs-urlsource/releases/download/0.3.6/obs-urlsource-0.3.6-x86_64-linux-gnu.deb"><img src="https://img.shields.io/badge/Linux-FCC624?style=for-the-badge&logo=linux&logoColor=black"/></a>
15+
<a href="https://github.com/locaal-ai/obs-urlsource/releases/download/0.3.7/obs-urlsource-0.3.7-windows-x64-Installer.exe"><img src="https://img.shields.io/badge/Windows-0078D6?style=for-the-badge" /></a>
16+
<a href="https://github.com/locaal-ai/obs-urlsource/releases/download/0.3.7/obs-urlsource-0.3.7-macos-universal.pkg"><img src="https://img.shields.io/badge/Mac-000000?style=for-the-badge&logo=Apple" /></a>
17+
<a href="https://github.com/locaal-ai/obs-urlsource/releases/download/0.3.7/obs-urlsource-0.3.7-x86_64-linux-gnu.deb"><img src="https://img.shields.io/badge/Linux-FCC624?style=for-the-badge&logo=linux&logoColor=black"/></a>
1818
</div>
1919

2020
## Introduction

buildspec.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@
3737
}
3838
},
3939
"name": "obs-urlsource",
40-
"version": "0.3.6",
40+
"version": "0.3.7",
4141
"author": "Roy Shilkrot",
4242
"website": "https://github.com/locaal-ai/obs-urlsource",
4343
"email": "roy.shil@gmail.com",

src/parsers/html.cpp

Lines changed: 96 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -20,99 +20,125 @@ lxb_status_t find_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t
2020
{
2121
UNUSED_PARAMETER(spec);
2222
std::string str;
23-
(void)lxb_html_serialize_deep_cb(node, serializer_callback, &str);
24-
((std::vector<std::string> *)data)->push_back(str);
25-
return LXB_STATUS_OK;
23+
lxb_status_t status = lxb_html_serialize_deep_cb(node, serializer_callback, &str);
24+
if (status == LXB_STATUS_OK) {
25+
((std::vector<std::string> *)data)->push_back(str);
26+
}
27+
return status;
2628
}
2729

2830
lxb_status_t find_with_selectors(const std::string &slctrs, lxb_html_document_t *document,
2931
std::vector<std::string> &found)
3032
{
31-
/* Create CSS parser. */
32-
lxb_css_parser_t *parser;
33-
lxb_css_selector_list_t *list;
34-
lxb_status_t status;
35-
lxb_dom_node_t *body;
36-
lxb_selectors_t *selectors;
37-
38-
parser = lxb_css_parser_create();
39-
status = lxb_css_parser_init(parser, NULL);
40-
if (status != LXB_STATUS_OK) {
41-
obs_log(LOG_ERROR, "Failed to setup CSS parser");
42-
return EXIT_FAILURE;
43-
}
33+
lxb_css_parser_t *parser = nullptr;
34+
lxb_css_selector_list_t *list = nullptr;
35+
lxb_selectors_t *selectors = nullptr;
36+
lxb_status_t status = LXB_STATUS_ERROR;
37+
38+
do {
39+
parser = lxb_css_parser_create();
40+
if (!parser) {
41+
obs_log(LOG_ERROR, "Failed to create CSS parser");
42+
break;
43+
}
4444

45-
/* Selectors. */
46-
selectors = lxb_selectors_create();
47-
status = lxb_selectors_init(selectors);
48-
if (status != LXB_STATUS_OK) {
49-
obs_log(LOG_ERROR, "Failed to setup Selectors");
50-
return EXIT_FAILURE;
51-
}
45+
status = lxb_css_parser_init(parser, nullptr);
46+
if (status != LXB_STATUS_OK) {
47+
obs_log(LOG_ERROR, "Failed to init CSS parser");
48+
break;
49+
}
5250

53-
/* Parse and get the log. */
51+
selectors = lxb_selectors_create();
52+
if (!selectors) {
53+
obs_log(LOG_ERROR, "Failed to create selectors");
54+
break;
55+
}
5456

55-
list = lxb_css_selectors_parse(parser, (const lxb_char_t *)slctrs.c_str(), slctrs.length());
56-
if (parser->status != LXB_STATUS_OK) {
57-
obs_log(LOG_ERROR, "Failed to parse CSS selectors");
58-
return EXIT_FAILURE;
59-
}
57+
status = lxb_selectors_init(selectors);
58+
if (status != LXB_STATUS_OK) {
59+
obs_log(LOG_ERROR, "Failed to init selectors");
60+
break;
61+
}
6062

61-
/* Find HTML nodes by CSS Selectors. */
62-
body = lxb_dom_interface_node(lxb_html_document_body_element(document));
63+
list = lxb_css_selectors_parse(parser, (const lxb_char_t *)slctrs.c_str(),
64+
slctrs.length());
65+
if (!list || parser->status != LXB_STATUS_OK) {
66+
obs_log(LOG_ERROR, "Failed to parse CSS selectors");
67+
break;
68+
}
6369

64-
status = lxb_selectors_find(selectors, body, list, find_callback, &found);
65-
if (status != LXB_STATUS_OK) {
66-
obs_log(LOG_ERROR, "Failed to find HTML nodes by CSS Selectors");
67-
return EXIT_FAILURE;
68-
}
70+
lxb_dom_node_t *body =
71+
lxb_dom_interface_node(lxb_html_document_body_element(document));
72+
if (!body) {
73+
obs_log(LOG_ERROR, "Failed to get document body");
74+
break;
75+
}
6976

70-
/* Destroy Selectors object. */
71-
(void)lxb_selectors_destroy(selectors, true);
77+
status = lxb_selectors_find(selectors, body, list, find_callback, &found);
78+
if (status != LXB_STATUS_OK) {
79+
obs_log(LOG_ERROR, "Failed to find nodes by CSS Selectors");
80+
break;
81+
}
7282

73-
/* Destroy resources for CSS Parser. */
74-
(void)lxb_css_parser_destroy(parser, true);
83+
} while (0);
7584

76-
/* Destroy all object for all CSS Selector List. */
77-
lxb_css_selector_list_destroy_memory(list);
85+
// Cleanup
86+
if (list) {
87+
lxb_css_selector_list_destroy_memory(list);
88+
}
89+
if (selectors) {
90+
lxb_selectors_destroy(selectors, true);
91+
}
92+
if (parser) {
93+
lxb_css_parser_destroy(parser, true);
94+
}
7895

79-
return LXB_STATUS_OK;
96+
return status;
8097
}
8198

8299
struct request_data_handler_response parse_html(struct request_data_handler_response response,
83100
const url_source_request_data *request_data)
84101
{
85-
lxb_status_t status;
86-
lxb_html_document_t *document;
102+
lxb_html_document_t *document = nullptr;
87103

88-
document = lxb_html_document_create();
89-
if (document == NULL) {
90-
return make_fail_parse_response("Failed to setup HTML parser");
91-
}
104+
try {
105+
document = lxb_html_document_create();
106+
if (!document) {
107+
return make_fail_parse_response("Failed to create HTML document");
108+
}
92109

93-
status = lxb_html_document_parse(document, (const lxb_char_t *)response.body.c_str(),
94-
response.body.length());
95-
if (status != LXB_STATUS_OK) {
96-
return make_fail_parse_response("Failed to parse HTML");
97-
}
110+
lxb_status_t status =
111+
lxb_html_document_parse(document, (const lxb_char_t *)response.body.c_str(),
112+
response.body.length());
98113

99-
std::string parsed_output = response.body;
100-
// Get the output value
101-
if (request_data->output_cssselector != "") {
102-
std::vector<std::string> found;
103-
if (find_with_selectors(request_data->output_cssselector, document, found) !=
104-
LXB_STATUS_OK) {
105-
return make_fail_parse_response("Failed to find element with CSS selector");
106-
} else {
107-
if (found.size() > 0) {
108-
std::copy(found.begin(), found.end(),
109-
std::back_inserter(response.body_parts_parsed));
114+
if (status != LXB_STATUS_OK) {
115+
lxb_html_document_destroy(document);
116+
return make_fail_parse_response("Failed to parse HTML");
117+
}
118+
119+
if (!request_data->output_cssselector.empty()) {
120+
std::vector<std::string> found;
121+
status = find_with_selectors(request_data->output_cssselector, document,
122+
found);
123+
124+
if (status != LXB_STATUS_OK) {
125+
lxb_html_document_destroy(document);
126+
return make_fail_parse_response(
127+
"Failed to find element with CSS selector");
110128
}
129+
130+
response.body_parts_parsed = std::move(found);
131+
} else {
132+
response.body_parts_parsed.push_back(response.body);
111133
}
112-
} else {
113-
// Return the whole HTML object
114-
response.body_parts_parsed.push_back(parsed_output);
115-
}
116134

117-
return response;
135+
lxb_html_document_destroy(document);
136+
return response;
137+
138+
} catch (const std::exception &e) {
139+
if (document) {
140+
lxb_html_document_destroy(document);
141+
}
142+
return make_fail_parse_response(std::string("HTML parsing exception: ") + e.what());
143+
}
118144
}

src/parsers/jsonpath.cpp

Lines changed: 26 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -2,67 +2,56 @@
22
#include "errors.h"
33

44
#include <jsoncons/basic_json.hpp>
5-
#include <jsoncons/json_parser.hpp>
65
#include <jsoncons_ext/jsonpath/jsonpath.hpp>
7-
#include <obs-module.h>
86
#include <nlohmann/json.hpp>
7+
#include <util/c99defs.h>
98

109
struct request_data_handler_response parse_json(struct request_data_handler_response response,
1110
const url_source_request_data *request_data)
1211
{
1312
UNUSED_PARAMETER(request_data);
14-
15-
// Parse the response as JSON
16-
jsoncons::json json;
1713
try {
18-
json = jsoncons::json::parse(response.body);
14+
// Parse the body with both libraries: jsoncons (result unused here) and nlohmann (stored in body_json)
15+
auto json_cons = jsoncons::json::parse(response.body);
1916
response.body_json = nlohmann::json::parse(response.body);
20-
} catch (jsoncons::json_exception &e) {
17+
return response;
18+
} catch (const jsoncons::json_exception &e) {
2119
return make_fail_parse_response(e.what());
22-
} catch (nlohmann::json::parse_error &e) {
20+
} catch (const nlohmann::json::exception &e) {
2321
return make_fail_parse_response(e.what());
2422
}
25-
// Return the whole JSON object
26-
response.body_parts_parsed.push_back(json.as_string());
27-
return response;
2823
}
2924

3025
struct request_data_handler_response parse_json_path(struct request_data_handler_response response,
3126
const url_source_request_data *request_data)
3227
{
33-
34-
// Parse the response as JSON
35-
jsoncons::json json;
3628
try {
37-
json = jsoncons::json::parse(response.body);
29+
auto json = jsoncons::json::parse(response.body);
3830
response.body_json = nlohmann::json::parse(response.body);
39-
} catch (jsoncons::json_exception &e) {
40-
return make_fail_parse_response(e.what());
41-
} catch (nlohmann::json::parse_error &e) {
42-
return make_fail_parse_response(e.what());
43-
}
44-
std::vector<std::string> parsed_output = {};
45-
// Get the output value
46-
if (request_data->output_json_path != "") {
47-
try {
48-
const auto value = jsoncons::jsonpath::json_query(
49-
json, request_data->output_json_path);
31+
32+
if (!request_data->output_json_path.empty()) {
33+
// Create and evaluate JSONPath expression
34+
auto value = jsoncons::jsonpath::json_query(json,
35+
request_data->output_json_path);
36+
5037
if (value.is_array()) {
51-
// extract array items as strings
38+
response.body_parts_parsed.reserve(value.size());
5239
for (const auto &item : value.array_range()) {
53-
parsed_output.push_back(item.as_string());
40+
response.body_parts_parsed.push_back(
41+
item.as<std::string>());
5442
}
5543
} else {
56-
parsed_output.push_back(value.as_string());
44+
response.body_parts_parsed.push_back(value.as<std::string>());
5745
}
58-
} catch (jsoncons::json_exception &e) {
59-
return make_fail_parse_response(e.what());
46+
} else {
47+
response.body_parts_parsed.push_back(json.as<std::string>());
6048
}
61-
} else {
62-
// Return the whole JSON object
63-
parsed_output.clear();
64-
parsed_output.push_back(json.as_string());
49+
50+
return response;
51+
52+
} catch (const jsoncons::jsonpath::jsonpath_error &e) {
53+
return make_fail_parse_response(std::string("JSONPath error: ") + e.what());
54+
} catch (const std::exception &e) {
55+
return make_fail_parse_response(std::string("JSON parse error: ") + e.what());
6556
}
66-
response.body_parts_parsed = parsed_output;
67-
return response;
6857
}

src/parsers/regex.cpp

Lines changed: 28 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,37 +1,42 @@
1-
21
#include "request-data.h"
32
#include "plugin-support.h"
3+
#include "errors.h"
44

55
#include <regex>
66
#include <obs-module.h>
77

88
struct request_data_handler_response parse_regex(struct request_data_handler_response response,
99
const url_source_request_data *request_data)
1010
{
11-
std::string parsed_output = "";
12-
if (request_data->output_regex == "") {
13-
// Return the whole response body
14-
parsed_output = response.body;
15-
} else {
16-
// Parse the response as a regex
17-
std::regex regex(request_data->output_regex,
18-
std::regex_constants::ECMAScript | std::regex_constants::optimize);
11+
try {
12+
if (request_data->output_regex.empty()) {
13+
response.body_parts_parsed.push_back(response.body);
14+
return response;
15+
}
16+
17+
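// Cache compiled regex patterns per thread. NOTE(review): operator[] below inserts the key first, so the find() == end() check never fires and the pattern is never compiled - confirm and fix.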
18+
static thread_local std::unordered_map<std::string, std::regex> regex_cache;
19+
20+
auto &regex = regex_cache[request_data->output_regex];
21+
if (regex_cache.find(request_data->output_regex) == regex_cache.end()) {
22+
regex = std::regex(request_data->output_regex,
23+
std::regex_constants::ECMAScript |
24+
std::regex_constants::optimize);
25+
}
26+
1927
std::smatch match;
2028
if (std::regex_search(response.body, match, regex)) {
21-
if (match.size() > 1) {
22-
parsed_output = match[1].str();
23-
} else {
24-
parsed_output = match[0].str();
25-
}
26-
} else {
27-
obs_log(LOG_INFO, "Failed to match regex");
28-
// Return an error response
29-
struct request_data_handler_response responseFail;
30-
responseFail.error_message = "Failed to match regex";
31-
responseFail.status_code = URL_SOURCE_REQUEST_PARSING_ERROR_CODE;
32-
return responseFail;
29+
// Get the appropriate capture group
30+
size_t group = match.size() > 1 ? 1 : 0;
31+
response.body_parts_parsed.push_back(match[group].str());
32+
return response;
3333
}
34+
35+
return make_fail_parse_response("No regex match found");
36+
37+
} catch (const std::regex_error &e) {
38+
return make_fail_parse_response(std::string("Regex error: ") + e.what());
39+
} catch (const std::exception &e) {
40+
return make_fail_parse_response(std::string("Parse error: ") + e.what());
3441
}
35-
response.body_parts_parsed.push_back(parsed_output);
36-
return response;
3742
}

0 commit comments

Comments
 (0)