Skip to content

Commit 1a2de10

Browse files
authored
Merge pull request #3358 from airween/v2/xmlargsfeat
feat: improved XMLArgs processing
2 parents fa621f8 + 87cbf9e commit 1a2de10

File tree

5 files changed

+326
-21
lines changed

5 files changed

+326
-21
lines changed

apache2/apache2_config.c

+40
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ void *create_directory_config(apr_pool_t *mp, char *path)
166166

167167
/* xml external entity */
168168
dcfg->xml_external_entity = NOT_SET;
169+
dcfg->parse_xml_into_args = NOT_SET;
169170

170171
return dcfg;
171172
}
@@ -640,6 +641,8 @@ void *merge_directory_configs(apr_pool_t *mp, void *_parent, void *_child)
640641
/* xml external entity */
641642
merged->xml_external_entity = (child->xml_external_entity == NOT_SET
642643
? parent->xml_external_entity : child->xml_external_entity);
644+
merged->parse_xml_into_args = (child->parse_xml_into_args == NOT_SET
645+
? parent->parse_xml_into_args : child->parse_xml_into_args);
643646

644647
return merged;
645648
}
@@ -773,6 +776,7 @@ void init_directory_config(directory_config *dcfg)
773776

774777
/* xml external entity */
775778
if (dcfg->xml_external_entity == NOT_SET) dcfg->xml_external_entity = 0;
779+
if (dcfg->parse_xml_into_args == NOT_SET) dcfg->parse_xml_into_args = 0;
776780

777781
}
778782

@@ -3698,6 +3702,34 @@ static const char *cmd_cache_transformations(cmd_parms *cmd, void *_dcfg,
36983702
return NULL;
36993703
}
37003704

3705+
/**
3706+
* \brief Add SecParseXmlIntoArgs configuration option
3707+
*
3708+
* \param cmd Pointer to configuration data
3709+
* \param _dcfg Pointer to directory configuration
3710+
* \param p1 Pointer to configuration option
3711+
*
3712+
* \retval NULL On Success
3713+
* \retval apr_psprintf On error
3714+
*/
3715+
static const char *cmd_parse_xml_into_args(cmd_parms *cmd, void *_dcfg, const char *p1)
3716+
{
3717+
assert(cmd != NULL);
3718+
assert(_dcfg != NULL);
3719+
assert(p1 != NULL);
3720+
// Normally useless code, left to be safe for the moment
3721+
if (_dcfg == NULL) {
3722+
ap_log_perror(APLOG_MARK, APLOG_EMERG, 0, cmd->pool, "cmd_parse_xml_into_args: _dcfg is NULL");
3723+
return NULL;
3724+
}
3725+
directory_config *dcfg = (directory_config *)_dcfg;
3726+
if (strcasecmp(p1, "on") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_ON; }
3727+
else if (strcasecmp(p1, "off") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_OFF; }
3728+
else if (strcasecmp(p1, "onlyargs") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_ONLYARGS; }
3729+
else return apr_psprintf(cmd->pool, "ModSecurity: Invalid value for SecParseXmlIntoArgs: %s", p1);
3730+
3731+
return NULL;
3732+
}
37013733

37023734
/* -- Configuration directives definitions -- */
37033735

@@ -4466,5 +4498,13 @@ const command_rec module_directives[] = {
44664498
"Set Hash parameter"
44674499
),
44684500

4501+
AP_INIT_TAKE1 (
4502+
"SecParseXmlIntoArgs",
4503+
cmd_parse_xml_into_args,
4504+
NULL,
4505+
CMD_SCOPE_ANY,
4506+
"On, Off or OnlyArgs"
4507+
),
4508+
44694509
{ NULL }
44704510
};

apache2/modsecurity.h

+5
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,10 @@ extern DSOLOCAL int *unicode_map_table;
243243
#define RULE_EXCEPTION_REMOVE_MSG 4
244244
#define RULE_EXCEPTION_REMOVE_TAG 5
245245

246+
#define MSC_XML_ARGS_OFF 0
247+
#define MSC_XML_ARGS_ON 1
248+
#define MSC_XML_ARGS_ONLYARGS 2
249+
246250
#define NBSP 160
247251

248252
struct rule_exception {
@@ -643,6 +647,7 @@ struct directory_config {
643647

644648
/* xml */
645649
int xml_external_entity;
650+
int parse_xml_into_args;
646651

647652
/* This will be used whenever ModSecurity will be ready
648653
* to ask the server for newer rules.

apache2/msc_xml.c

+230-21
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,124 @@
1414

1515
#include "msc_xml.h"
1616

17+
static void msc_xml_on_start_elementns(
18+
void *ctx,
19+
const xmlChar *localname,
20+
const xmlChar *prefix,
21+
const xmlChar *URI,
22+
int nb_namespaces,
23+
const xmlChar **namespaces,
24+
int nb_attributes,
25+
int nb_defaulted,
26+
const xmlChar **attributes
27+
) {
28+
29+
// get the length of XML tag (localname)
30+
size_t taglen = strlen((const char *)localname);
31+
modsec_rec * msr = (modsec_rec *)ctx;
32+
msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state;
33+
34+
// pathlen contains the concatenated strings of tags with '.'
35+
// eg xml.root.level1.leaf
36+
xml_parser_state->pathlen += (taglen + 1);
37+
char *newpath = apr_pstrcat(msr->mp, xml_parser_state->currpath, ".", (char *)localname, NULL);
38+
xml_parser_state->currpath = newpath;
39+
40+
int *new_stack_item = (int *)apr_array_push(xml_parser_state->has_child_stack);
41+
*new_stack_item = 0;
42+
xml_parser_state->depth++;
43+
// set the current value to null
44+
// this is necessary because if there is any text between the tags (new line, etc)
45+
// it will be added to the current value
46+
xml_parser_state->currval = NULL;
47+
48+
// if there is an item before the current one we set that has a child
49+
if (xml_parser_state->depth > 1) {
50+
int *parent_stack_item = &((int *)xml_parser_state->has_child_stack->elts)[xml_parser_state->has_child_stack->nelts - 2];
51+
*parent_stack_item = 1;
52+
}
53+
54+
}
55+
56+
static void msc_xml_on_end_elementns(
57+
void* ctx,
58+
const xmlChar* localname,
59+
const xmlChar* prefix,
60+
const xmlChar* URI
61+
) {
62+
63+
size_t taglen = strlen((const char *)localname);
64+
modsec_rec * msr = (modsec_rec *)ctx;
65+
msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state;
66+
67+
// if the node is a leaf we add it as argument
68+
// get the top item from the stack which tells this info
69+
int * top_stack_item = apr_array_pop(xml_parser_state->has_child_stack);
70+
if (*top_stack_item == 0) {
71+
72+
if (apr_table_elts(msr->arguments)->nelts >= msr->txcfg->arguments_limit) {
73+
if (msr->txcfg->debuglog_level >= 4) {
74+
msr_log(msr, 4, "Skipping request argument, over limit (XML): name \"%s\", value \"%s\"",
75+
log_escape_ex(msr->mp, xml_parser_state->currpath, strlen(xml_parser_state->currpath)),
76+
log_escape_ex(msr->mp, xml_parser_state->currval, strlen(xml_parser_state->currval)));
77+
}
78+
msr->msc_reqbody_error = 1;
79+
msr->xml->xml_error = apr_psprintf(msr->mp, "More than %ld ARGS (GET + XML)", msr->txcfg->arguments_limit);
80+
xmlStopParser((xmlParserCtxtPtr)msr->xml->parsing_ctx_arg);
81+
}
82+
else {
83+
84+
msc_arg * arg = (msc_arg *) apr_pcalloc(msr->mp, sizeof(msc_arg));
85+
86+
arg->name = xml_parser_state->currpath;
87+
arg->name_len = strlen(arg->name);
88+
arg->value = xml_parser_state->currval;
89+
arg->value_len = strlen(xml_parser_state->currval);
90+
arg->value_origin_len = arg->value_len;
91+
arg->origin = "XML";
92+
93+
if (msr->txcfg->debuglog_level >= 9) {
94+
msr_log(msr, 9, "Adding XML argument '%s' with value '%s'",
95+
xml_parser_state->currpath, xml_parser_state->currval);
96+
}
97+
98+
apr_table_addn(msr->arguments,
99+
log_escape_nq_ex(msr->mp, arg->name, arg->name_len), (void *) arg);
100+
} // end else
101+
} // end top_stack_item == 0
102+
103+
// decrease the length of current path length - +1 because of the '\0'
104+
xml_parser_state->pathlen -= (taglen + 1);
105+
106+
// -1 is needed because we don't need the last '.'
107+
char * newpath = apr_pstrndup(msr->mp, xml_parser_state->currpath, xml_parser_state->pathlen - 1);
108+
xml_parser_state->currpath = newpath;
109+
110+
xml_parser_state->depth--;
111+
xml_parser_state->currval = NULL;
112+
}
113+
114+
static void msc_xml_on_characters(void *ctx, const xmlChar *ch, int len) {
115+
116+
modsec_rec * msr = (modsec_rec *)ctx;
117+
msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state;
118+
119+
// libxml2 SAX parser will call this function multiple times
120+
// during the parsing of a single node, if the value has multibyte
121+
// characters, so we need to concatenate the values
122+
xml_parser_state->currval = apr_pstrcat(msr->mp,
123+
((xml_parser_state->currval != NULL) ? xml_parser_state->currval : ""),
124+
apr_pstrndup(msr->mp, (const char *)ch, len),
125+
NULL);
126+
// check if the memory allocation was successful
127+
if (xml_parser_state->currval == NULL) {
128+
msr->xml->xml_error = apr_psprintf(msr->mp, "Failed to allocate memory for XML value.");
129+
xmlStopParser((xmlParserCtxtPtr)msr->xml->parsing_ctx_arg);
130+
}
131+
132+
}
133+
134+
17135
static xmlParserInputBufferPtr
18136
xml_unload_external_entity(const char *URI, xmlCharEncoding enc) {
19137
return NULL;
@@ -37,6 +155,33 @@ int xml_init(modsec_rec *msr, char **error_msg) {
37155
entity = xmlParserInputBufferCreateFilenameDefault(xml_unload_external_entity);
38156
}
39157

158+
if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) {
159+
160+
msr->xml->sax_handler = (xmlSAXHandler *)apr_pcalloc(msr->mp, sizeof(xmlSAXHandler));
161+
memset(msr->xml->sax_handler, 0, sizeof(xmlSAXHandler));
162+
if (msr->xml->sax_handler == NULL) {
163+
*error_msg = apr_psprintf(msr->mp, "XML: Failed to create SAX handler.");
164+
return -1;
165+
}
166+
167+
msr->xml->sax_handler->initialized = XML_SAX2_MAGIC;
168+
msr->xml->sax_handler->startElementNs = msc_xml_on_start_elementns;
169+
msr->xml->sax_handler->endElementNs = msc_xml_on_end_elementns;
170+
msr->xml->sax_handler->characters = msc_xml_on_characters;
171+
172+
// set the parser state struct
173+
msr->xml->xml_parser_state = apr_pcalloc(msr->mp, sizeof(msc_xml_parser_state));
174+
msr->xml->xml_parser_state->depth = 0;
175+
msr->xml->xml_parser_state->pathlen = 4; // "xml\0"
176+
msr->xml->xml_parser_state->currpath = apr_pstrdup(msr->mp, "xml");
177+
msr->xml->xml_parser_state->currval = NULL;
178+
msr->xml->xml_parser_state->currpathbufflen = 4;
179+
// initialize the stack with item of 10
180+
// this will store the information about nodes
181+
// 10 is just an initial value, it can be automatically incremented
182+
msr->xml->xml_parser_state->has_child_stack = apr_array_make(msr->mp, 10, sizeof(int));
183+
}
184+
40185
return 1;
41186
}
42187

@@ -68,7 +213,7 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char
68213
* enable us to pass it the first chunk of data so that
69214
* it can attempt to auto-detect the encoding.
70215
*/
71-
if (msr->xml->parsing_ctx == NULL) {
216+
if (msr->xml->parsing_ctx == NULL && msr->xml->parsing_ctx_arg == NULL) {
72217

73218
/* First invocation. */
74219

@@ -86,18 +231,52 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char
86231
87232
*/
88233

89-
msr->xml->parsing_ctx = xmlCreatePushParserCtxt(NULL, NULL, buf, size, "body.xml");
90-
if (msr->xml->parsing_ctx == NULL) {
91-
*error_msg = apr_psprintf(msr->mp, "XML: Failed to create parsing context.");
92-
return -1;
234+
if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) {
235+
msr->xml->parsing_ctx = xmlCreatePushParserCtxt(NULL, NULL, buf, size, "body.xml");
236+
if (msr->xml->parsing_ctx == NULL) {
237+
*error_msg = apr_psprintf(msr->mp, "XML: Failed to create parsing context.");
238+
return -1;
239+
}
240+
}
241+
if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) {
242+
msr->xml->parsing_ctx_arg = xmlCreatePushParserCtxt(
243+
msr->xml->sax_handler,
244+
msr,
245+
buf,
246+
size,
247+
NULL);
248+
if (msr->xml->parsing_ctx_arg == NULL) {
249+
*error_msg = apr_psprintf(msr->mp, "XML: Failed to create parsing context for ARGS.");
250+
return -1;
251+
}
93252
}
94253
} else {
95254

96255
/* Not a first invocation. */
256+
msr_log(msr, 4, "XML: Continue parsing.");
257+
if (msr->xml->parsing_ctx != NULL &&
258+
msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) {
259+
xmlParseChunk(msr->xml->parsing_ctx, buf, size, 0);
260+
if (msr->xml->parsing_ctx->wellFormed != 1) {
261+
*error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document.");
262+
return -1;
263+
}
264+
}
97265

98-
xmlParseChunk(msr->xml->parsing_ctx, buf, size, 0);
99-
if (msr->xml->parsing_ctx->wellFormed != 1) {
100-
*error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document.");
266+
if (msr->xml->parsing_ctx_arg != NULL &&
267+
msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) {
268+
if (xmlParseChunk(msr->xml->parsing_ctx_arg, buf, size, 0) != 0) {
269+
if (msr->xml->xml_error) {
270+
*error_msg = msr->xml->xml_error;
271+
}
272+
else {
273+
*error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document for ARGS.");
274+
}
275+
return -1;
276+
}
277+
}
278+
if (msr->xml->xml_error) {
279+
*error_msg = msr->xml->xml_error;
101280
return -1;
102281
}
103282
}
@@ -114,23 +293,44 @@ int xml_complete(modsec_rec *msr, char **error_msg) {
114293
*error_msg = NULL;
115294

116295
/* Only if we have a context, meaning we've done some work. */
117-
if (msr->xml->parsing_ctx != NULL) {
118-
/* This is how we signalise the end of parsing to libxml. */
119-
xmlParseChunk(msr->xml->parsing_ctx, NULL, 0, 1);
296+
if (msr->xml->parsing_ctx != NULL || msr->xml->parsing_ctx_arg != NULL) {
297+
if (msr->xml->parsing_ctx != NULL &&
298+
msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) {
299+
/* This is how we signal the end of parsing to libxml. */
300+
xmlParseChunk(msr->xml->parsing_ctx, NULL, 0, 1);
120301

121-
/* Preserve the results for our reference. */
122-
msr->xml->well_formed = msr->xml->parsing_ctx->wellFormed;
123-
msr->xml->doc = msr->xml->parsing_ctx->myDoc;
302+
/* Preserve the results for our reference. */
303+
msr->xml->well_formed = msr->xml->parsing_ctx->wellFormed;
304+
msr->xml->doc = msr->xml->parsing_ctx->myDoc;
124305

125-
/* Clean up everything else. */
126-
xmlFreeParserCtxt(msr->xml->parsing_ctx);
127-
msr->xml->parsing_ctx = NULL;
128-
msr_log(msr, 4, "XML: Parsing complete (well_formed %u).", msr->xml->well_formed);
306+
/* Clean up everything else. */
307+
xmlFreeParserCtxt(msr->xml->parsing_ctx);
308+
msr->xml->parsing_ctx = NULL;
309+
msr_log(msr, 4, "XML: Parsing complete (well_formed %u).", msr->xml->well_formed);
129310

130-
if (msr->xml->well_formed != 1) {
131-
*error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document.");
132-
return -1;
311+
if (msr->xml->well_formed != 1) {
312+
*error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document.");
313+
return -1;
314+
}
315+
}
316+
317+
if (msr->xml->parsing_ctx_arg != NULL &&
318+
msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) {
319+
if (xmlParseChunk(msr->xml->parsing_ctx_arg, NULL, 0, 1) != 0) {
320+
if (msr->xml->xml_error) {
321+
*error_msg = msr->xml->xml_error;
322+
}
323+
else {
324+
*error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document for ARGS.");
325+
}
326+
xmlFreeParserCtxt(msr->xml->parsing_ctx_arg);
327+
msr->xml->parsing_ctx_arg = NULL;
328+
return -1;
329+
}
330+
xmlFreeParserCtxt(msr->xml->parsing_ctx_arg);
331+
msr->xml->parsing_ctx_arg = NULL;
133332
}
333+
134334
}
135335

136336
return 1;
@@ -152,6 +352,15 @@ apr_status_t xml_cleanup(modsec_rec *msr) {
152352
xmlFreeParserCtxt(msr->xml->parsing_ctx);
153353
msr->xml->parsing_ctx = NULL;
154354
}
355+
if (msr->xml->parsing_ctx_arg != NULL) {
356+
357+
if (msr->xml->parsing_ctx_arg->myDoc) {
358+
xmlFreeDoc(msr->xml->parsing_ctx_arg->myDoc);
359+
}
360+
361+
xmlFreeParserCtxt(msr->xml->parsing_ctx_arg);
362+
msr->xml->parsing_ctx_arg = NULL;
363+
}
155364
if (msr->xml->doc != NULL) {
156365
xmlFreeDoc(msr->xml->doc);
157366
msr->xml->doc = NULL;

0 commit comments

Comments
 (0)