27
27
namespace modsecurity ::actions::transformations {
28
28
29
29
30
- bool HtmlEntityDecode::transform (std::string &value, const Transaction *trans) const {
31
- std::string ret;
32
- unsigned char *input;
33
-
34
- input = reinterpret_cast <unsigned char *>
35
- (malloc (sizeof (char ) * value.length ()+1 ));
36
-
37
- if (input == NULL ) {
38
- return " " ;
39
- }
40
-
41
- memcpy (input, value.c_str (), value.length ()+1 );
42
-
43
- size_t i = inplace (input, value.length ());
44
-
45
- ret.assign (reinterpret_cast <char *>(input), i);
46
- free (input);
47
-
48
- const auto changed = ret != value;
49
- value = ret;
50
- return changed;
51
- }
52
-
30
+ static inline bool inplace (std::string &value) {
31
+ const auto input_len = value.length ();
32
+ auto d = reinterpret_cast <unsigned char *>(value.data ());
33
+ const unsigned char *input = d;
34
+ const unsigned char *end = input + input_len;
53
35
54
- int HtmlEntityDecode::inplace (unsigned char *input, uint64_t input_len) {
55
- unsigned char *d = input;
56
- int i, count;
57
-
58
- if ((input == NULL ) || (input_len == 0 )) {
59
- return 0 ;
60
- }
61
-
62
- i = count = 0 ;
63
- while ((i < input_len) && (count < input_len)) {
64
- int z, copy = 1 ;
36
+ std::string::size_type i = 0 ;
37
+ while (i < input_len) {
38
+ std::string::size_type copy = 1 ;
65
39
66
40
/* Require an ampersand and at least one character to
67
41
* start looking into the entity.
68
42
*/
69
43
if ((input[i] == ' &' ) && (i + 1 < input_len)) {
70
- int k, j = i + 1 ;
44
+ auto j = i + 1 ;
71
45
72
46
if (input[j] == ' #' ) {
73
47
/* Numerical entity. */
@@ -87,19 +61,18 @@ int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
87
61
}
88
62
j++; /* j is the position of the first digit now. */
89
63
90
- k = j;
91
- while ((j < input_len) && (isxdigit (input[j]))) {
64
+ constexpr int MAX_HEX_DIGITS = 2 ; // supports only bytes (max value 0xff)
65
+ auto k = j;
66
+ while ((j - k < MAX_HEX_DIGITS) && (j < input_len) && (isxdigit (input[j]))) {
92
67
j++;
93
68
}
94
69
if (j > k) { /* Do we have at least one digit? */
95
70
/* Decode the entity. */
96
- char *x;
97
- x = reinterpret_cast <char *>(calloc (sizeof (char ),
98
- ((j - k) + 1 )));
71
+ char x[MAX_HEX_DIGITS + 1 ];
99
72
memcpy (x, (const char *)&input[k], j - k);
100
- *d++ = ( unsigned char ) strtol (x, NULL , 16 ) ;
101
- free (x);
102
- count++ ;
73
+ x[j - k] = ' \0 ' ;
74
+
75
+ *d++ = ( unsigned char ) strtol (x, nullptr , 16 ) ;
103
76
104
77
/* Skip over the semicolon if it's there. */
105
78
if ((j < input_len) && (input[j] == ' ;' )) {
@@ -113,19 +86,18 @@ int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
113
86
}
114
87
} else {
115
88
/* Decimal entity. */
116
- k = j;
117
- while ((j < input_len) && (isdigit (input[j]))) {
89
+ constexpr int MAX_DEC_DIGITS = 3 ; // supports only bytes (max value 255)
90
+ auto k = j;
91
+ while ((j - k < MAX_DEC_DIGITS) && (j < input_len) && (isdigit (input[j]))) {
118
92
j++;
119
93
}
120
94
if (j > k) { /* Do we have at least one digit? */
121
95
/* Decode the entity. */
122
- char *x;
123
- x = reinterpret_cast <char *>(calloc (sizeof (char ),
124
- ((j - k) + 1 )));
96
+ char x[MAX_DEC_DIGITS + 1 ];
125
97
memcpy (x, (const char *)&input[k], j - k);
126
- *d++ = ( unsigned char ) strtol (x, NULL , 10 ) ;
127
- free (x);
128
- count++ ;
98
+ x[j - k] = ' \0 ' ;
99
+
100
+ *d++ = ( unsigned char ) strtol (x, nullptr , 10 ) ;
129
101
130
102
/* Skip over the semicolon if it's there. */
131
103
if ((j < input_len) && (input[j] == ' ;' )) {
@@ -140,38 +112,31 @@ int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
140
112
}
141
113
} else {
142
114
/* Text entity. */
143
- k = j;
115
+ auto k = j;
144
116
while ((j < input_len) && (isalnum (input[j]))) {
145
117
j++;
146
118
}
147
119
if (j > k) { /* Do we have at least one digit? */
148
- char *x;
149
- x = reinterpret_cast <char *>(calloc (sizeof (char ),
150
- ((j - k) + 1 )));
151
- memcpy (x, (const char *)&input[k], j - k);
120
+ const auto x = reinterpret_cast <const char *>(&input[k]);
152
121
153
122
/* Decode the entity. */
154
123
/* ENH What about others? */
155
- if (strcasecmp (x, " quot" ) == 0 ) {
124
+ if (strncasecmp (x, " quot" , 4 ) == 0 ) {
156
125
*d++ = ' "' ;
157
- } else if (strcasecmp (x, " amp" ) == 0 ) {
126
+ } else if (strncasecmp (x, " amp" , 3 ) == 0 ) {
158
127
*d++ = ' &' ;
159
- } else if (strcasecmp (x, " lt" ) == 0 ) {
128
+ } else if (strncasecmp (x, " lt" , 2 ) == 0 ) {
160
129
*d++ = ' <' ;
161
- } else if (strcasecmp (x, " gt" ) == 0 ) {
130
+ } else if (strncasecmp (x, " gt" , 2 ) == 0 ) {
162
131
*d++ = ' >' ;
163
- } else if (strcasecmp (x, " nbsp" ) == 0 ) {
132
+ } else if (strncasecmp (x, " nbsp" , 4 ) == 0 ) {
164
133
*d++ = NBSP;
165
134
} else {
166
135
/* We do no want to convert this entity,
167
136
* copy the raw data over. */
168
137
copy = j - k + 1 ;
169
- free (x);
170
138
goto HTML_ENT_OUT;
171
139
}
172
- free (x);
173
-
174
- count++;
175
140
176
141
/* Skip over the semicolon if it's there. */
177
142
if ((j < input_len) && (input[j] == ' ;' )) {
@@ -187,15 +152,20 @@ int HtmlEntityDecode::inplace(unsigned char *input, uint64_t input_len) {
187
152
188
153
HTML_ENT_OUT:
189
154
190
- for (z = 0 ; (( z < copy) && (count < input_len)) ; z++) {
155
+ for (auto z = 0 ; z < copy; z++) {
191
156
*d++ = input[i++];
192
- count++;
193
157
}
194
158
}
195
159
196
160
*d = ' \0 ' ;
197
161
198
- return count;
162
+ value.resize (d - input);
163
+ return d != end;
164
+ }
165
+
166
+
167
+ bool HtmlEntityDecode::transform (std::string &value, const Transaction *trans) const {
168
+ return inplace (value);
199
169
}
200
170
201
171
0 commit comments