Skip to content

Commit bf3fa5c

Browse files
committed
Merge pull request #540 from tempesta-tech/ab-match-suffix
Implement "suffix" operation for string matching. (#495, #471)
2 parents ed366ef + a2e0961 commit bf3fa5c

File tree

8 files changed

+285
-16
lines changed

8 files changed

+285
-16
lines changed

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,12 +282,14 @@ header. Host part in URI takes priority over the `Host` header value.
282282
* **hdr_conn** The value of `Connection` header.
283283
* **hdr_raw** The contents of any other HTTP header field as specified by
284284
`ARG`. `ARG` must include contents of an HTTP header starting with the header
285-
field name. Processing of `hdr_raw` may be slow because it requires walking
286-
over all headers of an HTTP request.
285+
field name. The `suffix` `OP` is not supported for this `FIELD`. Processing
286+
of `hdr_raw` may be slow because it requires walking over all headers of an
287+
HTTP request.
287288

288289
The following `OP` keywords are supported:
289290
* **eq** `FIELD` is fully equal to the string specified in `ARG`.
290291
* **prefix** `FIELD` starts with the string specified in `ARG`.
292+
* **suffix** `FIELD` ends with the string specified in `ARG`.
291293

292294
Below are examples of pattern-matching rules that define the HTTP scheduler:
293295
```
@@ -297,10 +299,14 @@ srv_group bar_app { ... }
297299
298300
sched_http_rules {
299301
match static uri prefix "/static";
302+
match static uri suffix ".php";
300303
match static host prefix "static.";
304+
match static host suffix "tempesta-tech.com";
301305
match foo_app host eq "foo.example.com";
302306
match bar_app hdr_conn eq "keep-alive";
303307
match bar_app hdr_host prefix "bar.";
308+
match bar_app hdr_host suffix "natsys-lab.com";
309+
match bar_app hdr_host eq "bar.natsys-lab.com";
304310
match bar_app hdr_raw prefix "X-Custom-Bar-Hdr: ";
305311
}
306312
```

tempesta_fw/http_match.c

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686

8787
/**
8888
* Look up a header in the @req->h_tbl by given @id,
89-
* and compare @val with the header's value (skipping name and LWS).
89+
* and compare @str with the header's value (skipping name and LWS).
9090
*
9191
* For example:
9292
* hdr_val_eq(req, TFW_HTTP_HDR_HOST, TFW_HTTP_MATCH_O_PREFIX, "natsys-lab", 10, TFW_STR_EQ_PREFIX);
@@ -96,8 +96,8 @@
9696
* "Host : natsys-lab.com"
9797
*/
9898
static bool
99-
hdr_val_eq(const TfwHttpReq *req, tfw_http_hdr_t id, const char *val,
100-
int val_len, tfw_str_eq_flags_t f)
99+
hdr_val_eq(const TfwHttpReq *req, tfw_http_hdr_t id, tfw_http_match_op_t op,
100+
const char *str, int str_len, tfw_str_eq_flags_t flags)
101101
{
102102
TfwStr *hdr;
103103
TfwStr hdr_val;
@@ -110,7 +110,11 @@ hdr_val_eq(const TfwHttpReq *req, tfw_http_hdr_t id, const char *val,
110110

111111
tfw_http_msg_clnthdr_val(hdr, id, &hdr_val);
112112

113-
return tfw_str_eq_cstr(&hdr_val, val, val_len, f);
113+
if (op == TFW_HTTP_MATCH_O_SUFFIX)
114+
return tfw_str_eq_cstr_off(&hdr_val, hdr_val.len - str_len,
115+
str, str_len, flags);
116+
117+
return tfw_str_eq_cstr(&hdr_val, str, str_len, flags);
114118
}
115119

116120
/**
@@ -123,6 +127,7 @@ map_op_to_str_eq_flags(tfw_http_match_op_t op)
123127
[ 0 ... _TFW_HTTP_MATCH_O_COUNT ] = -1,
124128
[TFW_HTTP_MATCH_O_EQ] = TFW_STR_EQ_DEFAULT,
125129
[TFW_HTTP_MATCH_O_PREFIX] = TFW_STR_EQ_PREFIX,
130+
[TFW_HTTP_MATCH_O_SUFFIX] = TFW_STR_EQ_DEFAULT,
126131
};
127132
BUG_ON(flags_tbl[op] < 0);
128133
return flags_tbl[op];
@@ -142,6 +147,8 @@ match_method(const TfwHttpReq *req, const TfwHttpMatchRule *rule)
142147
static bool
143148
match_uri(const TfwHttpReq *req, const TfwHttpMatchRule *rule)
144149
{
150+
const TfwStr *uri_path = &req->uri_path;
151+
const TfwHttpMatchArg *arg = &rule->arg;
145152
tfw_str_eq_flags_t flags = map_op_to_str_eq_flags(rule->op);
146153

147154
/* RFC 7230:
@@ -152,12 +159,18 @@ match_uri(const TfwHttpReq *req, const TfwHttpMatchRule *rule)
152159
*/
153160
flags |= TFW_STR_EQ_CASEI;
154161

155-
return tfw_str_eq_cstr(&req->uri_path, rule->arg.str, rule->arg.len, flags);
162+
if (rule->op == TFW_HTTP_MATCH_O_SUFFIX)
163+
return tfw_str_eq_cstr_off(uri_path, uri_path->len - arg->len,
164+
arg->str, arg->len, flags);
165+
166+
return tfw_str_eq_cstr(uri_path, arg->str, arg->len, flags);
156167
}
157168

158169
static bool
159170
match_host(const TfwHttpReq *req, const TfwHttpMatchRule *rule)
160171
{
172+
const TfwStr *host = &req->host;
173+
const TfwHttpMatchArg *arg = &rule->arg;
161174
tfw_str_eq_flags_t flags = map_op_to_str_eq_flags(rule->op);
162175

163176
/*
@@ -172,12 +185,15 @@ match_host(const TfwHttpReq *req, const TfwHttpMatchRule *rule)
172185

173186
flags |= TFW_STR_EQ_CASEI;
174187

175-
if (req->host.len)
176-
return tfw_str_eq_cstr(&req->host, rule->arg.str,
177-
rule->arg.len, flags);
188+
if (host->len == 0)
189+
return hdr_val_eq(req, TFW_HTTP_HDR_HOST,
190+
rule->op, arg->str, arg->len, flags);
191+
192+
if (rule->op == TFW_HTTP_MATCH_O_SUFFIX)
193+
return tfw_str_eq_cstr_off(host, host->len - arg->len,
194+
arg->str, arg->len, flags);
178195

179-
return hdr_val_eq(req, TFW_HTTP_HDR_HOST, rule->arg.str,
180-
rule->arg.len, flags);
196+
return tfw_str_eq_cstr(host, arg->str, arg->len, flags);
181197
}
182198

183199
static bool
@@ -189,6 +205,7 @@ match_hdr(const TfwHttpReq *req, const TfwHttpMatchRule *rule)
189205
[TFW_HTTP_MATCH_F_HDR_HOST] = TFW_HTTP_HDR_HOST,
190206
};
191207

208+
const TfwHttpMatchArg *arg = &rule->arg;
192209
tfw_str_eq_flags_t flags = map_op_to_str_eq_flags(rule->op);
193210
tfw_http_hdr_t id = id_tbl[rule->field];
194211
BUG_ON(id < 0);
@@ -197,7 +214,7 @@ match_hdr(const TfwHttpReq *req, const TfwHttpMatchRule *rule)
197214
* TODO: case-sensitive matching for headers when required by RFC. */
198215
flags |= TFW_STR_EQ_CASEI;
199216

200-
return hdr_val_eq(req, id, rule->arg.str, rule->arg.len, flags);
217+
return hdr_val_eq(req, id, rule->op, arg->str, arg->len, flags);
201218
}
202219

203220
#define _MOVE_TO_COND(p, end, cond) \

tempesta_fw/http_match.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ typedef enum {
4343
TFW_HTTP_MATCH_O_WILDCARD,
4444
TFW_HTTP_MATCH_O_EQ,
4545
TFW_HTTP_MATCH_O_PREFIX,
46+
TFW_HTTP_MATCH_O_SUFFIX,
4647
_TFW_HTTP_MATCH_O_COUNT
4748
} tfw_http_match_op_t;
4849

tempesta_fw/sched/tfw_sched_http.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ static const TfwCfgEnum __read_mostly tfw_sched_http_cfg_op_enum[] = {
173173
{ "*", TFW_HTTP_MATCH_O_WILDCARD },
174174
{ "eq", TFW_HTTP_MATCH_O_EQ },
175175
{ "prefix", TFW_HTTP_MATCH_O_PREFIX },
176-
/* TODO: suffix, substr, regex, case sensitive/insensitive versions. */
176+
{ "suffix", TFW_HTTP_MATCH_O_SUFFIX },
177+
/* TODO: substr, regex, case sensitive/insensitive versions. */
177178
{}
178179
};
179180

tempesta_fw/str.c

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ tfw_str_eq_cstr_pos(const TfwStr *str, const char *pos, const char *cstr,
394394

395395
r = tfw_str_eq_cstr(&tmp, cstr, cstr_len, flags);
396396

397-
*v = t; /* restore chunk */
397+
*v = t; /* Restore the chunk */
398398
goto out;
399399
}
400400

@@ -410,8 +410,57 @@ tfw_str_eq_cstr_pos(const TfwStr *str, const char *pos, const char *cstr,
410410
}
411411
EXPORT_SYMBOL(tfw_str_eq_cstr_pos);
412412

413+
/*
414+
* Compare @str starting at offset @offset with a plain C string
415+
* @cstr of size @cstr_len. Obey the comparison flags @flags.
416+
*
417+
* The function prepares a substring of @str and then calls
418+
* tfw_str_eq_cstr(). Note that a chunk of @str is modified in the
419+
* process, but the original contents are restored before the result
420+
* is returned to the caller.
421+
*/
422+
bool
423+
tfw_str_eq_cstr_off(const TfwStr *str, ssize_t offset, const char *cstr,
424+
int cstr_len, tfw_str_eq_flags_t flags)
425+
{
426+
bool ret = false;
427+
TfwStr t, tmp = *str;
428+
TfwStr *c, *end;
429+
430+
BUG_ON(TFW_STR_DUP(str));
431+
BUG_ON(!cstr || !cstr_len);
432+
433+
if (offset < 0)
434+
return false;
435+
if (offset == 0)
436+
return tfw_str_eq_cstr(str, cstr, cstr_len, flags);
437+
if (unlikely(offset + cstr_len > str->len))
438+
return false;
439+
440+
TFW_STR_FOR_EACH_CHUNK(c, &tmp, end) {
441+
if (offset >= c->len) {
442+
offset -= c->len;
443+
tmp.len -= c->len;
444+
tmp.ptr += sizeof(TfwStr);
445+
TFW_STR_CHUNKN_SUB(&tmp, 1);
446+
continue;
447+
}
448+
t = *c;
449+
c->ptr += offset;
450+
c->len -= offset;
451+
452+
ret = tfw_str_eq_cstr(&tmp, cstr, cstr_len, flags);
453+
454+
*c = t; /* Restore the chunk */
455+
break;
456+
}
457+
458+
return ret;
459+
}
460+
EXPORT_SYMBOL(tfw_str_eq_cstr_off);
461+
413462
/**
414-
* The function intentionaly brokes zero-copy string design. And should
463+
* The function intentionally breaks zero-copy string design. And should
415464
* be used for short-strings only.
416465
*
417466
* Join all chunks of @str to a single plain C string.

tempesta_fw/str.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,8 @@ bool tfw_str_eq_cstr(const TfwStr *str, const char *cstr, int cstr_len,
245245
tfw_str_eq_flags_t flags);
246246
bool tfw_str_eq_cstr_pos(const TfwStr *str, const char *pos, const char *cstr,
247247
int cstr_len, tfw_str_eq_flags_t flags);
248+
bool tfw_str_eq_cstr_off(const TfwStr *str, ssize_t offset, const char *cstr,
249+
int cstr_len, tfw_str_eq_flags_t flags);
248250

249251
size_t tfw_str_to_cstr(const TfwStr *str, char *out_buf, int buf_size);
250252

tempesta_fw/t/unit/test_http_match.c

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,37 @@ TEST(http_match, uri_prefix)
135135
EXPECT_EQ(-1, match_id);
136136
}
137137

138+
TEST(http_match, uri_suffix)
139+
{
140+
int match_id;
141+
142+
test_mlst_add(1, TFW_HTTP_MATCH_F_URI, TFW_HTTP_MATCH_O_SUFFIX,
143+
".jpg");
144+
test_mlst_add(2, TFW_HTTP_MATCH_F_URI, TFW_HTTP_MATCH_O_SUFFIX,
145+
"/people.html");
146+
test_mlst_add(3, TFW_HTTP_MATCH_F_URI, TFW_HTTP_MATCH_O_SUFFIX,
147+
"/bar/folks.html");
148+
149+
set_tfw_str(&test_req->uri_path, "/foo/bar/picture.jpg");
150+
match_id = test_mlst_match();
151+
EXPECT_EQ(1, match_id);
152+
153+
set_tfw_str(&test_req->uri_path, "/foo/bar/people.html");
154+
match_id = test_mlst_match();
155+
EXPECT_EQ(2, match_id);
156+
157+
set_tfw_str(&test_req->uri_path, "/foo/bar/folks.html");
158+
match_id = test_mlst_match();
159+
EXPECT_EQ(3, match_id);
160+
161+
set_tfw_str(&test_req->uri_path, "../foo");
162+
match_id = test_mlst_match();
163+
EXPECT_EQ(-1, match_id);
164+
165+
set_tfw_str(&test_req->uri_path, "/foo/bar/picture.png");
166+
match_id = test_mlst_match();
167+
EXPECT_EQ(-1, match_id);
168+
}
138169
TEST(http_match, host_eq)
139170
{
140171
int match_id;
@@ -217,6 +248,64 @@ TEST(http_match, hdr_host_prefix)
217248
free_all_str();
218249
}
219250

251+
TEST(http_match, hdr_host_suffix)
252+
{
253+
create_str_pool();
254+
255+
{
256+
int match_id;
257+
258+
/* Special headers must be compound */
259+
TFW_STR2(hdr1, "Host: ", "example.biz");
260+
TFW_STR2(hdr2, "Host: ", "example.com");
261+
TFW_STR2(hdr3, "Host: ", "example.ru");
262+
TFW_STR2(hdr4, "Host: ", "eXample.COM");
263+
TFW_STR2(hdr5, "Host: ", "www");
264+
TFW_STR2(hdr6, "Host: ", "TEST.FOLKS.COM");
265+
266+
test_mlst_add(1, TFW_HTTP_MATCH_F_HDR_CONN,
267+
TFW_HTTP_MATCH_O_EQ, "Connection: Keep-Alive");
268+
test_mlst_add(2, TFW_HTTP_MATCH_F_HDR_HOST,
269+
TFW_HTTP_MATCH_O_SUFFIX, ".ru");
270+
test_mlst_add(3, TFW_HTTP_MATCH_F_HDR_HOST,
271+
TFW_HTTP_MATCH_O_SUFFIX, ".biz");
272+
test_mlst_add(4, TFW_HTTP_MATCH_F_HDR_HOST,
273+
TFW_HTTP_MATCH_O_SUFFIX, ".folks.com");
274+
test_mlst_add(5, TFW_HTTP_MATCH_F_HDR_HOST,
275+
TFW_HTTP_MATCH_O_SUFFIX, ".com");
276+
277+
set_tfw_str(&test_req->host, "example.com");
278+
match_id = test_mlst_match();
279+
EXPECT_EQ(-1, match_id);
280+
281+
test_req->h_tbl->tbl[TFW_HTTP_HDR_HOST] = *hdr1;
282+
match_id = test_mlst_match();
283+
EXPECT_EQ(3, match_id);
284+
285+
test_req->h_tbl->tbl[TFW_HTTP_HDR_HOST] = *hdr2;
286+
match_id = test_mlst_match();
287+
EXPECT_EQ(5, match_id);
288+
289+
test_req->h_tbl->tbl[TFW_HTTP_HDR_HOST] = *hdr3;
290+
match_id = test_mlst_match();
291+
EXPECT_EQ(2, match_id);
292+
293+
test_req->h_tbl->tbl[TFW_HTTP_HDR_HOST] = *hdr4;
294+
match_id = test_mlst_match();
295+
EXPECT_EQ(5, match_id);
296+
297+
test_req->h_tbl->tbl[TFW_HTTP_HDR_HOST] = *hdr5;
298+
match_id = test_mlst_match();
299+
EXPECT_EQ(-1, match_id);
300+
301+
test_req->h_tbl->tbl[TFW_HTTP_HDR_HOST] = *hdr6;
302+
match_id = test_mlst_match();
303+
EXPECT_EQ(4, match_id);
304+
}
305+
306+
free_all_str();
307+
}
308+
220309
TEST(http_match, method_eq)
221310
{
222311
int match_id;
@@ -259,8 +348,10 @@ TEST_SUITE(http_match)
259348

260349
TEST_RUN(tfw_http_match_req, returns_first_matching_rule);
261350
TEST_RUN(http_match, uri_prefix);
351+
TEST_RUN(http_match, uri_suffix);
262352
TEST_RUN(http_match, host_eq);
263353
TEST_RUN(http_match, headers_eq);
264354
TEST_RUN(http_match, hdr_host_prefix);
355+
TEST_RUN(http_match, hdr_host_suffix);
265356
TEST_RUN(http_match, method_eq);
266357
}

0 commit comments

Comments
 (0)