diff --git a/etc/tempesta_fw.conf b/etc/tempesta_fw.conf index 3d612671e..34658cba3 100644 --- a/etc/tempesta_fw.conf +++ b/etc/tempesta_fw.conf @@ -656,8 +656,8 @@ # from HTTP tables (see 'http_chain' directive). # # is one of 'location', 'proxy_pass', 'cache_bypass', 'cache_fulfill', -# 'nonidempotent' or 'hdr_add' directives (see the corresponding directives' -# description). +# 'nonidempotent', 'hdr_add' or 'http_post_validate' directives (see the +# corresponding directives' description). # # Example: # vhost app { @@ -693,7 +693,7 @@ # is a match operator, one of 'eq', 'prefix', 'suffix', or '*'. # is a verbatim string matched against URL in a request. # is one of 'proxy_pass', 'cache_bypass', 'cache_fulfill', -# 'nonidempotent', 'hdr_add' or Frang limit directives. +# 'nonidempotent', 'hdr_add', 'http_post_validate' or Frang limit directives. # # Default: # None. @@ -894,6 +894,20 @@ # /etc/ddos_redirect.html # +# TAG: http_post_validate +# +# Validate POST requests. +# Parses Content-Type header field, and rewrites it for multipart/form-data type +# of payload, to prevent evasion attacks. All parameters other than "boundary" +# are removed. +# +# Syntax: +# http_post_validate +# +# Default: +# Validation is disabled. +# + # # Frang configuration. # diff --git a/tempesta_fw/http.c b/tempesta_fw/http.c index 709125e1a..927617d25 100644 --- a/tempesta_fw/http.c +++ b/tempesta_fw/http.c @@ -2114,6 +2114,58 @@ tfw_http_set_loc_hdrs(TfwHttpMsg *hm, TfwHttpReq *req) return 0; } +/** + * Compose Content-Type header field from scratch. + * + * A POST-request with multipart/form-data payload needs a boundary, which is + * supplied by a parameter in the Content-Type header field. There are strict + * instructions on how to parse that parameter (see RFC 7231 and RFC 7230), but + * application servers parse it in a non-standard way. For example, PHP checks + * whether parameter name contains substring "boundary", and thus happily takes + * "xxboundaryxx". 
Such quirks are used to bypass web application firewalls. + * + * To make evasions harder, this function composes value of the Content-Type + * field from the parsed data. All parameters other than "boundary" are dropped. + */ +static int +tfw_http_recreate_content_type_multipart_hdr(TfwHttpReq *req) +{ + TfwStr replacement = { + .ptr = (TfwStr []) { + TFW_STR_FROM("Content-Type"), + TFW_STR_FROM(": "), + TFW_STR_FROM("multipart/form-data; boundary="), + req->multipart_boundary_raw, + }, + .flags = 4 << TFW_STR_CN_SHIFT, + }; + TfwStr *c = replacement.ptr; + + BUG_ON(!TFW_STR_PLAIN(&req->multipart_boundary_raw)); + replacement.len = c[0].len + c[1].len + c[2].len + c[3].len; + return tfw_http_msg_hdr_xfrm_str((TfwHttpMsg *)req, &replacement, + TFW_HTTP_HDR_CONTENT_TYPE, false); +} + +static bool +tfw_http_should_validate_post_req(TfwHttpReq *req) +{ + if (req->location && req->location->validate_post_req) + return true; + + if (!req->vhost) + return false; + + if (req->vhost->loc_dflt && req->vhost->loc_dflt->validate_post_req) + return true; + + if (req->vhost->vhost_dflt && + req->vhost->vhost_dflt->loc_dflt->validate_post_req) + return true; + + return false; +} + /** * Adjust the request before proxying it to real server. */ @@ -2143,6 +2195,15 @@ tfw_http_adjust_req(TfwHttpReq *req) if (r < 0) return r; + if (req->method == TFW_HTTP_METH_POST && + test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags) && + tfw_http_should_validate_post_req(req)) + { + r = tfw_http_recreate_content_type_multipart_hdr(req); + if (r) + return r; + } + return tfw_http_set_hdr_connection(hm, BIT(TFW_HTTP_B_CONN_KA)); } diff --git a/tempesta_fw/http.h b/tempesta_fw/http.h index 1512ecb7e..3990339fd 100644 --- a/tempesta_fw/http.h +++ b/tempesta_fw/http.h @@ -229,6 +229,10 @@ enum { TFW_HTTP_B_CONN_EXTRA, /* Chunked transfer encoding. */ TFW_HTTP_B_CHUNKED, + /* Media type is multipart/form-data. */ + TFW_HTTP_B_CT_MULTIPART, + /* Multipart/form-data request has a boundary parameter. 
*/ + TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, /* Singular header presents more than once. */ TFW_HTTP_B_FIELD_DUPENTRY, @@ -369,6 +373,8 @@ typedef struct { * @host - host in URI, may differ from Host header; * @uri_path - path + query + fragment from URI (RFC3986.3); * @mark - special hash mark for redirects handling in session module; + * @multipart_boundary_raw - multipart boundary as is, maybe with escaped chars; + * @multipart_boundary - decoded multipart boundary; * @fwd_list - member in the queue of forwarded/backlogged requests; * @nip_list - member in the queue of non-idempotent requests; * @method - HTTP request method, one of GET/PORT/HEAD/etc; @@ -394,6 +400,8 @@ struct tfw_http_req_t { TfwStr host; TfwStr uri_path; TfwStr mark; + TfwStr multipart_boundary_raw; + TfwStr multipart_boundary; struct list_head fwd_list; struct list_head nip_list; unsigned char method; diff --git a/tempesta_fw/http_parser.c b/tempesta_fw/http_parser.c index d8f059dbd..a3a73ff34 100644 --- a/tempesta_fw/http_parser.c +++ b/tempesta_fw/http_parser.c @@ -259,7 +259,7 @@ do { \ #define __FSM_I_MOVE_n(to, n) __FSM_I_MOVE_finish_n(to, n, {}) #define __FSM_I_MOVE(to) __FSM_I_MOVE_n(to, 1) /* The same as __FSM_I_MOVE_n(), but exactly for jumps w/o data moving. 
*/ -#define __FSM_I_JMP(to) do { goto to; } while (0) +#define __FSM_I_JMP(to) do { parser->_i_st = to; goto to; } while (0) #define __FSM_I_MATCH_MOVE_finish(alphabet, to, finish) \ do { \ @@ -713,6 +713,26 @@ enum { I_ConnOther, I_ContLen, /* Content-Length */ I_ContType, /* Content-Type */ + I_ContTypeBoundaryValue, + I_ContTypeBoundaryValueEscapedChar, + I_ContTypeBoundaryValueQuoted, + I_ContTypeBoundaryValueUnquoted, + I_ContTypeMaybeMultipart, + I_ContTypeMediaType, + I_ContTypeMultipartOWS, + I_ContTypeOtherSubtype, + I_ContTypeOtherType, + I_ContTypeOtherTypeOWS, + I_ContTypeOtherTypeSlash, + I_ContTypeParam, + I_ContTypeParamOWS, + I_ContTypeParamOther, + I_ContTypeParamValue, + I_ContTypeParamValueEscapedChar, + I_ContTypeParamValueOWS, + I_ContTypeParamValueQuoted, + I_ContTypeParamValueQuotedEnd, + I_ContTypeParamValueUnquoted, I_KeepAlive, /* Keep-Alive header */ I_KeepAliveTO, /* Keep-Alive TimeOut */ I_KeepAliveExt, @@ -792,9 +812,9 @@ enum { if (!chunk->ptr) \ chunk->ptr = p; \ __fsm_n = __try_str(field, chunk, p, __data_remain(p), \ - str->ptr, str->len); \ + (str)->ptr, (str)->len); \ if (__fsm_n > 0) { \ - if (chunk->len == str->len) { \ + if (chunk->len == (str)->len) { \ lambda; \ TRY_STR_INIT(); \ __FSM_I_MOVE_fixup_f(state, __fsm_n, field, 0); \ @@ -1339,7 +1359,7 @@ __parse_content_length(TfwHttpMsg *msg, unsigned char *data, size_t len) * Parse Content-Type header value, RFC 7231 3.1.1.5. 
*/ static int -__parse_content_type(TfwHttpMsg *hm, unsigned char *data, size_t len) +__resp_parse_content_type(TfwHttpMsg *hm, unsigned char *data, size_t len) { int r = CSTR_NEQ; __FSM_DECLARE_VARS(hm); @@ -1375,6 +1395,341 @@ __parse_content_type(TfwHttpMsg *hm, unsigned char *data, size_t len) return r; } +static int +__strdup_multipart_boundaries(TfwHttpReq *req) +{ + unsigned char *data_raw, *data, *ptr_raw, *ptr; + TfwStr *c, *end; + + data_raw = tfw_pool_alloc(req->pool, req->multipart_boundary_raw.len); + data = tfw_pool_alloc(req->pool, req->multipart_boundary.len); + + if (!data_raw || !data) + return -ENOMEM; + + ptr_raw = data_raw; + ptr = data; + TFW_STR_FOR_EACH_CHUNK(c, &req->multipart_boundary_raw, end) { + memcpy_fast(ptr_raw, c->ptr, c->len); + ptr_raw += c->len; + if (c->flags & TFW_STR_VALUE) { + memcpy_fast(ptr, c->ptr, c->len); + ptr += c->len; + } + } + + if (ptr_raw != data_raw + req->multipart_boundary_raw.len || + ptr != data + req->multipart_boundary.len) + { + TFW_WARN("Multipart boundary string length mismatch"); + return -1; + } + + req->multipart_boundary_raw.ptr = data_raw; + req->multipart_boundary.ptr = data; + __TFW_STR_CHUNKN_SET(&req->multipart_boundary_raw, 0); + __TFW_STR_CHUNKN_SET(&req->multipart_boundary, 0); + + return 0; +} + +static int +__req_parse_content_type(TfwHttpMsg *hm, unsigned char *data, size_t len) +{ + int r = CSTR_NEQ; + TfwHttpReq *req = (TfwHttpReq *)hm; + __FSM_DECLARE_VARS(hm); + + __FSM_START(parser->_i_st) { + + __FSM_STATE(I_ContType) { + if (req->method != TFW_HTTP_METH_POST) + __FSM_I_JMP(I_EoL); + __FSM_I_JMP(I_ContTypeMediaType); + } + + __FSM_STATE(I_ContTypeMediaType) { + static const TfwStr s_multipart_form_data = + TFW_STR_FROM("multipart/form-data"); + TRY_STR_LAMBDA_fixup(&s_multipart_form_data, &parser->hdr, {}, + I_ContTypeMaybeMultipart); + if (chunk->len >= sizeof("multipart/") - 1) { + TRY_STR_INIT(); + __FSM_I_JMP(I_ContTypeOtherSubtype); + } else { + TRY_STR_INIT(); + 
__FSM_I_JMP(I_ContTypeOtherType); + } + } + + __FSM_STATE(I_ContTypeMaybeMultipart) { + if (c == ';') { + __set_bit(TFW_HTTP_B_CT_MULTIPART, req->flags); + __FSM_I_MOVE_fixup(I_ContTypeParamOWS, 1, 0); + } + if (IS_WS(c)) + __FSM_I_MOVE_fixup(I_ContTypeMultipartOWS, 1, 0); + if (IS_CRLF(c)) { + __set_bit(TFW_HTTP_B_CT_MULTIPART, req->flags); + goto finalize; + } + __FSM_I_JMP(I_ContTypeOtherSubtype); + } + + __FSM_STATE(I_ContTypeMultipartOWS) { + if (IS_WS(c)) + __FSM_I_MOVE_fixup(I_ContTypeMultipartOWS, 1, 0); + if (c == ';') { + __set_bit(TFW_HTTP_B_CT_MULTIPART, req->flags); + __FSM_I_MOVE_fixup(I_ContTypeParamOWS, 1, 0); + } + if (IS_CRLF(c)) { + __set_bit(TFW_HTTP_B_CT_MULTIPART, req->flags); + goto finalize; + } + return CSTR_NEQ; + } + + __FSM_STATE(I_ContTypeParamOWS) { + if (IS_WS(c)) + __FSM_I_MOVE_fixup(I_ContTypeParamOWS, 1, 0); + if (IS_CRLF(c)) + goto finalize; + __FSM_I_JMP(I_ContTypeParam); + } + + __FSM_STATE(I_ContTypeParam) { + static const TfwStr s_boundary = TFW_STR_FROM("boundary="); + if (!test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)) + __FSM_I_JMP(I_ContTypeParamOther); + + TRY_STR_LAMBDA_fixup(&s_boundary, &parser->hdr, { + /* + * Requests with multipart/form-data payload should have + * only one boundary parameter. + */ + if (__test_and_set_bit( + TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, req->flags)) + return CSTR_NEQ; + }, I_ContTypeBoundaryValue); + TRY_STR_INIT(); + __FSM_I_JMP(I_ContTypeParamOther); + } + + __FSM_STATE(I_ContTypeParamOther) { + __FSM_I_MATCH_MOVE_fixup(token, I_ContTypeParamOther, 0); + if (IS_CRLF(*(p + __fsm_sz))) { + /* Line terminated just after parameter name. Value is + * missing. 
+ */ + return CSTR_NEQ; + } + if (*(p + __fsm_sz) != '=') + return CSTR_NEQ; + __FSM_I_MOVE_fixup(I_ContTypeParamValue, __fsm_sz + 1, 0); + } + + __FSM_STATE(I_ContTypeBoundaryValue) { + req->multipart_boundary_raw.len = 0; + req->multipart_boundary.len = 0; + /* + * msg->parser.hdr.ptr can't be used as a base here, since its + * value can change due to reallocation during msg->parser.hdr + * growth. Let's store chunk number instead for now. + */ + req->multipart_boundary_raw.ptr = + (void *)(size_t)TFW_STR_CHUNKN(&parser->hdr); + if (*p == '"') { + req->multipart_boundary_raw.len += 1; + __FSM_I_MOVE_fixup(I_ContTypeBoundaryValueQuoted, 1, 0); + } + __FSM_I_JMP(I_ContTypeBoundaryValueUnquoted); + } + + __FSM_STATE(I_ContTypeBoundaryValueUnquoted) { + __fsm_n = __data_remain(p); + __fsm_sz = tfw_match_token(p, __fsm_n); + if (__fsm_sz > 0) { + __msg_hdr_chunk_fixup(p, __fsm_sz); + __FSM_I_chunk_flags(TFW_STR_VALUE); + req->multipart_boundary_raw.len += __fsm_sz; + req->multipart_boundary.len += __fsm_sz; + } + if (unlikely(__fsm_sz == __fsm_n)) + return CSTR_POSTPONE; + + p += __fsm_sz; + __TFW_STR_CHUNKN_SET(&req->multipart_boundary_raw, + TFW_STR_CHUNKN(&parser->hdr) - + (size_t)req->multipart_boundary_raw.ptr); + /* __fsm_sz != __fsm_n, therefore __data_remain(p) > 0 */ + __FSM_I_JMP(I_ContTypeParamValueOWS); + } + + __FSM_STATE(I_ContTypeBoundaryValueQuoted) { + __fsm_n = __data_remain(p); + __fsm_sz = tfw_match_token(p, __fsm_n); + if (__fsm_sz > 0) { + __msg_hdr_chunk_fixup(p, __fsm_sz); + __FSM_I_chunk_flags(TFW_STR_VALUE); + req->multipart_boundary_raw.len += __fsm_sz; + req->multipart_boundary.len += __fsm_sz; + } + if (unlikely(__fsm_sz == __fsm_n)) + return CSTR_POSTPONE; + p += __fsm_sz; + + if (*p == '\\') { + req->multipart_boundary_raw.len += 1; + __FSM_I_MOVE_fixup(I_ContTypeBoundaryValueEscapedChar, + 1, 0); + } + if (IS_CRLF(*p)) { + /* Missing closing '"'. */ + return CSTR_NEQ; + } + if (*p != '"') { + /* TODO: faster qdtext/quoted-pair matcher. 
*/ + req->multipart_boundary_raw.len += 1; + req->multipart_boundary.len += 1; + __FSM_I_MOVE_fixup(I_ContTypeBoundaryValueQuoted, 1, + TFW_STR_VALUE); + } + + /* *p == '"' */ + __msg_hdr_chunk_fixup(p, 1); + p += 1; + req->multipart_boundary_raw.len += 1; + __TFW_STR_CHUNKN_SET(&req->multipart_boundary_raw, + TFW_STR_CHUNKN(&parser->hdr) - + (size_t)req->multipart_boundary_raw.ptr); + + if (unlikely(__data_remain(p) == 0)) { + parser->_i_st = I_ContTypeParamValueOWS; + return CSTR_POSTPONE; + } + __FSM_I_JMP(I_ContTypeParamValueOWS); + } + + __FSM_STATE(I_ContTypeBoundaryValueEscapedChar) { + if (IS_CRLF(*p)) + return CSTR_NEQ; + req->multipart_boundary_raw.len += 1; + req->multipart_boundary.len += 1; + __FSM_I_MOVE_fixup(I_ContTypeBoundaryValueQuoted, 1, + TFW_STR_VALUE); + } + + __FSM_STATE(I_ContTypeParamValue) { + if (*p == '"') + __FSM_I_MOVE_fixup(I_ContTypeParamValueQuoted, 1, 0); + __FSM_I_JMP(I_ContTypeParamValueUnquoted); + } + + __FSM_STATE(I_ContTypeParamValueUnquoted) { + __FSM_I_MATCH_MOVE_fixup(token, I_ContTypeParamValueUnquoted, + TFW_STR_VALUE); + __FSM_I_MOVE_fixup(I_ContTypeParamValueOWS, __fsm_sz, 0); + } + + __FSM_STATE(I_ContTypeParamValueOWS) { + if (IS_WS(c)) + __FSM_I_MOVE_fixup(I_ContTypeParamValueOWS, 1, 0); + if (c == ';') + __FSM_I_MOVE_fixup(I_ContTypeParamOWS, 1, 0); + if (IS_CRLF(c)) + goto finalize; + return CSTR_NEQ; + } + + __FSM_STATE(I_ContTypeParamValueQuoted) { + __FSM_I_MATCH_MOVE_fixup(token, I_ContTypeParamValueQuoted, + TFW_STR_VALUE); + if (__fsm_sz > 0) { + __msg_hdr_chunk_fixup(p, __fsm_sz); + __FSM_I_chunk_flags(TFW_STR_VALUE); + } + p += __fsm_sz; + if (*p == '\\') + __FSM_I_MOVE_fixup(I_ContTypeParamValueEscapedChar, 1, + 0); + if (*p == '"') + __FSM_I_MOVE_fixup(I_ContTypeParamValueOWS, 1, 0); + if (IS_CRLF(*p)) { + /* Missing closing '"'. */ + return CSTR_NEQ; + } + /* TODO: faster qdtext/quoted-pair matcher. 
*/ + __FSM_I_MOVE_fixup(I_ContTypeParamValueQuoted, 1, 0); + } + + __FSM_STATE(I_ContTypeParamValueEscapedChar) { + if (IS_CRLF(*p)) + return CSTR_NEQ; + __FSM_I_MOVE_fixup(I_ContTypeParamValueQuoted, 1, + TFW_STR_VALUE); + } + + __FSM_STATE(I_ContTypeOtherType) { + __FSM_I_MATCH_MOVE_fixup(token, I_ContTypeOtherType, 0); + if (IS_CRLF(*(p + __fsm_sz))) { + p += __fsm_sz; + goto finalize; + } + __FSM_I_MOVE_n(I_ContTypeOtherTypeSlash, __fsm_sz); + } + + __FSM_STATE(I_ContTypeOtherTypeSlash) { + if (c != '/') + return CSTR_NEQ; + __FSM_I_MOVE_fixup(I_ContTypeOtherSubtype, 1, 0); + } + + __FSM_STATE(I_ContTypeOtherSubtype) { + __FSM_I_MATCH_MOVE_fixup(token, I_ContTypeOtherSubtype, 0); + __FSM_I_MOVE_fixup(I_ContTypeOtherTypeOWS, __fsm_sz, 0); + } + + __FSM_STATE(I_ContTypeOtherTypeOWS) { + if (IS_WS(c)) + __FSM_I_MOVE_fixup(I_ContTypeOtherTypeOWS, 1, 0); + if (c == ';') + __FSM_I_MOVE_fixup(I_ContTypeParamOWS, 1, 0); + if (IS_CRLF(c)) + goto finalize; + return CSTR_NEQ; + } + + __FSM_STATE(I_EoL) { + __FSM_I_MATCH_MOVE_fixup(ctext_vchar, I_EoL, 0); + if (IS_CRLF(*(p + __fsm_sz))) { + p += __fsm_sz; + goto finalize; + } + return CSTR_NEQ; + } + + } /* FSM END */ +done: + return r; + +finalize: + if (req->multipart_boundary_raw.len > 0) { + req->multipart_boundary_raw.ptr = (TfwStr *)parser->hdr.ptr + + (size_t)req->multipart_boundary_raw.ptr; + + /* + * Raw value of multipart boundary is going to be used during + * Content-Type field composing. So to prevent memcpy'ing + * intersecting buffers, we have to make a separate copy. + */ + if (__strdup_multipart_boundaries(req)) + return CSTR_NEQ; + } + + return __data_off(p); +} + /** * Parse Transfer-Encoding header value, RFC 2616 14.41 and 3.6. */ @@ -3690,9 +4045,9 @@ tfw_http_parse_req(void *req_data, unsigned char *data, size_t len, TFW_HTTP_HDR_CONTENT_LENGTH); /* 'Content-Type:*OWS' is read, process field-value. 
*/ - TFW_HTTP_PARSE_SPECHDR_VAL(Req_HdrContent_TypeV, I_ContType, - msg, __parse_content_type, - TFW_HTTP_HDR_CONTENT_TYPE); + __TFW_HTTP_PARSE_SPECHDR_VAL(Req_HdrContent_TypeV, I_ContType, + msg, __req_parse_content_type, + TFW_HTTP_HDR_CONTENT_TYPE, 0); /* 'Host:*OWS' is read, process field-value. */ TFW_HTTP_PARSE_SPECHDR_VAL(Req_HdrHostV, Req_I_H_Start, req, @@ -4774,7 +5129,7 @@ tfw_http_parse_resp(void *resp_data, unsigned char *data, size_t len, /* 'Content-Type:*OWS' is read, process field-value. */ TFW_HTTP_PARSE_SPECHDR_VAL(Resp_HdrContent_TypeV, I_ContType, - msg, __parse_content_type, + msg, __resp_parse_content_type, TFW_HTTP_HDR_CONTENT_TYPE); /* 'Date:*OWS' is read, process field-value. */ diff --git a/tempesta_fw/t/unit/test_http_parser.c b/tempesta_fw/t/unit/test_http_parser.c index 80885198b..25006b5da 100644 --- a/tempesta_fw/t/unit/test_http_parser.c +++ b/tempesta_fw/t/unit/test_http_parser.c @@ -2282,6 +2282,145 @@ TEST(http_parser, fuzzer) kernel_fpu_begin(); } +TEST(http_parser, content_type_line_parser) +{ +#define HEAD "POST / HTTP/1.1\r\nHost: localhost.localdomain\r\nContent-Type: " +#define TAIL "\nContent-Length: 0\r\nKeep-Alive: timeout=98765\r\n\r\n" + +#define CT01 "multIPart/forM-data ; bouNDary=1234567890 ; otherparam=otherval " + + FOR_REQ(HEAD CT01 TAIL) { + EXPECT_TRUE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_TRUE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + EXPECT_TFWSTR_EQ(&req->multipart_boundary_raw, "1234567890"); + EXPECT_TFWSTR_EQ(&req->multipart_boundary, "1234567890"); + EXPECT_TFWSTR_EQ(&req->h_tbl->tbl[TFW_HTTP_HDR_CONTENT_TYPE], + "Content-Type: " CT01); + } + + FOR_REQ(HEAD "multipart/form-data; boundary=\"1234\\56\\\"7890\"" TAIL) { + EXPECT_TRUE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_TRUE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + EXPECT_TFWSTR_EQ(&req->multipart_boundary_raw, + "\"1234\\56\\\"7890\""); + 
EXPECT_TFWSTR_EQ(&req->multipart_boundary, "123456\"7890"); + } + + FOR_REQ(HEAD "multipart/form-data" TAIL) { + EXPECT_TRUE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "multipart/form-data " TAIL) { + EXPECT_TRUE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "multipart/form-data \t" TAIL) { + EXPECT_TRUE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "multipart/form-data1" TAIL) { + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "multipart/form-data1; param=value" TAIL) { + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "multihello/world" TAIL) { + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "multihello/world; param=value" TAIL) { + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "multipart/form-dat" TAIL) { + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "multipart/form-other; param=value" TAIL) { + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "multipart/form-data; xboundary=1234567890" TAIL) { + EXPECT_TRUE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + 
EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + FOR_REQ(HEAD "application/octet-stream" TAIL) { + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART, req->flags)); + EXPECT_FALSE(test_bit(TFW_HTTP_B_CT_MULTIPART_HAS_BOUNDARY, + req->flags)); + } + + /* Multipart requests with multiple boundaries are clearly malicious. */ + EXPECT_BLOCK_REQ(HEAD"multipart/form-data; boundary=1; boundary=2"TAIL); + + /* Comma is not a valid separator here. */ + EXPECT_BLOCK_REQ(HEAD "multipart/form-data, boundary=123" TAIL); + + /* Unfinished quoted parameter value */ + EXPECT_BLOCK_REQ(HEAD "multipart/form-data; boundary=\"123" TAIL); + + /* Spaces where they do not belong */ + EXPECT_BLOCK_REQ(HEAD "multipart/form-data; boundary =123" TAIL); + EXPECT_BLOCK_REQ(HEAD "multipart/form-data; boundary= 123" TAIL); + EXPECT_BLOCK_REQ(HEAD "multipart/form-data; boundary=12 3" TAIL); + EXPECT_BLOCK_REQ(HEAD "multipart/form-data; boun dary=123" TAIL); + + /* + * Other media types are not restricted in terms of boundary parameter + * quantities. + */ + FOR_REQ(HEAD "text/plain; boundary=1; boundary=2" TAIL); + FOR_REQ(HEAD "text/plain; boundary=1; boundary=2; boundary=3" TAIL); + FOR_REQ(HEAD "textqwe/plain; boundary=1; other=3" TAIL); + + /* Parameter should be in format name=value. */ + EXPECT_BLOCK_REQ(HEAD "text/plain; name" TAIL); + EXPECT_BLOCK_REQ(HEAD "text/plain; name " TAIL); + EXPECT_BLOCK_REQ(HEAD "text/plain; name\t " TAIL); + + /* Unfinished quoted parameter value */ + EXPECT_BLOCK_REQ(HEAD "text/plain; name=\"unfinished" TAIL); + + /* Other parameter quoted values. */ + FOR_REQ(HEAD "text/plain; name=\"value\"" TAIL); + FOR_REQ(HEAD "text/plain; name=\"value\" " TAIL); + FOR_REQ(HEAD "text/plain; name=\"value\";" TAIL); + FOR_REQ(HEAD "text/plain; name=\"value\"; " TAIL); + + FOR_REQ(HEAD "text/plain; name=\"val\\\"ue\"" TAIL); + FOR_REQ(HEAD "text/plain; name=\"val\\\"ue\" " TAIL); + + /* Line ended at '\\'. 
*/ + EXPECT_BLOCK_REQ(HEAD "text/plain; name=\"val\\" TAIL); + +#undef HEAD +#undef TAIL +} + TEST_SUITE(http_parser) { int r; @@ -2318,6 +2457,7 @@ TEST_SUITE(http_parser) TEST_RUN(http_parser, req_hop_by_hop); TEST_RUN(http_parser, resp_hop_by_hop); TEST_RUN(http_parser, fuzzer); + TEST_RUN(http_parser, content_type_line_parser); /* * Testing for correctness of redirection mark parsing (in diff --git a/tempesta_fw/vhost.c b/tempesta_fw/vhost.c index 88c846cf7..39d2327eb 100644 --- a/tempesta_fw/vhost.c +++ b/tempesta_fw/vhost.c @@ -878,6 +878,13 @@ tfw_cfgop_in_cache_bypass(TfwCfgSpec *cs, TfwCfgEntry *ce) TFW_D_CACHE_BYPASS); } +static int +tfw_cfgop_in_http_post_validate(TfwCfgSpec *cs, TfwCfgEntry *ce) +{ + tfw_vhost_entry->loc_dflt->validate_post_req = 1; + return 0; +} + static int tfw_cfgop_out_cache_fulfill(TfwCfgSpec *cs, TfwCfgEntry *ce) { @@ -894,6 +901,13 @@ tfw_cfgop_out_cache_bypass(TfwCfgSpec *cs, TfwCfgEntry *ce) TFW_D_CACHE_BYPASS); } +static int +tfw_cfgop_out_http_post_validate(TfwCfgSpec *cs, TfwCfgEntry *ce) +{ + tfw_vhosts_reconfig->vhost_dflt->loc_dflt->validate_post_req = 1; + return 0; +} + /* * Find a location directive entry. The entry is looked up * in the array that holds all location directives. 
@@ -1767,6 +1781,13 @@ tfw_cfgop_frang_out_http_ct_vals(TfwCfgSpec *cs, TfwCfgEntry *ce) return tfw_cfgop_frang_http_ct_vals(cs, ce, &frang_cfg); } +static int +tfw_cfgop_http_post_validate(TfwCfgSpec *cs, TfwCfgEntry *ce) +{ + tfwcfg_this_location->validate_post_req = 1; + return 0; +} + static int tfw_cfgop_frang_loc_rsp_code_block(TfwCfgSpec *cs, TfwCfgEntry *ce) { @@ -2120,6 +2141,13 @@ static TfwCfgSpec tfw_vhost_location_specs[] = { .allow_repeat = false, .allow_reconfig = true, }, + { + .name = "http_post_validate", + .handler = tfw_cfgop_http_post_validate, + .allow_none = true, + .allow_repeat = false, + .allow_reconfig = true, + }, { .name = "http_resp_code_block", .handler = tfw_cfgop_frang_loc_rsp_code_block, @@ -2155,6 +2183,14 @@ static TfwCfgSpec tfw_vhost_internal_specs[] = { .allow_repeat = true, .allow_reconfig = true, }, + { + .name = "http_post_validate", + .deflt = NULL, + .handler = tfw_cfgop_in_http_post_validate, + .allow_none = true, + .allow_repeat = false, + .allow_reconfig = true, + }, { .name = "nonidempotent", .deflt = NULL, @@ -2378,6 +2414,14 @@ static TfwCfgSpec tfw_vhost_specs[] = { .allow_repeat = true, .allow_reconfig = true, }, + { + .name = "http_post_validate", + .deflt = NULL, + .handler = tfw_cfgop_out_http_post_validate, + .allow_none = true, + .allow_repeat = true, + .allow_reconfig = true, + }, { .name = "nonidempotent", .deflt = NULL, diff --git a/tempesta_fw/vhost.h b/tempesta_fw/vhost.h index e633bb850..be8f87b8a 100644 --- a/tempesta_fw/vhost.h +++ b/tempesta_fw/vhost.h @@ -120,6 +120,7 @@ typedef struct { TfwSrvGroup *backup_sg; TfwPool *hdrs_pool; TfwHdrMods mod_hdrs[TFW_VHOST_HDRMOD_NUM]; + unsigned int validate_post_req:1; } TfwLocation; /* Cache purge configuration modes. */