Skip to content

Commit

Permalink
Merge pull request #650 from tempesta-tech/ik-hbh-parser
Browse files Browse the repository at this point in the history
Don't cache and don't forward hop-by-hop headers
  • Loading branch information
vankoven authored Dec 29, 2016
2 parents ad1ce9d + fbf0082 commit 72a24ae
Show file tree
Hide file tree
Showing 9 changed files with 890 additions and 171 deletions.
40 changes: 15 additions & 25 deletions tempesta_fw/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,26 +131,6 @@ enum {
TFW_CACHE_REPLICA,
};

/*
* Non-cacheable hop-by-hop response headers in terms of RFC 2068.
* The table is used if the server doesn't specify the Cache-Control no-cache
* directive (RFC 7234 5.2.2.2) explicitly.
*
* The Server header isn't defined as hop-by-hop by the RFC, but we don't
* expose the protected server to the world.
*
* We don't store these headers in the cache and create them from scratch.
* Adding a header is faster than modifying it, so this speeds up header
* adjustment as well as saves cache storage.
*
* The table is indexed by the header's position in the message headers
* table; a non-zero entry marks that header as hop-by-hop. Only positions
* below TFW_HTTP_HDR_RAW are looked up by the users of the table.
*
* TODO process Cache-Control no-cache
*/
static const int hbh_hdrs[] = {
[0 ... TFW_HTTP_HDR_RAW] = 0,
[TFW_HTTP_HDR_SERVER] = 1,
[TFW_HTTP_HDR_CONNECTION] = 1,
};

typedef struct {
int cpu[NR_CPUS];
atomic_t cpu_idx;
Expand Down Expand Up @@ -764,9 +744,15 @@ tfw_cache_copy_resp(TfwCacheEntry *ce, TfwHttpResp *resp, TfwHttpReq *req,
ce->hdr_len = 0;
ce->hdr_num = resp->h_tbl->off;
FOR_EACH_HDR_FIELD(field, end1, resp) {
n = field - resp->h_tbl->tbl;
/* Skip hop-by-hop headers. */
h = (n < TFW_HTTP_HDR_RAW && hbh_hdrs[n]) ? &empty : field;
if (!(field->flags & TFW_STR_HBH_HDR)) {
h = field;
} else if (field - resp->h_tbl->tbl < TFW_HTTP_HDR_RAW) {
h = &empty;
} else {
--ce->hdr_num;
continue;
}
n = tfw_cache_copy_hdr(&p, &trec, h, &tot_len);
if (n < 0) {
TFW_ERR("Cache: cannot copy HTTP header\n");
Expand Down Expand Up @@ -809,7 +795,6 @@ tfw_cache_copy_resp(TfwCacheEntry *ce, TfwHttpResp *resp, TfwHttpReq *req,
static size_t
__cache_entry_size(TfwHttpResp *resp, TfwHttpReq *req)
{
long n;
size_t size = CE_BODY_SIZE;
TfwStr *h, *hdr, *hdr_end, *dup, *dup_end, empty = {};

Expand All @@ -820,8 +805,13 @@ __cache_entry_size(TfwHttpResp *resp, TfwHttpReq *req)
/* Add all the headers size */
FOR_EACH_HDR_FIELD(hdr, hdr_end, resp) {
/* Skip hop-by-hop headers. */
n = hdr - resp->h_tbl->tbl;
h = (n < TFW_HTTP_HDR_RAW && hbh_hdrs[n]) ? &empty : hdr;
if (!(hdr->flags & TFW_STR_HBH_HDR))
h = hdr;
else if (hdr - resp->h_tbl->tbl < TFW_HTTP_HDR_RAW)
h = &empty;
else
continue;

if (!TFW_STR_DUP(h)) {
size += sizeof(TfwCStr);
size += h->len ? (h->len + SLEN(S_CRLF)) : 0;
Expand Down
15 changes: 12 additions & 3 deletions tempesta_fw/http.c
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,8 @@ static int
tfw_http_set_hdr_connection(TfwHttpMsg *hm, int conn_flg)
{
if (((hm->flags & __TFW_HTTP_CONN_MASK) == conn_flg)
&& (!TFW_STR_EMPTY(&hm->h_tbl->tbl[TFW_HTTP_HDR_CONNECTION])))
&& (!TFW_STR_EMPTY(&hm->h_tbl->tbl[TFW_HTTP_HDR_CONNECTION]))
&& !(hm->flags & TFW_HTTP_CONN_EXTRA))
return 0;

switch (conn_flg) {
Expand All @@ -617,7 +618,7 @@ tfw_http_set_hdr_connection(TfwHttpMsg *hm, int conn_flg)
}
}

/*
/**
* Add/Replace/Remove Keep-Alive header field to/from HTTP message.
*/
static int
Expand All @@ -630,7 +631,7 @@ tfw_http_set_hdr_keep_alive(TfwHttpMsg *hm, int conn_flg)

switch (conn_flg) {
case TFW_HTTP_CONN_CLOSE:
r = TFW_HTTP_MSG_HDR_DEL(hm, "Keep-Alive", TFW_HTTP_HDR_RAW);
r = TFW_HTTP_MSG_HDR_DEL(hm, "Keep-Alive", TFW_HTTP_HDR_KEEP_ALIVE);
if (unlikely(r && r != -ENOENT)) {
TFW_WARN("Cannot delete Keep-Alive header (%d)\n", r);
return r;
Expand Down Expand Up @@ -736,6 +737,10 @@ tfw_http_adjust_req(TfwHttpReq *req)
if (r)
return r;

r = tfw_http_msg_del_hbh_hdrs(hm);
if (r < 0)
return r;

return tfw_http_set_hdr_connection(hm, TFW_HTTP_CONN_KA);
}

Expand All @@ -754,6 +759,10 @@ tfw_http_adjust_resp(TfwHttpResp *resp, TfwHttpReq *req)
if (r < 0)
return r;

r = tfw_http_msg_del_hbh_hdrs(hm);
if (r < 0)
return r;

r = tfw_http_set_hdr_keep_alive(hm, conn_flg);
if (r < 0)
return r;
Expand Down
111 changes: 73 additions & 38 deletions tempesta_fw/http.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,47 +135,16 @@ typedef struct {
time_t expires;
} TfwCacheControl;

/**
* We use a goto/switch-driven automaton, so the compiler typically generates
* binary search code over jump labels, which gives log(N) lookup complexity
* where N is the number of states. However, a DFA for full HTTP processing can
* be quite large, and log(N) becomes expensive and hard to code.
*
* So we split the state space to avoid state explosion.
* @_i_st is used to save the current state and go to an interior sub-automaton
* (e.g. process OWS using @state while the current state is saved in @_i_st),
* or @_i_st is used to parse the value of a header.
*
* @to_go - remaining number of bytes to process in the data chunk
* (limited by single packet size and never exceeds 64KB);
* @state - current parser state;
* @_i_st - helping (interior) state;
* @to_read - remaining number of bytes to read;
* @_acc - integer accumulator for parsing chunked integers;
* @_date - accumulator for a date in date related headers;
* @_hdr_tag - stores header id which must be closed on generic EoL handling
* (see RGEN_EOL());
* @_tmp_chunk - currently parsed (sub)string, possibly chunked;
* @hdr - currently parsed header.
*/
typedef struct {
unsigned short to_go;
int state;
int _i_st;
int to_read;
unsigned long _acc;
time_t _date;
unsigned int _hdr_tag;
TfwStr _tmp_chunk;
TfwStr hdr;
} TfwHttpParser;

/**
* Http headers table.
*
* Singular headers (in terms of RFC 7230 3.2.2) go first to protect against
* header repetition attacks. See __hdr_is_singular() and don't forget to
* update the static headers array when adding a new singular header here.
* If the new header is hop-by-hop (must be neither forwarded nor cached by
* Tempesta), it must be listed in __hbh_parser_init_req()/__hbh_parser_init_resp()
* if it is unconditionally hop-by-hop, or in __parse_connection() otherwise.
* If the header is end-to-end, it must be listed in __hbh_parser_add_data().
*
* Note: don't forget to update __http_msg_hdr_val() upon adding a new header.
*
Expand All @@ -197,6 +166,7 @@ typedef enum {

TFW_HTTP_HDR_CONNECTION = TFW_HTTP_HDR_NONSINGULAR,
TFW_HTTP_HDR_X_FORWARDED_FOR,
TFW_HTTP_HDR_KEEP_ALIVE,
TFW_HTTP_HDR_TRANSFER_ENCODING,

/* Start of list of generic (raw) headers. */
Expand All @@ -216,12 +186,76 @@ typedef struct {
+ sizeof(TfwStr) * (s))
#define TFW_HHTBL_SZ(o) TFW_HHTBL_EXACTSZ(__HHTBL_SZ(o))

/** Maximum of hop-by-hop tokens listed in Connection header. */
#define TFW_HBH_TOKENS_MAX 16

/**
* Non-cacheable hop-by-hop headers in terms of RFC 7230.
*
* We don't store these headers in the cache and create them from scratch if
* needed. Adding a header is faster than modifying it, so this speeds up
* header adjustment as well as saves cache storage.
*
* Headers unconditionally treated as hop-by-hop must be listed in the
* __hbh_parser_init_req()/__hbh_parser_init_resp() functions and must be
* members of the Special headers group.
*
* @spec - bit array for special headers. A hop-by-hop special header is
* stored as (0x1 << tfw_http_hdr_t[hid]);
* @raw - table of raw header names, parsed from the Connection field
* (holds at most TFW_HBH_TOKENS_MAX entries);
* @off - offset of the last added raw header name.
*/
typedef struct {
unsigned int spec;
unsigned int off;
TfwStr raw[TFW_HBH_TOKENS_MAX];
} TfwHttpHbhHdrs;

/**
* We use a goto/switch-driven automaton, so the compiler typically generates
* binary search code over jump labels, which gives log(N) lookup complexity
* where N is the number of states. However, a DFA for full HTTP processing can
* be quite large, and log(N) becomes expensive and hard to code.
*
* So we split the state space to avoid state explosion.
* @_i_st is used to save the current state and go to an interior sub-automaton
* (e.g. process OWS using @state while the current state is saved in @_i_st),
* or @_i_st is used to parse the value of a header.
*
* @to_go - remaining number of bytes to process in the data chunk
* (limited by single packet size and never exceeds 64KB);
* @state - current parser state;
* @_i_st - helping (interior) state;
* @to_read - remaining number of bytes to read;
* @_acc - integer accumulator for parsing chunked integers;
* @_date - accumulator for a date in date related headers;
* @_hdr_tag - stores header id which must be closed on generic EoL handling
* (see RGEN_EOL());
* @_tmp_chunk - currently parsed (sub)string, possibly chunked;
* @hdr - currently parsed header;
* @hbh_parser - list of special and raw header names to be treated as
* hop-by-hop.
*/
typedef struct {
unsigned short to_go;
int state;
int _i_st;
int to_read;
unsigned long _acc;
time_t _date;
unsigned int _hdr_tag;
TfwStr _tmp_chunk;
TfwStr hdr;
TfwHttpHbhHdrs hbh_parser;
} TfwHttpParser;

/* Common flags for requests and responses. */
#define TFW_HTTP_CONN_CLOSE 0x000001
#define TFW_HTTP_CONN_KA 0x000002
#define __TFW_HTTP_CONN_MASK (TFW_HTTP_CONN_CLOSE | TFW_HTTP_CONN_KA)
#define TFW_HTTP_CHUNKED 0x000004
#define TFW_HTTP_MSG_SENT 0x000008
#define TFW_HTTP_CONN_EXTRA 0x000004
#define TFW_HTTP_CHUNKED 0x000008
#define TFW_HTTP_MSG_SENT 0x000010

/* Request flags */
#define TFW_HTTP_HAS_STICKY 0x000100
Expand Down Expand Up @@ -265,6 +299,7 @@ typedef struct {
* @content_length - the value of Content-Length header field;
* @conn - connection which the message was received on;
* @jtstamp - time the message has been received, in jiffies;
* @keep_alive - the value of timeout specified in Keep-Alive header;
* @crlf - pointer to CRLF between headers and body;
* @body - pointer to the body of a message;
*
Expand All @@ -280,6 +315,7 @@ typedef struct {
unsigned int flags; \
unsigned long content_length; \
unsigned long jtstamp; \
unsigned int keep_alive; \
TfwConnection *conn; \
void (*destructor)(void *msg); \
TfwStr crlf; \
Expand Down Expand Up @@ -342,7 +378,6 @@ typedef struct {
TFW_HTTP_MSG_COMMON;
TfwStr s_line;
unsigned short status;
unsigned int keep_alive;
time_t date;
} TfwHttpResp;

Expand Down
Loading

0 comments on commit 72a24ae

Please sign in to comment.