diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 532480a46..d27c08f48 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -86,6 +86,10 @@ static bool interrupted_user_mode(void)
  */
 bool irq_fpu_usable(void)
 {
+#ifdef CONFIG_SECURITY_TEMPESTA
+	if (likely(in_serving_softirq()))
+		return true;
+#endif
 	return !in_interrupt() ||
 		interrupted_user_mode() ||
 		interrupted_kernel_fpu_idle();
diff --git a/crypto/api.c b/crypto/api.c
index 941cd4c6c..5fd308d8e 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -451,7 +451,11 @@ void *crypto_create_tfm(struct crypto_alg *alg,
 	tfmsize = frontend->tfmsize;
 	total = tfmsize + sizeof(*tfm) + frontend->extsize(alg);
 
+#ifdef CONFIG_SECURITY_TEMPESTA
+	mem = kzalloc(total, GFP_ATOMIC);
+#else
 	mem = kzalloc(total, GFP_KERNEL);
+#endif
 	if (mem == NULL)
 		goto out_err;
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 46bf7cc7d..b698c645b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -148,11 +148,22 @@ static inline bool dev_xmit_complete(int rc)
 # define LL_MAX_HEADER 32
 #endif
 
+#ifdef CONFIG_SECURITY_TEMPESTA
+/*
+ * In the Tempesta case most of the traffic is TLS-encrypted, so reserve extra
+ * room for the TLS record header and explicit IV at skb allocation to avoid
+ * data movement in tcp_write_xmit(). Not all skbs carry TLS headers, but it's
+ * no big deal to allocate 16 more bytes (5 - TLS header, 8 - IV, 3 - alignment).
+ */
+#define TLS_MAX_HDR	16
+#else
+#define TLS_MAX_HDR	0
+#endif
 #if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \
     !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL)
-#define MAX_HEADER LL_MAX_HEADER
+#define MAX_HEADER (LL_MAX_HEADER + TLS_MAX_HDR)
 #else
-#define MAX_HEADER (LL_MAX_HEADER + 48)
+#define MAX_HEADER (LL_MAX_HEADER + 48 + TLS_MAX_HDR)
 #endif
 
 /*
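To make the headroom arithmetic above concrete, here is a minimal sketch of what the extra 16 bytes buy; it is not part of the patch, and my_push_tls_prefix() is a hypothetical helper. Because every allocation path that reserves MAX_HEADER now also reserves TLS_MAX_HDR, the 5-byte TLS record header and the 8-byte explicit IV can be prepended to a queued skb with skb_push() alone, with no reallocation and no payload movement:

#include <linux/skbuff.h>
#include <linux/netdevice.h>

static void *my_push_tls_prefix(struct sk_buff *skb)
{
	/* TLS record header (5) plus explicit IV (8) fit the 16-byte pad. */
	BUILD_BUG_ON(5 + 8 > TLS_MAX_HDR);

	if (unlikely(skb_headroom(skb) < 5 + 8))
		return NULL;	/* shouldn't happen with the new MAX_HEADER */
	return skb_push(skb, 5 + 8);
}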
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e6438bfa9..0d90fcf05 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -232,6 +232,12 @@
 	SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))
 #define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X), 0))
 #define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0, 2))
+#ifdef CONFIG_SECURITY_TEMPESTA
+#define SKB_MAX_HEADER	(PAGE_SIZE - MAX_TCP_HEADER			\
+			 - SKB_DATA_ALIGN(sizeof(struct sk_buff))	\
+			 - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+			 - SKB_DATA_ALIGN(1))
+#endif
 
 /* return minimum truesize of one skb containing X bytes of data */
 #define SKB_TRUESIZE(X) ((X) +						\
@@ -854,6 +860,50 @@ struct sk_buff {
 #define SKB_ALLOC_RX	0x02
 #define SKB_ALLOC_NAPI	0x04
 
+#ifdef CONFIG_SECURITY_TEMPESTA
+/**
+ * The skb type is used only for the time between when @skb is inserted into
+ * the TCP send queue and when it is first processed in tcp_write_xmit().
+ * During this time the @skb isn't scheduled yet, so we can use skb->dev for
+ * our own needs and avoid extending sk_buff. We set the least significant
+ * bit so the value can't be mistaken for a pointer, which keeps everything
+ * else intact: TLS message type << 1 is always smaller than 0xff.
+ */
+static inline void
+tempesta_tls_skb_settype(struct sk_buff *skb, unsigned char type)
+{
+	BUG_ON(type >= 0x80);
+	WARN_ON_ONCE(skb->dev);
+
+	skb->dev = (void *)((type << 1) | 1UL);
+}
+
+static inline unsigned char
+tempesta_tls_skb_type(struct sk_buff *skb)
+{
+	unsigned long d = (unsigned long)skb->dev;
+
+	if (!(d & 1UL))
+		return 0; /* a pointer in skb->dev */
+	return d >> 1;
+}
+
+static inline void
+tempesta_tls_skb_typecp(struct sk_buff *dst, struct sk_buff *src)
+{
+	dst->dev = src->dev;
+}
+
+static inline void
+tempesta_tls_skb_clear(struct sk_buff *skb)
+{
+	unsigned long d = (unsigned long)skb->dev;
+
+	WARN_ON_ONCE(d & ~0xff);
+	skb->dev = NULL;
+}
+#endif
+
 /* Returns true if the skb was allocated from PFMEMALLOC reserves */
 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
 {
@@ -972,6 +1022,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		      bool *fragstolen, int *delta_truesize);
 
+void *pg_skb_alloc(unsigned int size, gfp_t gfp_mask, int node);
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
 			    int node);
 struct sk_buff *__build_skb(void *data, unsigned int frag_size);
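A minimal sketch of how a sender is expected to use the tagging helpers above; my_queue_for_xmit() and MY_TLS_APP_DATA are hypothetical names, and the actual enqueue call (e.g. Tempesta's ss_do_send()) is left out. The type is set while the skb sits in the send queue, survives segmentation via tempesta_tls_skb_typecp() in the net/tcp.h hunk below, and is read back by tcp_write_xmit() to decide whether the skb needs encryption:

#include <linux/skbuff.h>
#include <net/sock.h>

#define MY_TLS_APP_DATA		23	/* TLS ApplicationData record type */

static void my_queue_for_xmit(struct sock *sk, struct sk_buff *skb)
{
	/* Tag the skb while it sits in the send queue. */
	tempesta_tls_skb_settype(skb, MY_TLS_APP_DATA);

	/*
	 * Queueing itself is the caller's business (not shown).
	 * tempesta_tls_skb_type(skb) now returns MY_TLS_APP_DATA;
	 * tempesta_tls_skb_clear() must run before the skb leaves TCP,
	 * since skb->dev holds a fake value rather than a device pointer.
	 */
}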
diff --git a/include/net/sock.h b/include/net/sock.h
index ece2126c0..4b990fe91 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -471,6 +471,11 @@ struct sock {
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk);
 	void			(*sk_write_space)(struct sock *sk);
+#ifdef CONFIG_SECURITY_TEMPESTA
+	int			(*sk_write_xmit)(struct sock *sk,
+						 struct sk_buff *skb,
+						 unsigned int limit);
+#endif
 	void			(*sk_error_report)(struct sock *sk);
 	int			(*sk_backlog_rcv)(struct sock *sk,
 						  struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b2a6ca581..0f6bd0cf2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1719,6 +1719,9 @@ static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
 						struct sk_buff *buff,
 						struct sock *sk)
 {
+#ifdef CONFIG_SECURITY_TEMPESTA
+	tempesta_tls_skb_typecp(buff, skb);
+#endif
 	__skb_queue_after(&sk->sk_write_queue, skb, buff);
 }
 
diff --git a/include/net/tls.h b/include/net/tls.h
index df950383b..4a99f03df 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -55,6 +55,13 @@
 
 #define TLS_AAD_SPACE_SIZE		13
 
+#ifdef CONFIG_SECURITY_TEMPESTA
+#define TLS_MAX_TAG_SZ			16
+/* Maximum size for required skb overhead: header, IV, tag. */
+#define TLS_MAX_OVERHEAD		(TLS_HEADER_SIZE + TLS_AAD_SPACE_SIZE \
+					 + TLS_MAX_TAG_SZ)
+#endif
+
 struct tls_sw_context {
 	struct crypto_aead *aead_send;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4a8b1b165..92e9a635d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -219,8 +219,8 @@ __pg_pool_shrink(TfwSkbMemPool *pool)
 	return true;
 }
 
-static void *
-__pg_skb_alloc(unsigned int size, gfp_t gfp_mask, int node)
+void *
+pg_skb_alloc(unsigned int size, gfp_t gfp_mask, int node)
 {
 	/*
 	 * Don't disable softirq if hardirqs are already disabled to avoid
@@ -319,6 +319,7 @@ do {							\
 #undef PREEMPT_CTX_DISABLE
 #undef PREEMPT_CTX_ENABLE
 }
+EXPORT_SYMBOL(pg_skb_alloc);
 #endif
 
 static void
@@ -455,7 +456,7 @@ __alloc_skb(unsigned int size, gfp_t gfp_mask, int flags, int node)
 	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	if (!(skb = __pg_skb_alloc(n, gfp_mask, node)))
+	if (!(skb = pg_skb_alloc(n, gfp_mask, node)))
 		return NULL;
 
 	data = (u8 *)skb + skb_sz;
@@ -1706,7 +1707,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 #ifdef CONFIG_SECURITY_TEMPESTA
-	data = __pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
+	data = pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
 	if (!data)
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(PG_ALLOC_SZ(size));
@@ -5493,7 +5494,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 		gfp_mask |= __GFP_MEMALLOC;
 #ifdef CONFIG_SECURITY_TEMPESTA
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = __pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
+	data = pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
 	if (!data)
 		return -ENOMEM;
 	size = SKB_WITH_OVERHEAD(PG_ALLOC_SZ(size));
@@ -5632,7 +5633,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 		gfp_mask |= __GFP_MEMALLOC;
 #ifdef CONFIG_SECURITY_TEMPESTA
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = __pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
+	data = pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
 	if (!data)
 		return -ENOMEM;
 	size = SKB_WITH_OVERHEAD(PG_ALLOC_SZ(size));
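pg_skb_alloc() is un-static'd and exported above so that a module can draw from the same per-CPU page pool the skb paths use. A minimal usage sketch, assuming softirq context, hence GFP_ATOMIC, for the same reason crypto_create_tfm() switches to GFP_ATOMIC in the crypto/api.c hunk; my_pool_alloc() is a hypothetical wrapper:

#include <linux/skbuff.h>
#include <linux/numa.h>

/* Allocate a raw buffer from the per-CPU skb page pool. */
static void *my_pool_alloc(unsigned int size)
{
	return pg_skb_alloc(size, GFP_ATOMIC, NUMA_NO_NODE);
}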
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c6e64e8f5..4e79cb5ef 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -37,6 +37,9 @@
 #define pr_fmt(fmt) "TCP: " fmt
 
 #include <net/tcp.h>
+#ifdef CONFIG_SECURITY_TEMPESTA
+#include <net/tls.h>
+#endif
 
 #include <linux/compiler.h>
 #include <linux/gfp.h>
@@ -2330,7 +2333,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 							  cwnd_quota,
 							  max_segs),
 				    nonagle);
-
+#ifdef CONFIG_SECURITY_TEMPESTA
+		if (sk->sk_write_xmit && tempesta_tls_skb_type(skb)) {
+			if (unlikely(limit <= TLS_MAX_OVERHEAD)) {
+				net_warn_ratelimited("%s: too small MSS %u"
+						     " for TLS\n",
+						     __func__, mss_now);
+				break;
+			}
+			if (limit > TLS_MAX_PAYLOAD_SIZE + TLS_MAX_OVERHEAD)
+				limit = TLS_MAX_PAYLOAD_SIZE;
+			else
+				limit -= TLS_MAX_OVERHEAD;
+		}
+#endif
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
@@ -2339,7 +2355,34 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
 		if (tcp_small_queue_check(sk, skb, 0))
 			break;
-
+#ifdef CONFIG_SECURITY_TEMPESTA
+		/*
+		 * This isn't the only place where tcp_transmit_skb() is
+		 * called, but it's the only place reached from Tempesta FW's
+		 * ss_do_send(), so call the hook here. At this point, with
+		 * @limit adjusted above, we know exactly how much data we
+		 * can and should send to the peer, so we call the encryption
+		 * here and get the best TLS record size.
+		 *
+		 * TODO Sometimes HTTP servers send headers and the response
+		 * body in different TCP segments, so coalesce skbs for
+		 * transmission to get 16KB (the maximum TLS message size).
+		 */
+		if (sk->sk_write_xmit && tempesta_tls_skb_type(skb)) {
+			result = sk->sk_write_xmit(sk, skb, limit);
+			if (unlikely(result)) {
+				net_warn_ratelimited(
+					"Tempesta: cannot encrypt data (%d),"
+					" resetting the TLS connection.\n",
+					result);
+				/*
+				 * FIXME #984 WARNING: at net/core/stream.c:205
+				 * sk_stream_kill_queues+0x106/0x120
+				 */
+				tcp_reset(sk);
+				break;
+			}
+		}
+#endif
 		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 			break;
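Finally, a minimal consumer-side sketch for the sk_write_xmit hook; my_tls_encrypt() is a hypothetical encryption routine, and where exactly the skb type tag is cleared is an implementation choice. tcp_write_xmit() invokes the hook only for skbs carrying a TLS type tag, after shrinking @limit by TLS_MAX_OVERHEAD, so the record built here (header, IV, payload, tag) still fits what TCP may transmit; a non-zero return resets the connection, as the hunk above shows:

#include <net/sock.h>
#include <net/tls.h>

/* Hypothetical in-place TLS record encryption provided by the TLS layer. */
int my_tls_encrypt(struct sock *sk, struct sk_buff *skb, unsigned int limit);

static int
my_sk_write_xmit(struct sock *sk, struct sk_buff *skb, unsigned int limit)
{
	int r = my_tls_encrypt(sk, skb, limit);

	if (!r)
		tempesta_tls_skb_clear(skb);	/* drop the fake skb->dev */
	return r;	/* non-zero: tcp_write_xmit() resets the connection */
}

static void
my_install_hook(struct sock *sk)
{
	sk->sk_write_xmit = my_sk_write_xmit;
}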