diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 532480a46..d27c08f48 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -86,6 +86,10 @@ static bool interrupted_user_mode(void)
  */
 bool irq_fpu_usable(void)
 {
+#ifdef CONFIG_SECURITY_TEMPESTA
+	if (likely(in_serving_softirq()))
+		return true;
+#endif
 	return !in_interrupt() ||
 		interrupted_user_mode() ||
 		interrupted_kernel_fpu_idle();
diff --git a/crypto/api.c b/crypto/api.c
index 941cd4c6c..5fd308d8e 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -451,7 +451,11 @@ void *crypto_create_tfm(struct crypto_alg *alg,
 	tfmsize = frontend->tfmsize;
 	total = tfmsize + sizeof(*tfm) + frontend->extsize(alg);
 
+#ifdef CONFIG_SECURITY_TEMPESTA
+	mem = kzalloc(total, GFP_ATOMIC);
+#else
 	mem = kzalloc(total, GFP_KERNEL);
+#endif
 	if (mem == NULL)
 		goto out_err;
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 46bf7cc7d..b698c645b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -148,11 +148,22 @@ static inline bool dev_xmit_complete(int rc)
 # define LL_MAX_HEADER 32
 #endif
 
+#ifdef CONFIG_SECURITY_TEMPESTA
+/*
+ * In the Tempesta case most of the traffic is TLS-encrypted, so reserve extra
+ * room for the TLS record header and explicit IV at skb allocation to avoid
+ * data movement in tcp_write_xmit(). Not all skbs carry TLS headers, but it's
+ * no big deal to allocate 16 more bytes (5 - TLS header, 8 - IV, 3 - alignment).
+ */
+#define TLS_MAX_HDR	16
+#else
+#define TLS_MAX_HDR	0
+#endif
 #if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \
     !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL)
-#define MAX_HEADER LL_MAX_HEADER
+#define MAX_HEADER (LL_MAX_HEADER + TLS_MAX_HDR)
 #else
-#define MAX_HEADER (LL_MAX_HEADER + 48)
+#define MAX_HEADER (LL_MAX_HEADER + 48 + TLS_MAX_HDR)
 #endif
 
 /*
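To make the headroom arithmetic above concrete, here is a minimal sketch of what the extra 16 bytes buy; it is not part of the patch, and my_push_tls_prefix() is a hypothetical helper. Because every allocation path that reserves MAX_HEADER now also reserves TLS_MAX_HDR, the 5-byte TLS record header and the 8-byte explicit IV can be prepended to a queued skb with skb_push() alone, with no reallocation and no payload movement:

#include <linux/skbuff.h>
#include <linux/netdevice.h>

static void *my_push_tls_prefix(struct sk_buff *skb)
{
	/* TLS record header (5) plus explicit IV (8) fit the 16-byte pad. */
	BUILD_BUG_ON(5 + 8 > TLS_MAX_HDR);

	if (unlikely(skb_headroom(skb) < 5 + 8))
		return NULL;	/* shouldn't happen with the new MAX_HEADER */
	return skb_push(skb, 5 + 8);
}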
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e6438bfa9..0d90fcf05 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -232,6 +232,12 @@
 	SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))
 #define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X), 0))
 #define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0, 2))
+#ifdef CONFIG_SECURITY_TEMPESTA
+#define SKB_MAX_HEADER	(PAGE_SIZE - MAX_TCP_HEADER			\
+			 - SKB_DATA_ALIGN(sizeof(struct sk_buff))	\
+			 - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+			 - SKB_DATA_ALIGN(1))
+#endif
 
 /* return minimum truesize of one skb containing X bytes of data */
 #define SKB_TRUESIZE(X) ((X) +						\
@@ -854,6 +860,50 @@ struct sk_buff {
 #define SKB_ALLOC_RX	0x02
 #define SKB_ALLOC_NAPI	0x04
 
+#ifdef CONFIG_SECURITY_TEMPESTA
+/**
+ * The skb type is used only for the time between when @skb is inserted into
+ * the TCP send queue and when it is first processed in tcp_write_xmit().
+ * During this time the @skb isn't scheduled yet, so we can use skb->dev for
+ * our own needs and avoid extending sk_buff. We set the least significant
+ * bit so the value can't be mistaken for a pointer, which keeps everything
+ * else intact: TLS message type << 1 is always smaller than 0xff.
+ */
+static inline void
+tempesta_tls_skb_settype(struct sk_buff *skb, unsigned char type)
+{
+	BUG_ON(type >= 0x80);
+	WARN_ON_ONCE(skb->dev);
+
+	skb->dev = (void *)((type << 1) | 1UL);
+}
+
+static inline unsigned char
+tempesta_tls_skb_type(struct sk_buff *skb)
+{
+	unsigned long d = (unsigned long)skb->dev;
+
+	if (!(d & 1UL))
+		return 0; /* a pointer in skb->dev */
+	return d >> 1;
+}
+
+static inline void
+tempesta_tls_skb_typecp(struct sk_buff *dst, struct sk_buff *src)
+{
+	dst->dev = src->dev;
+}
+
+static inline void
+tempesta_tls_skb_clear(struct sk_buff *skb)
+{
+	unsigned long d = (unsigned long)skb->dev;
+
+	WARN_ON_ONCE(d & ~0xff);
+	skb->dev = NULL;
+}
+#endif
+
 /* Returns true if the skb was allocated from PFMEMALLOC reserves */
 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
 {
@@ -972,6 +1022,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		      bool *fragstolen, int *delta_truesize);
 
+void *pg_skb_alloc(unsigned int size, gfp_t gfp_mask, int node);
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
 			    int node);
 struct sk_buff *__build_skb(void *data, unsigned int frag_size);
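A minimal sketch of how a sender is expected to use the tagging helpers above; my_queue_for_xmit() and MY_TLS_APP_DATA are hypothetical names, and the actual enqueue call (e.g. Tempesta's ss_do_send()) is left out. The type is set while the skb sits in the send queue, survives segmentation via tempesta_tls_skb_typecp() in the net/tcp.h hunk below, and is read back by tcp_write_xmit() to decide whether the skb needs encryption:

#include <linux/skbuff.h>
#include <net/sock.h>

#define MY_TLS_APP_DATA		23	/* TLS ApplicationData record type */

static void my_queue_for_xmit(struct sock *sk, struct sk_buff *skb)
{
	/* Tag the skb while it sits in the send queue. */
	tempesta_tls_skb_settype(skb, MY_TLS_APP_DATA);

	/*
	 * Queueing itself is the caller's business (not shown).
	 * tempesta_tls_skb_type(skb) now returns MY_TLS_APP_DATA;
	 * tempesta_tls_skb_clear() must run before the skb leaves TCP,
	 * since skb->dev holds a fake value rather than a device pointer.
	 */
}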
diff --git a/include/net/sock.h b/include/net/sock.h
index ece2126c0..4b990fe91 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -471,6 +471,11 @@ struct sock {
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk);
 	void			(*sk_write_space)(struct sock *sk);
+#ifdef CONFIG_SECURITY_TEMPESTA
+	int			(*sk_write_xmit)(struct sock *sk,
+						 struct sk_buff *skb,
+						 unsigned int limit);
+#endif
 	void			(*sk_error_report)(struct sock *sk);
 	int			(*sk_backlog_rcv)(struct sock *sk,
 						  struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b2a6ca581..0f6bd0cf2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1719,6 +1719,9 @@ static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
 						struct sk_buff *buff,
 						struct sock *sk)
 {
+#ifdef CONFIG_SECURITY_TEMPESTA
+	tempesta_tls_skb_typecp(buff, skb);
+#endif
 	__skb_queue_after(&sk->sk_write_queue, skb, buff);
 }
 
diff --git a/include/net/tls.h b/include/net/tls.h
index df950383b..4a99f03df 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -55,6 +55,13 @@
 
 #define TLS_AAD_SPACE_SIZE		13
 
+#ifdef CONFIG_SECURITY_TEMPESTA
+#define TLS_MAX_TAG_SZ			16
+/* Maximum size for required skb overhead: header, IV, tag. */
+#define TLS_MAX_OVERHEAD		(TLS_HEADER_SIZE + TLS_AAD_SPACE_SIZE \
+					 + TLS_MAX_TAG_SZ)
+#endif
+
 struct tls_sw_context {
 	struct crypto_aead *aead_send;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4a8b1b165..92e9a635d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -219,8 +219,8 @@ __pg_pool_shrink(TfwSkbMemPool *pool)
 	return true;
 }
 
-static void *
-__pg_skb_alloc(unsigned int size, gfp_t gfp_mask, int node)
+void *
+pg_skb_alloc(unsigned int size, gfp_t gfp_mask, int node)
 {
 	/*
 	 * Don't disable softirq if hardirqs are already disabled to avoid
@@ -319,6 +319,7 @@ do {							\
 #undef PREEMPT_CTX_DISABLE
 #undef PREEMPT_CTX_ENABLE
 }
+EXPORT_SYMBOL(pg_skb_alloc);
 #endif
 
 static void
@@ -455,7 +456,7 @@ __alloc_skb(unsigned int size, gfp_t gfp_mask, int flags, int node)
 	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	if (!(skb = __pg_skb_alloc(n, gfp_mask, node)))
+	if (!(skb = pg_skb_alloc(n, gfp_mask, node)))
 		return NULL;
 
 	data = (u8 *)skb + skb_sz;
@@ -1706,7 +1707,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 #ifdef CONFIG_SECURITY_TEMPESTA
-	data = __pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
+	data = pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
 	if (!data)
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(PG_ALLOC_SZ(size));
@@ -5493,7 +5494,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 		gfp_mask |= __GFP_MEMALLOC;
 #ifdef CONFIG_SECURITY_TEMPESTA
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = __pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
+	data = pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
 	if (!data)
 		return -ENOMEM;
 	size = SKB_WITH_OVERHEAD(PG_ALLOC_SZ(size));
@@ -5632,7 +5633,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 		gfp_mask |= __GFP_MEMALLOC;
 #ifdef CONFIG_SECURITY_TEMPESTA
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = __pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
+	data = pg_skb_alloc(size, gfp_mask, NUMA_NO_NODE);
 	if (!data)
 		return -ENOMEM;
 	size = SKB_WITH_OVERHEAD(PG_ALLOC_SZ(size));
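pg_skb_alloc() is un-static'd and exported above so that a module can draw from the same per-CPU page pool the skb paths use. A minimal usage sketch, assuming softirq context, hence GFP_ATOMIC, for the same reason crypto_create_tfm() switches to GFP_ATOMIC in the crypto/api.c hunk; my_pool_alloc() is a hypothetical wrapper:

#include <linux/skbuff.h>
#include <linux/numa.h>

/* Allocate a raw buffer from the per-CPU skb page pool. */
static void *my_pool_alloc(unsigned int size)
{
	return pg_skb_alloc(size, GFP_ATOMIC, NUMA_NO_NODE);
}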
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c6e64e8f5..4e79cb5ef 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -37,6 +37,9 @@
 #define pr_fmt(fmt) "TCP: " fmt
 
 #include <net/tcp.h>
+#ifdef CONFIG_SECURITY_TEMPESTA
+#include <net/tls.h>
+#endif
 
 #include <linux/compiler.h>
 #include <linux/gfp.h>
@@ -2330,7 +2333,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 							  cwnd_quota,
 							  max_segs),
 				    nonagle);
-
+#ifdef CONFIG_SECURITY_TEMPESTA
+		if (sk->sk_write_xmit && tempesta_tls_skb_type(skb)) {
+			if (unlikely(limit <= TLS_MAX_OVERHEAD)) {
+				net_warn_ratelimited("%s: too small MSS %u"
+						     " for TLS\n",
+						     __func__, mss_now);
+				break;
+			}
+			if (limit > TLS_MAX_PAYLOAD_SIZE + TLS_MAX_OVERHEAD)
+				limit = TLS_MAX_PAYLOAD_SIZE;
+			else
+				limit -= TLS_MAX_OVERHEAD;
+		}
+#endif
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
@@ -2339,7 +2355,34 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
 		if (tcp_small_queue_check(sk, skb, 0))
 			break;
-
+#ifdef CONFIG_SECURITY_TEMPESTA
+		/*
+		 * This isn't the only place where tcp_transmit_skb() is
+		 * called, but it's the only place reached from Tempesta FW's
+		 * ss_do_send(), so call the hook here. At this point, with
+		 * @limit adjusted above, we know exactly how much data we
+		 * can and should send to the peer, so we call the encryption
+		 * here and get the best TLS record size.
+		 *
+		 * TODO Sometimes HTTP servers send headers and the response
+		 * body in different TCP segments, so coalesce skbs for
+		 * transmission to get 16KB (the maximum TLS message size).
+		 */
+		if (sk->sk_write_xmit && tempesta_tls_skb_type(skb)) {
+			result = sk->sk_write_xmit(sk, skb, limit);
+			if (unlikely(result)) {
+				net_warn_ratelimited(
+					"Tempesta: cannot encrypt data (%d),"
+					" resetting the TLS connection.\n",
+					result);
+				/*
+				 * FIXME #984 WARNING: at net/core/stream.c:205
+				 * sk_stream_kill_queues+0x106/0x120
+				 */
+				tcp_reset(sk);
+				break;
+			}
+		}
+#endif
 		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 			break;
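Finally, a minimal consumer-side sketch for the sk_write_xmit hook; my_tls_encrypt() is a hypothetical encryption routine, and where exactly the skb type tag is cleared is an implementation choice. tcp_write_xmit() invokes the hook only for skbs carrying a TLS type tag, after shrinking @limit by TLS_MAX_OVERHEAD, so the record built here (header, IV, payload, tag) still fits what TCP may transmit; a non-zero return resets the connection, as the hunk above shows:

#include <net/sock.h>
#include <net/tls.h>

/* Hypothetical in-place TLS record encryption provided by the TLS layer. */
int my_tls_encrypt(struct sock *sk, struct sk_buff *skb, unsigned int limit);

static int
my_sk_write_xmit(struct sock *sk, struct sk_buff *skb, unsigned int limit)
{
	int r = my_tls_encrypt(sk, skb, limit);

	if (!r)
		tempesta_tls_skb_clear(skb);	/* drop the fake skb->dev */
	return r;	/* non-zero: tcp_write_xmit() resets the connection */
}

static void
my_install_hook(struct sock *sk)
{
	sk->sk_write_xmit = my_sk_write_xmit;
}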