From ccf9983e9812f670eb20831c5a6a56cf456f9f02 Mon Sep 17 00:00:00 2001
From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com>
Date: Wed, 4 Dec 2024 07:38:35 +0100
Subject: [PATCH] helper/hasher general improvements (#41863) (#41874)

 * Fix a TOCTOU by opening the file handle and then calling stat() on it,
   instead of doing stat() first and opening afterwards (see the sketch
   after this list).
 * Make sure the target is a regular file; otherwise auditbeat could be
   tricked into hashing an infinite source like a pipe.
 * Allow the rate (but not the file size) to be infinite; this is needed for
   an upcoming new backend for module/system/process (see the limiter sketch
   below).
 * Finally, fix the error messages that show up in ECS; see the before/after
   below.
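
For reference, the open-then-fstat shape is roughly the following. This is a
minimal sketch using plain `os` rather than the beats `file` helper, with
illustrative names, not the patched code verbatim:

```
package hasherexample

import (
	"fmt"
	"os"
)

// openRegular opens the file first and then stats the open handle
// (fstat(2)), so the checks apply to the same file that is later read;
// stat()ing the path before opening leaves a window where the path can
// be swapped out (the TOCTOU).
func openRegular(path string) (*os.File, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open: %w", err)
	}
	info, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, fmt.Errorf("stat: %w", err)
	}
	// Reject pipes, devices, sockets, and other non-regular files that
	// could feed the hasher forever.
	if !info.Mode().IsRegular() {
		f.Close()
		return nil, fmt.Errorf("not a regular file")
	}
	return f, nil
}
```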

before:
```
failed to hash executable /d/e/beats/x-pack/auditbeat/auditbeat for PID 50751: failed to hash file /d/e/beats/x-pack/auditbeat/auditbeat: hasher: file size 143673152 exceeds max file size
```

after:
```
failed to hash executable /d/e/beats/x-pack/auditbeat/auditbeat for PID 50804: size 143673152 exceeds max file size
```
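
On the rate-limiting side: with golang.org/x/time/rate, a limit of rate.Inf
never delays and ignores the configured burst, so the max-file-size cap has
to be enforced by hand before reserving. Roughly (hypothetical helper names,
same x/time/rate calls as the patch):

```
package hasherexample

import (
	"time"

	"golang.org/x/time/rate"
)

// newLimiter maps a scan rate of 0 to "unthrottled" (rate.Inf) and keeps
// the max file size as the limiter burst for the finite case.
func newLimiter(scanRateBytesPerSec, maxFileSizeBytes uint64) *rate.Limiter {
	limit := rate.Limit(scanRateBytesPerSec)
	if scanRateBytesPerSec == 0 {
		limit = rate.Inf
	}
	return rate.NewLimiter(limit, int(maxFileSizeBytes))
}

// allow reports whether a file of fileSize bytes may be hashed. With
// rate.Inf, ReserveN always succeeds and Burst is ignored, so the size
// cap is checked explicitly.
func allow(l *rate.Limiter, fileSize int64) bool {
	if l.Limit() == rate.Inf && int(fileSize) > l.Burst() {
		return false
	}
	return l.ReserveN(time.Now(), int(fileSize)).OK()
}
```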

(cherry picked from commit 8b38b65ec4fb301c084b9a5741f3e7490c82b4b3)

Co-authored-by: Christiano Haesbaert <haesbaert@elastic.co>
---
 CHANGELOG.next.asciidoc           |  4 +++
 auditbeat/helper/hasher/hasher.go | 42 ++++++++++++++++++++----------
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index fbf70b93dafa..31976b4bf36a 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -80,6 +80,10 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
 
 *Auditbeat*
 
+- auditd: Request status from a separate socket to avoid data congestion {pull}41207[41207]
+- auditd: Use ECS `event.type: end` instead of `stop` for SERVICE_STOP, DAEMON_ABORT, and DAEMON_END messages. {pull}41558[41558]
+- auditd: Update syscall names for Linux 6.11. {pull}41558[41558]
+- hasher: General improvements and fixes. {pull}41863[41863]
 
 *Filebeat*
 
diff --git a/auditbeat/helper/hasher/hasher.go b/auditbeat/helper/hasher/hasher.go
index d0612ab731df..4f97f5705a74 100644
--- a/auditbeat/helper/hasher/hasher.go
+++ b/auditbeat/helper/hasher/hasher.go
@@ -26,7 +26,6 @@ import (
 	"fmt"
 	"hash"
 	"io"
-	"os"
 	"strings"
 	"time"
 
@@ -124,7 +123,7 @@ type FileTooLargeError struct {
 
 // Error returns the error message for FileTooLargeError.
 func (e FileTooLargeError) Error() string {
-	return fmt.Sprintf("hasher: file size %d exceeds max file size", e.fileSize)
+	return fmt.Sprintf("size %d exceeds max file size", e.fileSize)
 }
 
 // Config contains the configuration of a FileHasher.
@@ -174,11 +173,19 @@ type FileHasher struct {
 
 // NewFileHasher creates a new FileHasher.
 func NewFileHasher(c Config, done <-chan struct{}) (*FileHasher, error) {
+	var limit rate.Limit
+
+	if c.ScanRateBytesPerSec == 0 {
+		limit = rate.Inf
+	} else {
+		limit = rate.Limit(c.ScanRateBytesPerSec)
+	}
+
 	return &FileHasher{
 		config: c,
 		limiter: rate.NewLimiter(
-			rate.Limit(c.ScanRateBytesPerSec), // Rate
-			int(c.MaxFileSizeBytes),           // Burst
+			limit,                   // Rate
+			int(c.MaxFileSizeBytes), // Burst
 		),
 		done: done,
 	}, nil
@@ -186,16 +193,25 @@ func NewFileHasher(c Config, done <-chan struct{}) (*FileHasher, error) {
 
 // HashFile hashes the contents of a file.
 func (hasher *FileHasher) HashFile(path string) (map[HashType]Digest, error) {
-	info, err := os.Stat(path)
+	f, err := file.ReadOpen(path)
 	if err != nil {
-		return nil, fmt.Errorf("failed to stat file %v: %w", path, err)
+		return nil, fmt.Errorf("open: %w", err)
+	}
+	defer f.Close()
+
+	info, err := f.Stat()
+	if err != nil {
+		return nil, fmt.Errorf("stat: %w", err)
+	}
+	if !info.Mode().IsRegular() {
+		return nil, fmt.Errorf("not a regular file")
 	}
 
 	// Throttle reading and hashing rate.
 	if len(hasher.config.HashTypes) > 0 {
 		err = hasher.throttle(info.Size())
 		if err != nil {
-			return nil, fmt.Errorf("failed to hash file %v: %w", path, err)
+			return nil, err
 		}
 	}
 
@@ -210,15 +226,9 @@ func (hasher *FileHasher) HashFile(path string) (map[HashType]Digest, error) {
 	}
 
 	if len(hashes) > 0 {
-		f, err := file.ReadOpen(path)
-		if err != nil {
-			return nil, fmt.Errorf("failed to open file for hashing: %w", err)
-		}
-		defer f.Close()
-
 		hashWriter := multiWriter(hashes)
 		if _, err := io.Copy(hashWriter, f); err != nil {
-			return nil, fmt.Errorf("failed to calculate file hashes: %w", err)
+			return nil, err
 		}
 
 		nameToHash := make(map[HashType]Digest, len(hashes))
@@ -233,6 +243,10 @@ func (hasher *FileHasher) HashFile(path string) (map[HashType]Digest, error) {
 }
 
 func (hasher *FileHasher) throttle(fileSize int64) error {
+	// Burst is ignored if limit is infinite, so check it manually
+	if hasher.limiter.Limit() == rate.Inf && int(fileSize) > hasher.limiter.Burst() {
+		return FileTooLargeError{fileSize}
+	}
 	reservation := hasher.limiter.ReserveN(time.Now(), int(fileSize))
 	if !reservation.OK() {
 		// File is bigger than the max file size