Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LogQL: Pattern Parser #3837

Merged
merged 28 commits into from
Jun 15, 2021
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e4856ca
The beginning of a fun story.
cyriltovena May 5, 2021
f46d46b
Working on adding ragel.
cyriltovena May 12, 2021
c9028b2
Merge remote-tracking branch 'upstream/main' into token
cyriltovena May 12, 2021
cd4d896
Adding AST parsing with Yacc and Ragel.
cyriltovena May 13, 2021
4195c92
Got a pattern parser working.
cyriltovena May 18, 2021
7035f9f
Setup tests and the matches algorithm.
cyriltovena May 19, 2021
a013f82
moar tests case.
cyriltovena May 19, 2021
beb4c53
Add some validation for the pattern expression.
cyriltovena May 19, 2021
16a1f94
Hooking to LogQL + performance boost.
cyriltovena May 19, 2021
9be47d8
Merge remote-tracking branch 'upstream/main' into token
cyriltovena Jun 2, 2021
b83c512
Adds documentation
cyriltovena Jun 10, 2021
611d15c
Improve bound check.
cyriltovena Jun 10, 2021
00ce458
Removes generated files from being linted.
cyriltovena Jun 10, 2021
411f574
Update docs/sources/logql/_index.md
cyriltovena Jun 10, 2021
1f77197
Update docs/sources/logql/_index.md
cyriltovena Jun 10, 2021
707782a
Review feedback
cyriltovena Jun 10, 2021
a5dd5c2
Update docs/sources/logql/_index.md
cyriltovena Jun 10, 2021
055db8d
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
570076f
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
bdb7c1e
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
2bda200
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
88d40b7
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
bab1961
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
609e0ea
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
2e26709
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
c9c29b7
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
a773bcb
Update docs/sources/logql/_index.md
cyriltovena Jun 15, 2021
d6f95eb
Docs suggestions
cyriltovena Jun 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .drone/drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,28 @@ workspace:

steps:
- name: test
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false test
depends_on:
- clone

- name: lint
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false lint
depends_on:
- clone

- name: check-generated-files
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false check-generated-files
depends_on:
- clone

- name: check-mod
image: grafana/loki-build-image:0.14.0
image: grafana/loki-build-image:0.15.0
commands:
- make BUILD_IN_CONTAINER=false check-mod
depends_on:
Expand Down
3 changes: 3 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ run:
# no need to include all autogenerated files, we confidently recognize
# autogenerated files. If it's not please let us know.
skip-files:
- .*.pb.go
- .*.y.go
- .*.rl.go
# output configuration options
output:
# colored-line-number|line-number|json|tab|checkstyle, default is "colored-line-number"
Expand Down
43 changes: 33 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
.PHONY: push-images push-latest save-images load-images promtail-image loki-image build-image
.PHONY: bigtable-backup, push-bigtable-backup
.PHONY: benchmark-store, drone, check-mod
.PHONY: migrate migrate-image lint-markdown
.PHONY: migrate migrate-image lint-markdown ragel

SHELL = /usr/bin/env bash

Expand Down Expand Up @@ -38,7 +38,7 @@ DOCKER_IMAGE_DIRS := $(patsubst %/Dockerfile,%,$(DOCKERFILES))
# make BUILD_IN_CONTAINER=false target
# or you can override this with an environment variable
BUILD_IN_CONTAINER ?= true
BUILD_IMAGE_VERSION := 0.14.0
BUILD_IMAGE_VERSION := 0.15.0

# Docker image info
IMAGE_PREFIX ?= grafana
Expand Down Expand Up @@ -87,6 +87,10 @@ PROTO_GOS := $(patsubst %.proto,%.pb.go,$(PROTO_DEFS))
YACC_DEFS := $(shell find . $(DONT_FIND) -type f -name *.y -print)
YACC_GOS := $(patsubst %.y,%.y.go,$(YACC_DEFS))

# Ragel Files
RAGEL_DEFS := $(shell find . $(DONT_FIND) -type f -name *.rl -print)
RAGEL_GOS := $(patsubst %.rl,%.rl.go,$(RAGEL_DEFS))

# Promtail UI files
PROMTAIL_GENERATED_FILE := clients/pkg/promtail/server/ui/assets_vfsdata.go
PROMTAIL_UI_FILES := $(shell find ./clients/pkg/promtail/server/ui -type f -name assets_vfsdata.go -prune -o -print)
Expand Down Expand Up @@ -126,8 +130,8 @@ binfmt:
all: promtail logcli loki loki-canary check-generated-files

# This is really a check for the CI to make sure generated files are built and checked in manually
check-generated-files: touch-protobuf-sources yacc protos clients/pkg/promtail/server/ui/assets_vfsdata.go
@if ! (git diff --exit-code $(YACC_GOS) $(PROTO_GOS) $(PROMTAIL_GENERATED_FILE)); then \
check-generated-files: touch-protobuf-sources yacc ragel protos clients/pkg/promtail/server/ui/assets_vfsdata.go
@if ! (git diff --exit-code $(YACC_GOS) $(RAGEL_GOS) $(PROTO_GOS) $(PROMTAIL_GENERATED_FILE)); then \
echo "\nChanges found in generated files"; \
echo "Run 'make check-generated-files' and commit the changes to fix this error."; \
echo "If you are actively developing these files you can ignore this error"; \
Expand All @@ -147,7 +151,7 @@ touch-protobuf-sources:
# Logcli #
##########

logcli: yacc cmd/logcli/logcli
logcli: yacc ragel cmd/logcli/logcli

logcli-image:
$(SUDO) docker build -t $(IMAGE_PREFIX)/logcli:$(IMAGE_TAG) -f cmd/logcli/Dockerfile .
Expand All @@ -160,8 +164,8 @@ cmd/logcli/logcli: $(APP_GO_FILES) cmd/logcli/main.go
# Loki #
########

loki: protos yacc cmd/loki/loki
loki-debug: protos yacc cmd/loki/loki-debug
loki: protos yacc ragel cmd/loki/loki
loki-debug: protos yacc ragel cmd/loki/loki-debug

cmd/loki/loki: $(APP_GO_FILES) cmd/loki/main.go
CGO_ENABLED=0 go build $(GO_FLAGS) -o $@ ./$(@D)
Expand All @@ -175,7 +179,7 @@ cmd/loki/loki-debug: $(APP_GO_FILES) cmd/loki/main.go
# Loki-Canary #
###############

loki-canary: protos yacc cmd/loki-canary/loki-canary
loki-canary: protos yacc ragel cmd/loki-canary/loki-canary

cmd/loki-canary/loki-canary: $(APP_GO_FILES) cmd/loki-canary/main.go
CGO_ENABLED=0 go build $(GO_FLAGS) -o $@ ./$(@D)
Expand Down Expand Up @@ -206,8 +210,8 @@ PROMTAIL_DEBUG_GO_FLAGS = $(DYN_DEBUG_GO_FLAGS)
endif
endif

promtail: yacc clients/cmd/promtail/promtail
promtail-debug: yacc clients/cmd/promtail/promtail-debug
promtail: yacc ragel clients/cmd/promtail/promtail
promtail-debug: yacc ragel clients/cmd/promtail/promtail-debug

promtail-clean-assets:
rm -rf clients/pkg/promtail/server/ui/assets_vfsdata.go
Expand Down Expand Up @@ -308,6 +312,25 @@ else
rm ${@}.back
endif

#########
# Ragels #
#########

# ragel builds every generated Go scanner listed in RAGEL_GOS (one .rl.go per .rl definition).
ragel: $(RAGEL_GOS)

# Pattern rule: produce <name>.rl.go from <name>.rl.
# When BUILD_IN_CONTAINER is true, the target name ($@) is handed to the
# loki-build-image container with the source tree and Go caches mounted
# (presumably the image entrypoint runs make on it -- confirm against the image).
# Otherwise the local ragel binary is invoked directly; -Z selects Go as the
# host language for the generated code.
%.rl.go: %.rl
ifeq ($(BUILD_IN_CONTAINER),true)
@mkdir -p $(shell pwd)/.pkg
@mkdir -p $(shell pwd)/.cache
$(SUDO) docker run $(RM) $(TTY) -i \
-v $(shell pwd)/.cache:/go/cache$(MOUNT_FLAGS) \
-v $(shell pwd)/.pkg:/go/pkg$(MOUNT_FLAGS) \
-v $(shell pwd):/src/loki$(MOUNT_FLAGS) \
$(IMAGE_PREFIX)/loki-build-image:$(BUILD_IMAGE_VERSION) $@;
else
ragel -Z $< -o $@
endif

#############
# Protobufs #
#############
Expand Down
59 changes: 57 additions & 2 deletions docs/sources/logql/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,10 @@ In case of errors, for instance if the line is not in the expected format, the l

If an extracted label key name already exists in the original log stream, the extracted label key will be suffixed with the `_extracted` keyword to make the distinction between the two labels. You can forcefully override the original label using a [label formatter expression](#labels-format-expression). However if an extracted key appears twice, only the latest label value will be kept.

We support currently support [json](#json), [logfmt](#logfmt), [regexp](#regexp) and [unpack](#unpack) parsers.
We currently support the [json](#json), [logfmt](#logfmt), [pattern](#pattern), [regexp](#regexp) and [unpack](#unpack) parsers.
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

It's easier to use the predefined parsers like `json` and `logfmt` when you can, falling back to `regexp` when the log lines have unusual structure. Multiple parsers can be used during the same log pipeline which is useful when you want to parse complex logs. ([see examples](#multiple-parsers))
It's easier to use the predefined parsers `json` and `logfmt` when you can. If you can't, the `pattern` and `regexp` parsers can be used for log lines with an unusual structure. The `pattern` parser is easier and faster to write; it also performs better than the `regexp` parser.
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved
Multiple parsers can be used during the same log pipeline which is useful when you want to parse complex logs. ([see examples](#multiple-parsers))
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

##### Json

Expand Down Expand Up @@ -277,6 +278,60 @@ will get those labels extracted:
"status" => "200"
```

##### pattern
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

The pattern parser allows the explicit extraction of fields from log lines by defining a pattern expression that matches the structure of the log line.

For example the following nginx log line:
dannykopping marked this conversation as resolved.
Show resolved Hide resolved
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

```log
0.191.12.2 - - [10/Jun/2021:09:14:29 +0000] "GET /api/plugins/versioncheck HTTP/1.1" 200 2 "-" "Go-http-client/2.0" "13.76.247.102, 34.120.177.193" "TLSv1.2" "US" ""
```

Can be parsed with the expression:

`<ip> - - <_> "<method> <uri> <_>" <status> <size> <_> "<agent>" <_>`

and will extract the following fields
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

```kv
"ip" => "0.191.12.2"
"method" => "GET"
"uri" => "/api/plugins/versioncheck"
"status" => "200"
"size" => "2"
"agent" => "Go-http-client/2.0"
```

A pattern expression is composed of captures and literals.

A capture is noted `<example>` where `example` is the name of the field that will be extracted.
Captures can be unnamed using `<_>`, in which case the capture skips matched content.
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

Captures are matched from the beginning or the previous set of literals, to the end or the next set of literals.
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved
If a capture is not matched, the pattern parser will stop.

Literals can be any sequence of UTF-8 characters, including whitespace.
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

By default, a pattern expression is anchored at the start of the log line. This means that if the expression starts with literals, the log line must also start with the same set of literals. Use `<_>` at the beginning of the expression if you do not want to anchor it at the start.
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

For example, given the following log line:

```log
level=debug ts=2021-06-10T09:24:13.472094048Z caller=logging.go:66 traceID=0568b66ad2d9294c msg="POST /loki/api/v1/push (204) 16.652862ms"
```

If you want to match from `msg="`, you can use the following expression:

```pattern
<_> msg="<method> <path> (<status>) <latency>"
```
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

A pattern expression is invalid if:

- it does not contain any named capture.
- it contains two consecutive captures not separated by whitespace.
cyriltovena marked this conversation as resolved.
Show resolved Hide resolved

##### regexp

Unlike the logfmt and json parsers, which implicitly extract all values and take no parameters, the **regexp** parser takes a single parameter `| regexp "<re>"` which is the regular expression using the [Golang](https://golang.org/) [RE2 syntax](https://github.com/google/re2/wiki/Syntax).
Expand Down
2 changes: 1 addition & 1 deletion loki-build-image/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ RUN GO111MODULE=on go get github.com/fatih/[email protected]
FROM golang:1.16.2-buster
RUN apt-get update && \
apt-get install -qy \
musl gnupg \
musl gnupg ragel \
file zip unzip jq gettext\
protobuf-compiler libprotobuf-dev \
libsystemd-dev && \
Expand Down
11 changes: 7 additions & 4 deletions pkg/logql/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,8 @@ func (e *labelParserExpr) Stage() (log.Stage, error) {
return log.NewRegexpParser(e.param)
case OpParserTypeUnpack:
return log.NewUnpackParser(), nil
case OpParserTypePattern:
return log.NewPatternParser(e.param)
default:
return nil, fmt.Errorf("unknown parser operator: %s", e.op)
}
Expand Down Expand Up @@ -601,10 +603,11 @@ const (
OpTypeLTE = "<="

// parsers
OpParserTypeJSON = "json"
OpParserTypeLogfmt = "logfmt"
OpParserTypeRegexp = "regexp"
OpParserTypeUnpack = "unpack"
OpParserTypeJSON = "json"
OpParserTypeLogfmt = "logfmt"
OpParserTypeRegexp = "regexp"
OpParserTypeUnpack = "unpack"
OpParserTypePattern = "pattern"

OpFmtLine = "line_format"
OpFmtLabel = "label_format"
Expand Down
12 changes: 12 additions & 0 deletions pkg/logql/ast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func Test_logSelectorExpr_String(t *testing.T) {
{`{foo="bar", bar!="baz"} != "bip" !~ ".+bop" | json`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | logfmt`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | unpack | foo>5`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | pattern "<foo> bar <buzz>" | foo>5`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | logfmt | b>=10GB`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | regexp "(?P<foo>foo|bar)"`, true},
{`{foo="bar"} |= "baz" |~ "blip" != "flip" !~ "flap" | regexp "(?P<foo>foo|bar)" | ( ( foo<5.01 , bar>20ms ) or foo="bar" ) | line_format "blip{{.boop}}bap" | label_format foo=bar,bar="blip{{.blop}}"`, true},
Expand Down Expand Up @@ -69,6 +70,7 @@ func Test_SampleExpr_String(t *testing.T) {
`sum(count_over_time({job="mysql"} | json [5m] offset 10m))`,
`sum(count_over_time({job="mysql"} | logfmt [5m]))`,
`sum(count_over_time({job="mysql"} | logfmt [5m] offset 10m))`,
`sum(count_over_time({job="mysql"} | pattern "<foo> bar <buzz>" | json [5m]))`,
`sum(count_over_time({job="mysql"} | unpack | json [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m]))`,
`sum(count_over_time({job="mysql"} | regexp "(?P<foo>foo|bar)" [5m] offset 10m))`,
Expand Down Expand Up @@ -358,6 +360,8 @@ func Test_parserExpr_Parser(t *testing.T) {
{"json", OpParserTypeJSON, "", log.NewJSONParser(), false},
{"unpack", OpParserTypeUnpack, "", log.NewUnpackParser(), false},
{"logfmt", OpParserTypeLogfmt, "", log.NewLogfmtParser(), false},
{"pattern", OpParserTypePattern, "<foo> bar <buzz>", mustNewPatternParser("<foo> bar <buzz>"), false},
{"pattern err", OpParserTypePattern, "bar", nil, true},
{"regexp", OpParserTypeRegexp, "(?P<foo>foo)", mustNewRegexParser("(?P<foo>foo)"), false},
{"regexp err ", OpParserTypeRegexp, "foo", nil, true},
}
Expand Down Expand Up @@ -389,6 +393,14 @@ func mustNewRegexParser(re string) log.Stage {
return r
}

// mustNewPatternParser builds a pattern parser stage from the expression p.
// It panics if log.NewPatternParser returns an error, which lets test tables
// construct the expected stage inline without handling an error value.
func mustNewPatternParser(p string) log.Stage {
	stage, err := log.NewPatternParser(p)
	if err == nil {
		return stage
	}
	panic(err)
}

func Test_canInjectVectorGrouping(t *testing.T) {
tests := []struct {
vecOp string
Expand Down
3 changes: 2 additions & 1 deletion pkg/logql/expr.y
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ import (
OPEN_PARENTHESIS CLOSE_PARENTHESIS BY WITHOUT COUNT_OVER_TIME RATE SUM AVG MAX MIN COUNT STDDEV STDVAR BOTTOMK TOPK
BYTES_OVER_TIME BYTES_RATE BOOL JSON REGEXP LOGFMT PIPE LINE_FMT LABEL_FMT UNWRAP AVG_OVER_TIME SUM_OVER_TIME MIN_OVER_TIME
MAX_OVER_TIME STDVAR_OVER_TIME STDDEV_OVER_TIME QUANTILE_OVER_TIME BYTES_CONV DURATION_CONV DURATION_SECONDS_CONV
FIRST_OVER_TIME LAST_OVER_TIME ABSENT_OVER_TIME LABEL_REPLACE UNPACK OFFSET
FIRST_OVER_TIME LAST_OVER_TIME ABSENT_OVER_TIME LABEL_REPLACE UNPACK OFFSET PATTERN

// Operators are listed with increasing precedence.
%left <binOp> OR
Expand Down Expand Up @@ -246,6 +246,7 @@ labelParser:
| LOGFMT { $$ = newLabelParserExpr(OpParserTypeLogfmt, "") }
| REGEXP STRING { $$ = newLabelParserExpr(OpParserTypeRegexp, $2) }
| UNPACK { $$ = newLabelParserExpr(OpParserTypeUnpack, "") }
| PATTERN STRING { $$ = newLabelParserExpr(OpParserTypePattern, $2) }
;

jsonExpressionParser:
Expand Down
Loading