From f8f96046ac06cb9b02c387d4997fc711c9586338 Mon Sep 17 00:00:00 2001 From: Daniel Jaglowski Date: Fri, 27 Sep 2024 15:26:44 -0400 Subject: [PATCH] [pkg/ottl] Add GetXML Converter (#35462) --- .chloggen/ottl-get-xml.yaml | 27 +++++ pkg/ottl/e2e/e2e_test.go | 6 + pkg/ottl/ottlfuncs/README.md | 32 ++++++ pkg/ottl/ottlfuncs/func_get_xml.go | 62 ++++++++++ pkg/ottl/ottlfuncs/func_get_xml_test.go | 144 ++++++++++++++++++++++++ pkg/ottl/ottlfuncs/functions.go | 1 + 6 files changed, 272 insertions(+) create mode 100644 .chloggen/ottl-get-xml.yaml create mode 100644 pkg/ottl/ottlfuncs/func_get_xml.go create mode 100644 pkg/ottl/ottlfuncs/func_get_xml_test.go diff --git a/.chloggen/ottl-get-xml.yaml b/.chloggen/ottl-get-xml.yaml new file mode 100644 index 000000000000..3e5556b3ba19 --- /dev/null +++ b/.chloggen/ottl-get-xml.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/ottl + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add GetXML Converter + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [35462] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. 
'[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/pkg/ottl/e2e/e2e_test.go b/pkg/ottl/e2e/e2e_test.go index 97ff3ad107ef..b908319382a4 100644 --- a/pkg/ottl/e2e/e2e_test.go +++ b/pkg/ottl/e2e/e2e_test.go @@ -444,6 +444,12 @@ func Test_e2e_converters(t *testing.T) { tCtx.GetLogRecord().Attributes().PutInt("test", 1) }, }, + { + statement: `set(attributes["test"], GetXML("12", "/a//b"))`, + want: func(tCtx ottllog.TransformContext) { + tCtx.GetLogRecord().Attributes().PutStr("test", "12") + }, + }, { statement: `set(attributes["test"], Hex(1.0))`, want: func(tCtx ottllog.TransformContext) { diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index 8edd407d1d1d..a4f0281c4d2c 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -418,6 +418,7 @@ Available Converters: - [ExtractGrokPatterns](#extractgrokpatterns) - [FNV](#fnv) - [Format](#format) +- [GetXML](#getxml) - [Hex](#hex) - [Hour](#hour) - [Hours](#hours) @@ -742,6 +743,37 @@ Examples: - `Format("%04d-%02d-%02d", [Year(Now()), Month(Now()), Day(Now())])` - `Format("%s/%s/%04d-%02d-%02d.log", [attributes["hostname"], body["program"], Year(Now()), Month(Now()), Day(Now())])` + +### GetXML + +`GetXML(target, xpath)` + +The `GetXML` Converter returns an XML string with selected elements. + +`target` is a Getter that returns a string. This string should be in XML format. +If `target` is not a string, is nil, or is not valid XML, `GetXML` will return an error. + +`xpath` is a string that specifies an [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expression that +selects one or more elements. Currently, this converter only supports selecting elements. 
+ +Examples: + +Get all elements at the root of the document with tag "a" + +- `GetXML(body, "/a")` + +Get all elements anywhere in the document with tag "a" + +- `GetXML(body, "//a")` + +Get the first element at the root of the document with tag "a" + +- `GetXML(body, "/a[1]")` + +Get all elements in the document with tag "a" that have an attribute "b" with value "c" + +- `GetXML(body, "//a[@b='c']")` + ### Hex `Hex(value)` diff --git a/pkg/ottl/ottlfuncs/func_get_xml.go b/pkg/ottl/ottlfuncs/func_get_xml.go new file mode 100644 index 000000000000..d5390b62da63 --- /dev/null +++ b/pkg/ottl/ottlfuncs/func_get_xml.go @@ -0,0 +1,62 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" + +import ( + "context" + "fmt" + + "github.com/antchfx/xmlquery" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" +) + +type GetXMLArguments[K any] struct { + Target ottl.StringGetter[K] + XPath string +} + +func NewGetXMLFactory[K any]() ottl.Factory[K] { + return ottl.NewFactory("GetXML", &GetXMLArguments[K]{}, createGetXMLFunction[K]) +} + +func createGetXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) { + args, ok := oArgs.(*GetXMLArguments[K]) + + if !ok { + return nil, fmt.Errorf("GetXML args must be of type *GetXMLAguments[K]") + } + + if err := validateXPath(args.XPath); err != nil { + return nil, err + } + + return getXML(args.Target, args.XPath), nil +} + +// getXML returns a XML formatted string that is a result of matching elements from the target XML. 
+func getXML[K any](target ottl.StringGetter[K], xPath string) ottl.ExprFunc[K] { + return func(ctx context.Context, tCtx K) (any, error) { + var doc *xmlquery.Node + if targetVal, err := target.Get(ctx, tCtx); err != nil { + return nil, err + } else if doc, err = parseNodesXML(targetVal); err != nil { + return nil, err + } + + nodes, err := xmlquery.QueryAll(doc, xPath) + if err != nil { + return nil, err + } + + result := &xmlquery.Node{Type: xmlquery.DocumentNode} + for _, n := range nodes { + if n.Type != xmlquery.ElementNode { + continue + } + xmlquery.AddChild(result, n) + } + return result.OutputXML(false), nil + } +} diff --git a/pkg/ottl/ottlfuncs/func_get_xml_test.go b/pkg/ottl/ottlfuncs/func_get_xml_test.go new file mode 100644 index 000000000000..26b8bfde5bc9 --- /dev/null +++ b/pkg/ottl/ottlfuncs/func_get_xml_test.go @@ -0,0 +1,144 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" +) + +func Test_GetXML(t *testing.T) { + tests := []struct { + name string + document string + xPath string + want string + }{ + { + name: "get single element", + document: ``, + xPath: "/a/b", + want: ``, + }, + { + name: "get single complex element", + document: `hello`, + xPath: "/a", + want: `hello`, + }, + { + name: "get uniform elements from same parent", + document: `helloworld`, + xPath: "/a/b", + want: `helloworld`, + }, + { + name: "get nonuniform elements from same parent", + document: `helloworld`, + xPath: "/a/*", + want: `helloworld`, + }, + { + name: "get elements from various places", + document: `123`, + xPath: "/a//x", + want: `123`, + }, + { + name: "get filtered elements from various places", + document: `123`, + xPath: "/a//x[@env='prod']", + want: `13`, + }, + { + 
name: "ignore empty", + document: ``, + xPath: "/", + want: ``, + }, + { + name: "ignore declaration", + document: ``, + xPath: "/*", + want: ``, + }, + { + name: "ignore comments", + document: ``, + xPath: "/*", + want: ``, + }, + { + name: "ignore attribute selection", + document: ``, + xPath: "/@foo", + want: ``, + }, + { + name: "ignore text selection", + document: `hello`, + xPath: "/a/text()", + want: ``, + }, + { + name: "ignore chardata selection", + document: ``, + xPath: "/a/text()", + want: ``, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + factory := NewGetXMLFactory[any]() + exprFunc, err := factory.CreateFunction( + ottl.FunctionContext{}, + &GetXMLArguments[any]{ + Target: ottl.StandardStringGetter[any]{ + Getter: func(_ context.Context, _ any) (any, error) { + return tt.document, nil + }, + }, + XPath: tt.xPath, + }) + assert.NoError(t, err) + + result, err := exprFunc(context.Background(), nil) + assert.NoError(t, err) + assert.Equal(t, tt.want, result) + }) + } +} + +func TestCreateGetXMLFunc(t *testing.T) { + factory := NewGetXMLFactory[any]() + fCtx := ottl.FunctionContext{} + + // Invalid arg type + exprFunc, err := factory.CreateFunction(fCtx, nil) + assert.Error(t, err) + assert.Nil(t, exprFunc) + + // Invalid XPath should error on function creation + exprFunc, err = factory.CreateFunction( + fCtx, &GetXMLArguments[any]{ + XPath: "!", + }) + assert.Error(t, err) + assert.Nil(t, exprFunc) + + // Invalid XML should error on function execution + exprFunc, err = factory.CreateFunction( + fCtx, &GetXMLArguments[any]{ + Target: invalidXMLGetter(), + XPath: "/", + }) + assert.NoError(t, err) + assert.NotNil(t, exprFunc) + _, err = exprFunc(context.Background(), nil) + assert.Error(t, err) +} diff --git a/pkg/ottl/ottlfuncs/functions.go b/pkg/ottl/ottlfuncs/functions.go index ca165bf1b999..fc61975c6a09 100644 --- a/pkg/ottl/ottlfuncs/functions.go +++ b/pkg/ottl/ottlfuncs/functions.go @@ -46,6 +46,7 @@ func converters[K 
any]() []ottl.Factory[K] { NewExtractPatternsFactory[K](), NewExtractGrokPatternsFactory[K](), NewFnvFactory[K](), + NewGetXMLFactory[K](), NewHourFactory[K](), NewHoursFactory[K](), NewIntFactory[K](),