From 417e48ae1c485e046c64bd8fbdb4baefc097fdc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20M=C3=A4nnchen?= Date: Sat, 1 Apr 2023 02:12:02 +0200 Subject: [PATCH] Implement Parser / Composer --- .credo.exs | 14 +- .github/workflows/part_test.yml | 2 + .gitmodules | 3 + README.md | 8 +- lib/purl.ex | 237 +++++++++++++++++++++- lib/purl/composer.ex | 56 ++++++ lib/purl/error/duplicate_qualifier.ex | 18 ++ lib/purl/error/invalid_field.ex | 20 ++ lib/purl/error/invalid_scheme.ex | 19 ++ lib/purl/error/special_case_failed.ex | 11 + lib/purl/parser.ex | 276 ++++++++++++++++++++++++++ lib/purl/special_case.ex | 168 ++++++++++++++++ mix.exs | 3 +- test/purl_test.exs | 71 ++++++- test/spec | 1 + 15 files changed, 893 insertions(+), 14 deletions(-) create mode 100644 .gitmodules create mode 100644 lib/purl/composer.ex create mode 100644 lib/purl/error/duplicate_qualifier.ex create mode 100644 lib/purl/error/invalid_field.ex create mode 100644 lib/purl/error/invalid_scheme.ex create mode 100644 lib/purl/error/special_case_failed.ex create mode 100644 lib/purl/parser.ex create mode 100644 lib/purl/special_case.ex create mode 160000 test/spec diff --git a/.credo.exs b/.credo.exs index dde4ffc..666b797 100644 --- a/.credo.exs +++ b/.credo.exs @@ -17,7 +17,6 @@ # ## Consistency Checks # - {Credo.Check.Consistency.ExceptionNames, []}, {Credo.Check.Consistency.LineEndings, []}, {Credo.Check.Consistency.ParameterPatternMatching, []}, {Credo.Check.Consistency.SpaceAroundOperators, []}, @@ -82,7 +81,6 @@ {Credo.Check.Refactor.FilterCount, []}, {Credo.Check.Refactor.FilterFilter, []}, {Credo.Check.Refactor.RejectReject, []}, - {Credo.Check.Refactor.RedundantWithClauseResult, []}, # ## Warnings @@ -123,7 +121,6 @@ {Credo.Check.Readability.SinglePipe, []}, {Credo.Check.Readability.Specs, [include_defp: true]}, {Credo.Check.Readability.StrictModuleLayout, []}, - {Credo.Check.Refactor.ABCSize, []}, {Credo.Check.Refactor.AppendSingleItem, []}, 
{Credo.Check.Refactor.DoubleBooleanNegation, []}, {Credo.Check.Refactor.IoPuts, []}, @@ -134,6 +131,16 @@ {Credo.Check.Warning.UnsafeToAtom, []} ], disabled: [ + # + ## Consistency Checks + # + {Credo.Check.Consistency.ExceptionNames, []}, + + # + ## Refactoring Opportunities + # + {Credo.Check.Refactor.RedundantWithClauseResult, []}, + # # Controversial and experimental checks (opt-in, just move the check to `:enabled` # and be sure to use `mix credo --strict` to see low priority checks) @@ -144,6 +151,7 @@ {Credo.Check.Readability.SingleFunctionToBlockPipe, []}, {Credo.Check.Readability.WithCustomTaggedTuple, []}, {Credo.Check.Readability.OnePipePerLine, []}, + {Credo.Check.Refactor.ABCSize, []}, {Credo.Check.Refactor.FilterReject, []}, {Credo.Check.Refactor.MapMap, []}, {Credo.Check.Refactor.ModuleDependencies, []}, diff --git a/.github/workflows/part_test.yml b/.github/workflows/part_test.yml index d3ea693..0baf545 100644 --- a/.github/workflows/part_test.yml +++ b/.github/workflows/part_test.yml @@ -70,6 +70,8 @@ jobs: steps: - uses: actions/checkout@v3 + with: + submodules: 'true' - uses: erlef/setup-beam@v1 id: setupBEAM with: diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..1dcf6ac --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "test/spec"] + path = test/spec + url = git@github.com:package-url/purl-spec.git diff --git a/README.md b/README.md index 31ecadc..6ad2744 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,13 @@ See [the documentation][docs]. ## Usage -TODO +```elixir +iex> Purl.new("pkg:hex/purl") +{:ok, %Purl{type: "hex", name: "purl"}} + +iex> Purl.to_string(%Purl{type: "hex", name: "purl"}) +"pkg:hex/purl" +``` ## Installation diff --git a/lib/purl.ex b/lib/purl.ex index 6c268b0..fd0a7b2 100644 --- a/lib/purl.ex +++ b/lib/purl.ex @@ -1,19 +1,242 @@ defmodule Purl do @moduledoc """ - Documentation for `Purl`. + Elixir Implementation of the purl (package url) specification. 
+ + ## Specification + + https://github.com/package-url/purl-spec + + **Format**: `pkg:type/namespace/name@version?qualifiers#subpath` + + > #### License {: .neutral} + > + > A lot of the documentation was taken directly from the specification. It is + > licensed under the MIT License: + > ``` + > Copyright (c) the purl authors + > + > Permission is hereby granted, free of charge, to any person obtaining a copy of + > this software and associated documentation files (the "Software"), to deal in + > the Software without restriction, including without limitation the rights to + > use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + > the Software, and to permit persons to whom the Software is furnished to do so, + > subject to the following conditions: + > + > The above copyright notice and this permission notice shall be included in all + > copies or substantial portions of the Software. + > + > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + > FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + > COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + > IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + > CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + > ``` + """ + + alias Purl.Composer + alias Purl.Parser + alias Purl.SpecialCase + + # credo:disable-for-next-line Credo.Check.Warning.SpecWithStruct + @type parse_error :: + %URI.Error{} + | Purl.Error.InvalidField.t() + | Purl.Error.DuplicateQualifier.t() + | Purl.Error.InvalidScheme.t() + + @typedoc """ + the package "type" or package "protocol" such as `maven`, `npm`, `nuget`, + `gem`, `pypi`, etc. 
+ + Known types: https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst + + ## Validation + + * The package type is composed only of ASCII letters and numbers, '.', '+' and '-' (period, plus, and dash) + * The type cannot start with a number + * The type cannot contain spaces + * The type must NOT be percent-encoded + * The type is case insensitive. The canonical form is lowercase + """ + @type type :: String.t() + + @typedoc """ + Segment of the namespace + + ## Validation + + * must not contain a '/' + * must not be empty + * A URL host or Authority must NOT be used as a namespace. Use instead a + `repository_url` qualifier. Note however that for some types, the namespace + may look like a host. + """ + @type namespace_segment :: String.t() + + @typedoc """ + some name prefix such as a Maven groupid, a Docker image owner, a GitHub user + or organization + + The values are type-specific. + """ + @type namespace :: [namespace_segment()] + + @typedoc """ + the name of the package + """ + @type name :: String.t() + + @typedoc """ + the version of the package + + A version is a plain and opaque string. Some package types use versioning + conventions such as semver for NPMs or nevra conventions for RPMS. A type may + define a procedure to compare and sort versions, but there is no reliable and + uniform way to do such comparison consistently. + """ + @type version :: Version.t() | String.t() + + @typedoc """ + qualifier key + + ## Validation + + * The key must be composed only of ASCII letters and numbers, '.', '-' and '_' (period, dash and underscore) + * A key cannot start with a number + * A key must NOT be percent-encoded + * A key is case insensitive. 
The canonical form is lowercase + * A key cannot contain spaces + """ + @type qualifier_key :: String.t() + + @typedoc """ + qualifier value + + ## Validation + * value cannot be an empty string: a key=value pair with an empty value is the + same as no key/value at all for this key + """ + @type qualifier_value :: String.t() + + @typedoc """ + extra qualifying data for a package such as an OS, architecture, a distro, + etc. + + The values are type-specific. + + ## Validation + * key must be unique within the keys of the qualifiers string """ + @type qualifiers :: %{optional(qualifier_key()) => qualifier_value()} + + @typedoc """ + subpath segment + + ## Validation + * must not contain a '/' + * must not be any of '..' or '.' + * must not be empty + """ + @type subpath_segment :: String.t() + + @typedoc """ + extra subpath within a package, relative to the package root + """ + @type subpath :: [subpath_segment()] + + @typedoc """ + Package URL struct + """ + @type t :: %__MODULE__{ + type: type(), + namespace: namespace(), + name: name(), + version: version() | nil, + qualifiers: qualifiers(), + subpath: subpath() + } + + @enforce_keys [:type, :name] + defstruct [:type, :name, namespace: [], version: nil, qualifiers: %{}, subpath: []] + + @doc """ + Formats purl as string + + ## Examples + + iex> Purl.to_string(%Purl{type: "hex", name: "purl"}) + "pkg:hex/purl" + + """ + @spec to_string(purl :: t()) :: String.t() + def to_string(%Purl{} = purl), do: purl |> to_uri() |> URI.to_string() + + # @doc """ + # Converts a purl to a `URI` + + # ## Examples + + # iex> Purl.to_uri(%Purl{type: "hex", name: "purl"}) + # %URI{scheme: "pkg", path: "hex/purl"} + + # """ + @spec to_uri(purl :: t()) :: URI.t() + defdelegate to_uri(purl), to: Composer, as: :compose_uri @doc """ - Hello world. + Creates a new purl struct from a `Purl`, `URI` or string. 
## Examples - iex> Purl.hello() - :world + iex> Purl.new("pkg:hex/purl") + {:ok, %Purl{type: "hex", name: "purl"}} """ - @spec hello :: :world - def hello do - :world + @spec new(purl :: String.t() | URI.t() | Purl.t()) :: + {:ok, Purl.t()} | {:error, parse_error() | Purl.Error.SpecialCaseFailed.t()} + def new(purl) do + with {:ok, purl} <- Parser.parse(purl), + {:ok, purl} <- SpecialCase.apply(purl) do + {:ok, purl} + end + end + + @doc """ + Similar to `new/1` but raises the returned error (`URI.Error` or one of the + `Purl.Error.*` exceptions) if an invalid input is given. + + ## Examples + + iex> Purl.new!("pkg:hex/purl") + %Purl{type: "hex", name: "purl"} + + iex> Purl.new!(">pkg:hex/purl") + ** (URI.Error) cannot parse due to reason invalid_uri: ">" + + iex> Purl.new!("pkg:hex*/purl") + ** (Purl.Error.InvalidField) invalid field type, \"hex*\" given + + """ + @spec new!(purl :: String.t() | URI.t() | Purl.t()) :: Purl.t() + def new!(purl) do + case new(purl) do + {:ok, purl} -> purl + {:error, reason} -> raise reason + end + end + + defimpl String.Chars do + @impl String.Chars + def to_string(%Purl{} = purl), do: Purl.to_string(purl) + end + + defimpl Inspect do + import Inspect.Algebra + + @impl Inspect + def inspect(%Purl{} = purl, opts) do + concat(["Purl.parse!(", to_doc(Purl.to_string(purl), opts), ")"]) + end end end diff --git a/lib/purl/composer.ex b/lib/purl/composer.ex new file mode 100644 index 0000000..2cb238f --- /dev/null +++ b/lib/purl/composer.ex @@ -0,0 +1,56 @@ +defmodule Purl.Composer do + @moduledoc false + + @spec compose_uri(purl :: Purl.t()) :: URI.t() + def compose_uri(purl) + + def compose_uri(%Purl{version: %Version{} = version} = purl), + do: compose_uri(%Purl{purl | version: Version.to_string(version)}) + + def compose_uri(%Purl{ + type: type, + namespace: namespace, + name: name, + version: version, + qualifiers: qualifiers, + subpath: subpath + }) do + %URI{ + scheme: "pkg", + path: + Enum.join( + [type | encode_namespace(namespace)] ++ 
+ [ + case version do + nil -> name + version -> "#{name}@#{URI.encode(version)}" + end + ], + "/" + ), + query: + unless qualifiers == %{} do + encode_qualifiers(qualifiers) + end, + fragment: + unless subpath == [] do + Enum.join(subpath, "/") + end + } + end + + @spec encode_namespace(namespace :: Purl.namespace()) :: [String.t()] + defp encode_namespace(namespace) do + Enum.map(namespace, fn namespace_segment -> + URI.encode(namespace_segment, &(&1 != ?@ and URI.char_unescaped?(&1))) + end) + end + + @spec encode_qualifiers(qualifiers :: Purl.qualifiers()) :: String.t() + defp encode_qualifiers(qualifiers) do + Enum.map_join(qualifiers, "&", fn {key, value} -> + URI.encode(key, &URI.char_unreserved?/1) <> + "=" <> URI.encode(value, &(&1 == ?/ or URI.char_unescaped?(&1))) + end) + end +end diff --git a/lib/purl/error/duplicate_qualifier.ex b/lib/purl/error/duplicate_qualifier.ex new file mode 100644 index 0000000..bbe736b --- /dev/null +++ b/lib/purl/error/duplicate_qualifier.ex @@ -0,0 +1,18 @@ +defmodule Purl.Error.DuplicateQualifier do + @moduledoc """ + Error raised if qualifier keys are duplicated + """ + + @type t :: %__MODULE__{ + key: String.t() + } + + defexception [:key] + + @impl Exception + def message(error) + + def message(%__MODULE__{key: key}) do + "qualifier #{key} is duplicated" + end +end diff --git a/lib/purl/error/invalid_field.ex b/lib/purl/error/invalid_field.ex new file mode 100644 index 0000000..9004eb6 --- /dev/null +++ b/lib/purl/error/invalid_field.ex @@ -0,0 +1,20 @@ +defmodule Purl.Error.InvalidField do + @moduledoc """ + Error raised if field values are invalid + """ + + @type t :: %__MODULE__{ + field: atom(), + value: String.t() + } + + defexception [:field, :value] + + @impl Exception + def message(error) + + def message(%__MODULE__{field: field, value: value}) do + formatted_value = inspect(value, pretty: true) + "invalid field #{field}, #{formatted_value} given" + end +end diff --git a/lib/purl/error/invalid_scheme.ex 
b/lib/purl/error/invalid_scheme.ex new file mode 100644 index 0000000..94caa13 --- /dev/null +++ b/lib/purl/error/invalid_scheme.ex @@ -0,0 +1,19 @@ +defmodule Purl.Error.InvalidScheme do + @moduledoc """ + Error raised if the scheme of the purl is incorrect + """ + + @type t :: %__MODULE__{ + scheme: String.t() | nil + } + + defexception [:scheme] + + @impl Exception + def message(error) + + def message(%__MODULE__{scheme: scheme}) do + formatted_scheme = inspect(scheme, pretty: true) + "scheme #{formatted_scheme} is invalid, pkg expected" + end +end diff --git a/lib/purl/error/special_case_failed.ex b/lib/purl/error/special_case_failed.ex new file mode 100644 index 0000000..f531da7 --- /dev/null +++ b/lib/purl/error/special_case_failed.ex @@ -0,0 +1,11 @@ +defmodule Purl.Error.SpecialCaseFailed do + @moduledoc """ + Error raised if special rules for type are not fulfilled + """ + + @type t :: %__MODULE__{ + message: String.t() + } + + defexception [:message] +end diff --git a/lib/purl/parser.ex b/lib/purl/parser.ex new file mode 100644 index 0000000..ea59d32 --- /dev/null +++ b/lib/purl/parser.ex @@ -0,0 +1,276 @@ +defmodule Purl.Parser do + @moduledoc false + + @spec parse(purl :: String.t() | URI.t() | Purl.t()) :: + {:ok, Purl.t()} | {:error, Purl.parse_error()} + def parse(purl) + + def parse(string) when is_binary(string) do + case URI.new(string) do + {:ok, uri} -> + parse(uri) + + {:error, invalid} -> + {:error, %URI.Error{action: :parse, reason: :invalid_uri, part: invalid}} + end + end + + def parse(%URI{scheme: nil} = _uri), do: {:error, %Purl.Error.InvalidScheme{scheme: nil}} + + def parse(%URI{scheme: scheme} = _uri) when scheme != "pkg", + do: {:error, %Purl.Error.InvalidScheme{scheme: scheme}} + + def parse(%URI{scheme: "pkg", authority: type, path: path} = uri) when type != nil, + do: parse(%URI{uri | authority: nil, path: type <> "/" <> path}) + + def parse(%URI{scheme: "pkg", host: type, path: path} = uri) when type != nil, + do: parse(%URI{uri | 
host: nil, path: type <> "/" <> path}) + + def parse( + %URI{ + scheme: "pkg", + authority: nil, + host: nil, + path: path, + fragment: fragment, + query: query + } = _uri + ) do + with {:ok, {type, namespace, name, version}} <- parse_path(path), + {:ok, qualifiers} <- parse_query(query) do + parse(%Purl{ + type: type, + namespace: namespace, + name: name, + version: version, + qualifiers: qualifiers, + subpath: + case fragment do + nil -> [] + fragment -> fragment |> String.trim("/") |> String.split("/") + end + }) + end + end + + def parse( + %Purl{ + type: type, + namespace: namespace, + name: name, + version: version, + qualifiers: qualifiers, + subpath: subpath + } = _purl + ) do + with {:ok, type} <- parse_type(type), + {:ok, namespace} <- parse_namespace(namespace), + {:ok, name} <- parse_name(name), + {:ok, version} <- parse_version(version), + {:ok, qualifiers} <- parse_qualifiers(qualifiers), + {:ok, subpath} <- parse_subpath(subpath) do + {:ok, + %Purl{ + type: type, + namespace: namespace, + name: name, + version: version, + qualifiers: qualifiers, + subpath: subpath + }} + end + end + + @spec parse_path(path :: String.t()) :: + {:ok, {String.t(), [String.t()], String.t(), String.t() | nil}} + | {:error, Purl.parse_error()} + defp parse_path(path) do + case String.split(path, "/", trim: true) do + [type | [_name_or_namespace | _rest] = rest] -> + type = String.downcase(type) + + {name, namespace} = List.pop_at(rest, -1) + + namespace = Enum.map(namespace, &URI.decode/1) + + {name, version} = + case String.split(name, "@", parts: 2) do + [name, version] -> {name, URI.decode(version)} + [name] -> {name, nil} + end + + {:ok, {type, namespace, name, version}} + + [_one] -> + {:error, %Purl.Error.InvalidField{field: :name, value: ""}} + end + end + + @spec parse_query(query :: String.t() | nil) :: + {:ok, %{optional(String.t()) => String.t()}} | {:error, Purl.parse_error()} + defp parse_query(query) + defp parse_query(nil), do: {:ok, %{}} + + defp 
parse_query(query) do + query + |> URI.query_decoder() + |> Enum.reduce_while({:ok, %{}}, fn {key, value}, {:ok, acc} -> + key = String.downcase(key) + + if Map.has_key?(acc, key) do + {:halt, {:error, %Purl.Error.DuplicateQualifier{key: key}}} + else + {:cont, {:ok, Map.put(acc, key, value)}} + end + end) + end + + @spec parse_type(type :: String.t()) :: + {:ok, Purl.type()} | {:error, Purl.Error.InvalidField.t()} + defp parse_type(type) when is_binary(type) do + if Regex.match?(~r/^[a-zA-Z\.\+\-][a-zA-Z0-9\.\+\-]+$/, type) do + {:ok, type} + else + {:error, %Purl.Error.InvalidField{field: :type, value: type}} + end + end + + @spec parse_namespace(namespace :: [String.t()]) :: + {:ok, Purl.namespace()} | {:error, Purl.Error.InvalidField.t()} + defp parse_namespace(namespace) when is_list(namespace) do + namespace + |> Enum.reduce_while({:ok, []}, fn + segment, {:ok, acc} -> + case parse_namespace_segment(segment) do + {:ok, segment} -> {:cont, {:ok, [segment | acc]}} + {:error, reason} -> {:halt, {:error, reason}} + end + end) + |> case do + {:ok, namespace} -> {:ok, Enum.reverse(namespace)} + {:error, reason} -> {:error, reason} + end + end + + @spec parse_namespace_segment(segment :: String.t()) :: + {:ok, Purl.namespace_segment()} | {:error, Purl.Error.InvalidField.t()} + defp parse_namespace_segment(segment) + + defp parse_namespace_segment(""), + do: {:error, %Purl.Error.InvalidField{field: :namespace, value: ""}} + + defp parse_namespace_segment(segment) do + cond do + !String.valid?(segment) -> + {:error, %Purl.Error.InvalidField{field: :namespace, value: segment}} + + String.contains?(segment, "/") -> + {:error, %Purl.Error.InvalidField{field: :namespace, value: segment}} + + true -> + {:ok, segment} + end + end + + @spec parse_name(name :: String.t()) :: + {:ok, Purl.name()} | {:error, Purl.Error.InvalidField.t()} + defp parse_name(name) + defp parse_name(""), do: {:error, %Purl.Error.InvalidField{field: :name, value: ""}} + + defp parse_name(name) when 
is_binary(name) do + if String.valid?(name) do + {:ok, name} + else + {:error, %Purl.Error.InvalidField{field: :name, value: name}} + end + end + + @spec parse_version(version :: Version.t() | String.t() | nil) :: + {:ok, Purl.version() | nil} | {:error, Purl.Error.InvalidField.t()} + defp parse_version(version) + defp parse_version(nil), do: {:ok, nil} + defp parse_version(%Version{} = version), do: {:ok, version} + + defp parse_version(version) when is_binary(version) do + if String.valid?(version) do + {:ok, version} + else + {:error, %Purl.Error.InvalidField{field: :version, value: version}} + end + end + + @spec parse_qualifiers(qualifiers :: %{optional(String.t()) => String.t()}) :: + {:ok, Purl.qualifiers()} | {:error, Purl.Error.InvalidField.t()} + defp parse_qualifiers(%{} = qualifiers) do + Enum.reduce_while(qualifiers, {:ok, %{}}, fn {qualifier_key, qualifier_value}, {:ok, acc} -> + with {:ok, qualifier_key} <- parse_qualifier_key(qualifier_key), + {:ok, qualifier_value} <- parse_qualifier_value(qualifier_value) do + {:cont, {:ok, Map.put(acc, qualifier_key, qualifier_value)}} + else + error -> {:halt, error} + end + end) + end + + @spec parse_qualifier_key(qualifier_key :: String.t()) :: + {:ok, Purl.qualifier_key()} | {:error, Purl.Error.InvalidField.t()} + defp parse_qualifier_key(qualifier_key) + + defp parse_qualifier_key(""), + do: {:error, %Purl.Error.InvalidField{field: :qualifiers, value: ""}} + + defp parse_qualifier_key(qualifier_key) do + if Regex.match?(~r/^[a-zA-Z\.\-\_][a-zA-Z0-9\.\-\_]+$/, qualifier_key) do + {:ok, qualifier_key} + else + {:error, %Purl.Error.InvalidField{field: :qualifiers, value: qualifier_key}} + end + end + + @spec parse_qualifier_value(qualifier_value :: String.t()) :: + {:ok, Purl.qualifier_value()} | {:error, Purl.Error.InvalidField.t()} + defp parse_qualifier_value(qualifier_value) + + defp parse_qualifier_value(""), + do: {:error, %Purl.Error.InvalidField{field: :qualifiers, value: ""}} + + defp 
parse_qualifier_value(qualifier_value) do + if String.valid?(qualifier_value) do + {:ok, qualifier_value} + else + {:error, %Purl.Error.InvalidField{field: :qualifiers, value: qualifier_value}} + end + end + + @spec parse_subpath(subpath :: [String.t()]) :: + {:ok, Purl.subpath()} | {:error, Purl.Error.InvalidField.t()} + defp parse_subpath(subpath) when is_list(subpath) do + subpath + |> Enum.reduce_while({:ok, []}, fn + segment, {:ok, acc} -> + case parse_subpath_segment(segment) do + {:ok, segment} -> {:cont, {:ok, [segment | acc]}} + {:error, reason} -> {:halt, {:error, reason}} + end + end) + |> case do + {:ok, subpath} -> {:ok, Enum.reverse(subpath)} + {:error, reason} -> {:error, reason} + end + end + + @spec parse_subpath_segment(segment :: String.t()) :: + {:ok, Purl.subpath_segment()} | {:error, Purl.Error.InvalidField.t()} + defp parse_subpath_segment(segment) + + defp parse_subpath_segment(segment) when segment in ["", ".", ".."], + do: {:error, %Purl.Error.InvalidField{field: :subpath, value: segment}} + + defp parse_subpath_segment(segment) when is_binary(segment) do + if String.valid?(segment) do + {:ok, segment} + else + {:error, %Purl.Error.InvalidField{field: :subpath, value: segment}} + end + end +end diff --git a/lib/purl/special_case.ex b/lib/purl/special_case.ex new file mode 100644 index 0000000..83694b5 --- /dev/null +++ b/lib/purl/special_case.ex @@ -0,0 +1,168 @@ +defmodule Purl.SpecialCase do + @moduledoc false + + @rules %{ + "bitbucket" => %{ + name_case_sensitive: false, + name_normalize: :hyphen_case, + namespace_case_sensitive: false + }, + "cran" => %{ + version_required: true + }, + "composer" => %{ + name_case_sensitive: false, + namespace_case_sensitive: false + }, + "github" => %{ + name_case_sensitive: false, + namespace_case_sensitive: false + }, + "huggingface" => %{ + version_case_sensitive: false + }, + "pypi" => %{ + name_case_sensitive: false, + name_normalize: :hyphen_case + }, + "swift" => %{ + version_required: true, + 
namespace_min_length: 1 + } + } + + @spec apply(purl :: Purl.t()) :: + {:ok, Purl.t()} | {:error, Purl.Error.SpecialCaseFailed.t()} + def apply(purl) do + with {:ok, purl} <- downcase_namespace(purl), + {:ok, purl} <- enforce_namespace_length(purl), + {:ok, purl} <- downcase_name(purl), + {:ok, purl} <- normalize_name(purl), + {:ok, purl} <- downcase_version(purl), + {:ok, purl} <- version_required(purl), + {:ok, purl} <- conan_enforce_channel(purl) do + {:ok, purl} + end + end + + @spec downcase_namespace(purl :: Purl.t()) :: {:ok, Purl.t()} + defp downcase_namespace(purl) + + for {type, %{namespace_case_sensitive: false}} <- @rules do + defp downcase_namespace(%Purl{type: unquote(type), namespace: namespace} = purl), + do: {:ok, %Purl{purl | namespace: Enum.map(namespace, &String.downcase/1)}} + end + + defp downcase_namespace(purl), do: {:ok, purl} + + @spec enforce_namespace_length(purl :: Purl.t()) :: + {:ok, Purl.t()} | {:error, Purl.Error.SpecialCaseFailed.t()} + defp enforce_namespace_length(purl) + + for {type, %{namespace_min_length: min_length}} <- @rules do + defp enforce_namespace_length(%Purl{type: unquote(type), namespace: namespace} = purl) do + if length(namespace) >= unquote(min_length) do + {:ok, purl} + else + {:error, %Purl.Error.SpecialCaseFailed{message: "namespace missing"}} + end + end + end + + defp enforce_namespace_length(purl), do: {:ok, purl} + + @spec downcase_name(purl :: Purl.t()) :: {:ok, Purl.t()} + defp downcase_name(purl) + + for {type, %{name_case_sensitive: false}} <- @rules do + defp downcase_name(%Purl{type: unquote(type), name: name} = purl), + do: {:ok, %Purl{purl | name: String.downcase(name)}} + end + + defp downcase_name( + %Purl{type: "mlflow", name: name, qualifiers: %{"repository_url" => repository_url}} = + purl + ) do + with {:ok, %URI{host: host}} <- URI.new(repository_url), + true <- String.ends_with?(host, "azuredatabricks.net") do + {:ok, %Purl{purl | name: String.downcase(name)}} + else + _other -> {:ok, purl} 
+ end + end + + defp downcase_name(purl), do: {:ok, purl} + + @spec normalize_name(purl :: Purl.t()) :: {:ok, Purl.t()} + defp normalize_name(purl) + + for {type, %{name_normalize: :hyphen_case}} <- @rules do + defp normalize_name(%Purl{type: unquote(type), name: name} = purl), + do: {:ok, %Purl{purl | name: String.replace(name, "_", "-")}} + end + + defp normalize_name(purl), do: {:ok, purl} + + @spec downcase_version(purl :: Purl.t()) :: {:ok, Purl.t()} + defp downcase_version(purl) + + for {type, %{version_case_sensitive: false}} <- @rules do + defp downcase_version(%Purl{type: unquote(type), version: version} = purl) + when is_binary(version), + do: {:ok, %Purl{purl | version: String.downcase(version)}} + + defp downcase_version(%Purl{type: unquote(type), version: %Version{} = version} = purl), + do: + {:ok, + %Purl{ + purl + | version: version |> Version.to_string() |> String.downcase() |> Version.parse!() + }} + end + + defp downcase_version(purl), do: {:ok, purl} + + @spec version_required(purl :: Purl.t()) :: + {:ok, Purl.t()} | {:error, Purl.Error.SpecialCaseFailed.t()} + defp version_required(purl) + + for {type, %{version_required: true}} <- @rules do + defp version_required(%Purl{type: unquote(type), version: nil} = _purl), + do: {:error, %Purl.Error.SpecialCaseFailed{message: "version missing"}} + end + + defp version_required(purl), do: {:ok, purl} + + @spec conan_enforce_channel(purl :: Purl.t()) :: + {:ok, Purl.t()} | {:error, Purl.Error.SpecialCaseFailed.t()} + defp conan_enforce_channel(purl) + + defp conan_enforce_channel( + %Purl{ + type: "conan", + namespace: [_one | _rest], + qualifiers: qualifiers + } = purl + ) + when is_map_key(qualifiers, "channel"), + do: {:ok, purl} + + defp conan_enforce_channel( + %Purl{ + type: "conan", + namespace: [], + qualifiers: qualifiers + } = purl + ) + when not is_map_key(qualifiers, "channel"), + do: {:ok, purl} + + defp conan_enforce_channel(%Purl{type: "conan"}), + do: + {:error, + 
%Purl.Error.SpecialCaseFailed{ + message: "either namespace & channel must both be present or both be absent" + }} + + defp conan_enforce_channel(purl), do: {:ok, purl} +end diff --git a/mix.exs b/mix.exs index 927d716..1cc61fb 100644 --- a/mix.exs +++ b/mix.exs @@ -70,7 +70,8 @@ defmodule Purl.MixProject do {:ssl_verify_fun, "~> 1.1", manager: :rebar3, only: [:test], runtime: false, override: true}, {:dialyxir, "~> 1.0", only: [:dev], runtime: false}, - {:credo, "~> 1.0", only: [:dev], runtime: false} + {:credo, "~> 1.0", only: [:dev], runtime: false}, + {:jason, "~> 1.4", only: [:dev, :test]} ] end end diff --git a/test/purl_test.exs b/test/purl_test.exs index 83b053f..a256bb8 100644 --- a/test/purl_test.exs +++ b/test/purl_test.exs @@ -1,8 +1,75 @@ defmodule PurlTest do use ExUnit.Case, async: true + doctest Purl - test "greets the world" do - assert Purl.hello() == :world + spec_tests = + "test/spec/test-suite-data.json" + |> File.read!() + |> Jason.decode!() + + describe inspect(&Purl.new/1) do + test "should return a purl for a string" do + assert {:ok, + %Purl{ + type: "hex", + namespace: ["name", "space"], + name: "purl", + version: "1.0.0", + qualifiers: %{"key" => "value"}, + subpath: ["path", "to", "directory"] + }} = Purl.new("pkg:hex/name/space/purl@1.0.0?key=value#/path/to/directory/") + end + end + + describe inspect(&Purl.new!/1) do + test "should return a purl for a string" do + assert %Purl{type: "hex", name: "purl"} = Purl.new!("pkg:hex/purl") + end + end + + describe "specification verification" do + for %{"description" => description, "is_invalid" => is_invalid?, "purl" => purl} = + verification <- spec_tests do + if is_invalid? 
do + test description do + assert {:error, _reason} = Purl.new(unquote(purl)) + end + else + test description do + %{ + "type" => type, + "namespace" => namespace, + "name" => name, + "version" => version, + "qualifiers" => qualifiers, + "subpath" => subpath, + "canonical_purl" => canonical + } = unquote(Macro.escape(verification)) + + namespace = String.split(namespace || "", "/", trim: true) + + qualifiers = + case qualifiers do + nil -> %{} + qualifiers -> qualifiers + end + + subpath = String.split(subpath || "", "/", trim: true) + + assert {:ok, + %Purl{ + type: ^type, + namespace: ^namespace, + name: ^name, + version: ^version, + qualifiers: ^qualifiers, + subpath: ^subpath + } = parsed} = Purl.new(unquote(purl)) + + assert canonical == Purl.to_string(parsed) + end + end + end end end diff --git a/test/spec b/test/spec new file mode 160000 index 0000000..c02b002 --- /dev/null +++ b/test/spec @@ -0,0 +1 @@ +Subproject commit c02b002f09bdc88a501f62259eec18761957828a