Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(livecheck/pypi): update to use json endpoint to query version #18895

Merged
merged 3 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 38 additions & 32 deletions Library/Homebrew/livecheck/strategy/pypi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,28 @@
module Homebrew
module Livecheck
module Strategy
# The {Pypi} strategy identifies versions of software at pypi.org by
# checking project pages for archive files.
# The {Pypi} strategy identifies the newest version of a PyPI package by
# checking the JSON API endpoint for the project and using the
# `info.version` field from the response.
#
# PyPI URLs have a standard format but the hexadecimal text between
# `/packages/` and the filename varies:
# PyPI URLs have a standard format:
# `https://files.pythonhosted.org/packages/<hex>/<hex>/<long_hex>/example-1.2.3.tar.gz`
#
# * `https://files.pythonhosted.org/packages/<hex>/<hex>/<long_hex>/example-1.2.3.tar.gz`
#
# As such, the default regex only targets the filename at the end of the
# URL.
# Upstream documentation for the PyPI JSON API can be found at:
# https://docs.pypi.org/api/json/#get-a-project
#
# @api public
class Pypi
NICE_NAME = "PyPI"

# Fallback `strategy` block, used when a `strategy` block isn't provided.
# It returns the `info.version` value from the parsed JSON response, or
# `nil` when that value is missing or blank.
DEFAULT_BLOCK = T.let(
  proc { |json| json.dig("info", "version").presence }.freeze,
  T.proc.params(arg0: T::Hash[String, T.untyped]).returns(T.nilable(String)),
)

# The `Regexp` used to extract the package name and suffix (e.g. file
# extension) from the URL basename.
FILENAME_REGEX = /
Expand All @@ -44,10 +51,8 @@ def self.match?(url)
URL_MATCH_REGEX.match?(url)
end

# Extracts information from a provided URL and uses it to generate
# various input values used by the strategy to check for new versions.
# Some of these values act as defaults and can be overridden in a
# `livecheck` block.
# Extracts the package name from the provided URL and uses it to
# generate the PyPI JSON API URL for the project.
#
# @param url [String] the URL used to generate values
# @return [Hash]
Expand All @@ -58,40 +63,41 @@ def self.generate_input_values(url)
match = File.basename(url).match(FILENAME_REGEX)
return values if match.blank?

# It's not technically necessary to have the `#files` fragment at the
# end of the URL but it makes the debug output a bit more useful.
values[:url] = "https://pypi.org/project/#{T.must(match[:package_name]).gsub(/%20|_/, "-")}/#files"

# Use `\.t` instead of specific tarball extensions (e.g. .tar.gz)
suffix = T.must(match[:suffix]).sub(Strategy::TARBALL_EXTENSION_REGEX, ".t")
regex_suffix = Regexp.escape(suffix).gsub("\\-", "-")

# Example regex: `%r{href=.*?/packages.*?/example[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i`
regex_name = Regexp.escape(T.must(match[:package_name])).gsub(/\\[_-]/, "[_-]")
values[:regex] =
%r{href=.*?/packages.*?/#{regex_name}[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)#{regex_suffix}}i
values[:url] = "https://pypi.org/pypi/#{T.must(match[:package_name]).gsub(/%20|_/, "-")}/json"

values
end

# Generates a URL and regex (if one isn't provided) and passes them
# to {PageMatch.find_versions} to identify versions in the content.
# Generates a PyPI JSON API URL for the project and identifies new
# versions using {Json#find_versions} with a block.
#
# @param url [String] the URL of the content to check
# @param regex [Regexp] a regex used for matching versions in content
# @param provided_content [String, nil] content to check instead of
# fetching
# @return [Hash]
sig {
params(
url: String,
regex: T.nilable(Regexp),
unused: T.untyped,
block: T.nilable(Proc),
url: String,
regex: T.nilable(Regexp),
provided_content: T.nilable(String),
unused: T.untyped,
block: T.nilable(Proc),
).returns(T::Hash[Symbol, T.untyped])
}
def self.find_versions(url:, regex: nil, **unused, &block)
def self.find_versions(url:, regex: nil, provided_content: nil, **unused, &block)
match_data = { matches: {}, regex:, url: }
samford marked this conversation as resolved.
Show resolved Hide resolved

generated = generate_input_values(url)
return match_data if generated.blank?

PageMatch.find_versions(url: generated[:url], regex: regex || generated[:regex], **unused, &block)
Json.find_versions(
url: generated[:url],
regex:,
provided_content:,
**unused,
&block || DEFAULT_BLOCK
)
end
end
end
Expand Down
88 changes: 86 additions & 2 deletions Library/Homebrew/test/livecheck/strategy/pypi_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,42 @@
# Example archive URL in the `files.pythonhosted.org/packages/...` format.
let(:pypi_url) { "https://files.pythonhosted.org/packages/ab/cd/efg/example-package-1.2.3.tar.gz" }
# URL on an unrelated host, used for the negative cases.
let(:non_pypi_url) { "https://brew.sh/test" }

# Matches a dotted numeric version (e.g. `1.2.3`) with an optional leading
# `v`. In the visible examples it is only passed through to a `strategy`
# block.
let(:regex) { /^v?(\d+(?:\.\d+)+)$/i }

let(:generated) do
{
url: "https://pypi.org/project/example-package/#files",
regex: %r{href=.*?/packages.*?/example[_-]package[._-]v?(\d+(?:\.\d+)*(?:[._-]post\d+)?)\.t}i,
url: "https://pypi.org/pypi/example-package/json",
}
end

# This is a limited subset of a PyPI JSON API response object, for the sake
# of testing. It contains only `info.version`, the field that the examples
# in this spec read from the parsed response.
let(:content) do
<<~JSON
{
"info": {
"version": "1.2.3"
}
}
JSON
end

# Version strings that the examples expect to be found in `content`.
let(:matches) { ["1.2.3"] }

# Hash shaped like a `find_versions` result: the single expected version,
# no regex and the generated URL.
let(:find_versions_return_hash) do
  expected_version = "1.2.3"

  {
    matches: { expected_version => Version.new(expected_version) },
    regex:   nil,
    url:     generated[:url],
  }
end

# Same hash as `find_versions_return_hash`, with `cached: true` added.
let(:find_versions_cached_return_hash) { find_versions_return_hash.merge(cached: true) }

describe "::match?" do
it "returns true for a PyPI URL" do
expect(pypi.match?(pypi_url)).to be true
Expand All @@ -34,4 +63,59 @@
expect(pypi.generate_input_values(non_pypi_url)).to eq({})
end
end

describe "::find_versions" do
  # Expected return values for checks that use `provided_content`:
  # `:cached` holds the versions from `matches`, while `:cached_default` has
  # the same shape with no matches.
  let(:match_data) do
    cached_default = {
      matches: {},
      regex:   nil,
      url:     generated[:url],
      cached:  true,
    }
    found = matches.to_h { |version| [version, Version.new(version)] }

    {
      cached:         cached_default.merge(matches: found),
      cached_default:,
    }
  end

  it "finds versions in provided content" do
    result = pypi.find_versions(url: pypi_url, provided_content: content)

    expect(result).to eq(match_data[:cached])
  end

  it "finds versions in provided content using a block" do
    # NOTE: We only use a regex here to make sure it can be passed into the
    # block, if necessary.
    with_regex = pypi.find_versions(url: pypi_url, regex:, provided_content: content) do |json, pattern|
      version_match = json.dig("info", "version")&.match(pattern)
      version_match[1] if version_match.present?
    end
    expect(with_regex).to eq(match_data[:cached].merge({ regex: }))

    without_regex = pypi.find_versions(url: pypi_url, provided_content: content) do |json|
      json.dig("info", "version").presence
    end
    expect(without_regex).to eq(match_data[:cached])
  end

  it "returns default match_data when block doesn't return version information" do
    # Both payloads lack a usable `info.version` value.
    ['{"info":{"version":""}}', '{"other":true}'].each do |payload|
      result = pypi.find_versions(url: pypi_url, provided_content: payload)

      expect(result).to eq(match_data[:cached_default])
    end
  end

  it "returns default match_data when url is blank" do
    result = pypi.find_versions(url: "") { "1.2.3" }

    expect(result).to eq({ matches: {}, regex: nil, url: "" })
  end

  it "returns default match_data when content is blank" do
    # An empty JSON object and an empty string are both expected to yield no
    # matches, even though the block always returns a version.
    ["{}", ""].each do |payload|
      result = pypi.find_versions(url: pypi_url, provided_content: payload) { "1.2.3" }

      expect(result).to eq(match_data[:cached_default])
    end
  end
end
end
Loading