-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Work with Twingly::URL objects instead of strings
- Loading branch information
1 parent
5288cd2
commit 3c6986b
Showing
18 changed files
with
631 additions
and
499 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,129 @@ | ||
require 'addressable/uri' | ||
require 'public_suffix' | ||
require "addressable/uri" | ||
require "public_suffix" | ||
|
||
require_relative "url/null_url" | ||
require_relative "url/error" | ||
|
||
PublicSuffix::List.private_domains = false | ||
|
||
module Twingly
  # Wraps a parsed URL (an Addressable::URI plus its PublicSuffix domain) and
  # exposes its parts and a normalized form.
  #
  # Instances are obtained through {URL.parse}; input that cannot be parsed
  # yields a NullURL instead of raising.
  class URL
    # Schemes for which {#valid?} reports true.
    SCHEMES = %w(http https).freeze
    # One or more slashes at the very end of a path.
    ENDS_WITH_SLASH = %r{/+\z}

    # Parses +potential_url+ into a URL object.
    #
    # @param potential_url [#to_s] anything String() can coerce
    # @return [Twingly::URL, Twingly::URL::NullURL] NullURL when parsing fails
    #   with an error tagged Twingly::URL::Error or with a ParseError
    def self.parse(potential_url)
      potential_url = String(potential_url)

      # TODO: Can we make this less send-y?
      new.send(:setup, potential_url)
    rescue Twingly::URL::Error, Twingly::URL::Error::ParseError
      NullURL.new
    end

    # @return the scheme reported by the underlying Addressable::URI
    def scheme
      addressable_uri.scheme
    end

    # @return the +trd+ part reported by the PublicSuffix domain
    def trd
      public_suffix_domain.trd
    end

    # @return the +sld+ part reported by the PublicSuffix domain
    def sld
      public_suffix_domain.sld
    end

    # @return the +tld+ part reported by the PublicSuffix domain
    def tld
      public_suffix_domain.tld
    end

    # @return the +domain+ reported by the PublicSuffix domain
    def domain
      public_suffix_domain.domain
    end

    # @return the host reported by the underlying Addressable::URI
    def host
      addressable_uri.host
    end

    # @return the path reported by the underlying Addressable::URI
    def path
      addressable_uri.path
    end

    # @return [String] {#to_s} with the leading "<scheme>:" removed
    def without_scheme
      # The scheme is escaped so characters such as "+" or "." are matched
      # literally rather than interpreted as regex operators.
      to_s.sub(/\A#{Regexp.escape(scheme.to_s)}:/, "")
    end

    # Builds a normalized copy: lower-cased scheme, normalized host and a path
    # without trailing slashes.
    #
    # @return [Twingly::URL] a new object; the receiver is left untouched
    def normalized
      normalized_url = addressable_uri.dup

      normalized_url.scheme = normalized_scheme
      normalized_url.host = normalized_host
      normalized_url.path = normalized_path

      # Set up a fresh instance; calling setup on self would overwrite the
      # receiver's own URI and domain.
      self.class.new.send(:setup, normalized_url)
    end

    # @return [String] the scheme in lower case
    def normalized_scheme
      addressable_uri.scheme.downcase
    end

    # @return [String] the normalized host: "www." is prepended when the
    #   domain has no subdomain, then the blogspot rewrite is applied
    def normalized_host
      host = addressable_uri.normalized_host
      domain = public_suffix_domain

      host = "www.#{host}" unless domain.subdomain?

      normalize_blogspot(host, domain)
    end

    # @return [String] the path without trailing slashes, or "/" when that
    #   leaves nothing
    def normalized_path
      path = strip_trailing_slashes(addressable_uri.path)

      path.empty? ? "/" : path
    end

    # @return truthy only when a URI and a domain are present and the scheme
    #   is one of SCHEMES
    def valid?
      addressable_uri && public_suffix_domain && SCHEMES.include?(normalized_scheme)
    end

    # @return [String] string form of the underlying Addressable::URI
    def to_s
      addressable_uri.to_s
    end

    private

    attr_reader :addressable_uri, :public_suffix_domain

    # Populates the instance from a string or an already-built
    # Addressable::URI.
    #
    # @return [self]
    # @raise [Twingly::URL::Error::ParseError] when heuristic parsing yields nil
    # @raise [Addressable::URI::InvalidURIError, PublicSuffix::DomainInvalid]
    #   re-raised after being tagged with Twingly::URL::Error
    def setup(potential_url)
      @addressable_uri =
        if potential_url.is_a?(Addressable::URI)
          potential_url
        else
          Addressable::URI.heuristic_parse(potential_url)
        end

      raise Twingly::URL::Error::ParseError if addressable_uri.nil?

      @public_suffix_domain = PublicSuffix.parse(addressable_uri.display_uri.host)

      self
    rescue Addressable::URI::InvalidURIError, PublicSuffix::DomainInvalid => error
      # Tag the library error so callers can rescue Twingly::URL::Error.
      error.extend(Twingly::URL::Error)
      raise
    end

    # For hosts whose +sld+ is "blogspot": drops a leading "www." and replaces
    # the trailing TLD with "com". Other hosts are returned unchanged.
    def normalize_blogspot(host, domain)
      return host unless domain.sld.downcase == "blogspot"

      # The TLD is escaped so a dotted suffix only matches itself.
      host.sub(/\Awww\./i, "").sub(/#{Regexp.escape(domain.tld)}\z/i, "com")
    end

    # Removes every trailing slash from +path+.
    def strip_trailing_slashes(path)
      path.sub(ENDS_WITH_SLASH, "")
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
module Twingly
  class URL
    # Tag module for errors raised by Twingly::URL, so callers can write
    # `rescue Twingly::URL::Error` to catch all of them.
    module Error
      # Signals that input could not be parsed into a URL.
      # Includes Error so a rescue on the tag module also catches it.
      class ParseError < StandardError
        include Error
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
require 'digest' | ||
|
||
module Twingly | ||
module URL | ||
class URL | ||
module Hasher | ||
module_function | ||
|
||
|
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
module Twingly
  class URL
    # Null object standing in for a URL that could not be parsed.
    #
    # Every public instance method of Twingly::URL that is not defined here
    # answers with an empty string; any other message raises NoMethodError.
    class NullURL
      # Answers "" for messages Twingly::URL instances understand.
      #
      # @raise [NoMethodError] for any other message
      def method_missing(name, *)
        unless Twingly::URL.method_defined?(name)
          raise NoMethodError, "undefined method `#{name}'"
        end

        ""
      end

      # Keeps respond_to? in agreement with method_missing.
      def respond_to_missing?(name, include_private = false)
        Twingly::URL.method_defined?(name) || super
      end

      # @return [self] a null URL is already its own normalized form
      def normalized
        self
      end

      # @return [false] a null URL is never valid
      def valid?
        false
      end

      # @return [String] always the empty string
      def to_s
        ""
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
module Twingly
  class URL
    # Version string of the library.
    VERSION = '1.3.4'.freeze
  end
end
Oops, something went wrong.