Skip to content

Commit

Permalink
Merge pull request #21 from twingly/remove-scheme
Browse files Browse the repository at this point in the history
Twingly::URL::Utilities.remove_scheme
  • Loading branch information
jage committed Dec 11, 2014
2 parents 097bba9 + fd68a58 commit e7eb7ff
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Twingly URL tools.
* `Twingly::URL::Hasher.documentdb_hash(url)` - SHA256 unsigned long, native endian digest
* `Twingly::URL::Hasher.autopingdb_hash(url)` - SHA256 64-bit signed, native endian digest
* `Twingly::URL::Hasher.pingloggerdb_hash(url)` - SHA256 64-bit unsigned, native endian digest
* `twingly/url/utilities` - Utilities to work with URLs
* `Twingly::URL::Utilities.remove_scheme(url)` - Removes scheme from HTTP/HTTPS URLs (`http://twingly.com` -> `//twingly.com`)

## Normalization example

Expand Down
13 changes: 13 additions & 0 deletions lib/twingly/url/utilities.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module Twingly
  module URL
    # Stateless helpers for working with URL strings.
    module Utilities
      module_function

      # Matches a leading "http:" or "https:" scheme, case-insensitively.
      # Anchored with \A (start of string) rather than ^ (start of any line),
      # so a scheme that merely follows an embedded newline is never touched.
      PROTOCOL_EXPRESSION = /\Ahttps?:/i

      # Removes the scheme from an HTTP/HTTPS URL, leaving a
      # protocol-relative URL ("http://twingly.com" -> "//twingly.com").
      # Input that does not start with "http:" or "https:" comes back with
      # the same content.
      #
      # @param url [String] the URL to strip the scheme from
      # @return [String] a new string; the argument itself is not modified
      def remove_scheme(url)
        url.sub(PROTOCOL_EXPRESSION, '')
      end
    end
  end
end
1 change: 1 addition & 0 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
require 'twingly/url'
require 'twingly/url/hasher'
require 'twingly/url/normalizer'
require 'twingly/url/utilities'
82 changes: 82 additions & 0 deletions test/unit/utilities_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
require 'test_helper'

# Unit tests for Twingly::URL::Utilities.remove_scheme: the scheme is stripped
# only from URLs that start with http/https (in any letter case); every other
# URL, and any URL embedded in a query parameter, is left as it was.
class TestUtilities < MiniTest::Unit::TestCase
  context ".remove_scheme" do
    should "not remove scheme from non HTTP(S) URLs" do
      url = 'gopher://www.duh.se/'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal 'gopher://www.duh.se/', result
    end

    should "remove scheme from mixed case HTTP URL" do
      url = 'HttP://www.duh.se/'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal '//www.duh.se/', result
    end

    should "remove scheme from mixed case HTTPS URL" do
      url = 'hTTpS://www.duh.se/'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal '//www.duh.se/', result
    end

    should "remove scheme from lowercase HTTP URL" do
      url = 'http://www.duh.se/'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal '//www.duh.se/', result
    end

    should "remove scheme from lowercase HTTPS URL" do
      url = 'https://www.duh.se/'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal '//www.duh.se/', result
    end

    should "remove scheme from uppercase HTTP URL" do
      url = 'HTTP://WWW.DUH.SE/'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal '//WWW.DUH.SE/', result
    end

    should "remove scheme from uppercase HTTPS URL" do
      url = 'HTTPS://WWW.DUH.SE/'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal '//WWW.DUH.SE/', result
    end

    should "remove scheme from URL with non ASCII characters" do
      url = 'http://www.thecloset.gr/people/bloggers-pick-ιωάννα-τσιγαρίδα'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal '//www.thecloset.gr/people/bloggers-pick-ιωάννα-τσιγαρίδα', result
    end

    # The scheme of a URL passed as a query parameter must survive.
    should "only remove scheme from HTTP URL" do
      url = 'http://feedjira.herokuapp.com/?url=http://developer.twingly.com/feed.xml'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal '//feedjira.herokuapp.com/?url=http://developer.twingly.com/feed.xml', result
    end

    should "only remove scheme from HTTPS URL" do
      url = 'https://feedjira.herokuapp.com/?url=https://signalvnoise.com/posts.rss'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal '//feedjira.herokuapp.com/?url=https://signalvnoise.com/posts.rss', result
    end

    should "not remove scheme from non HTTP(S) URLs with parameter" do
      url = 'ftp://ftp.example.com/?url=https://www.example.com/'

      result = Twingly::URL::Utilities.remove_scheme(url)
      assert_equal 'ftp://ftp.example.com/?url=https://www.example.com/', result
    end
  end
end

0 comments on commit e7eb7ff

Please sign in to comment.