Skip to content

Commit

Permalink
Merge pull request #36 from twingly/rspec
Browse files Browse the repository at this point in the history
Change from minitest to rspec
  • Loading branch information
roback committed Sep 8, 2015
2 parents 7ea8c42 + 8b0ab5d commit 86f5286
Show file tree
Hide file tree
Showing 15 changed files with 338 additions and 326 deletions.
3 changes: 3 additions & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--color
--format documentation
--require spec_helper
30 changes: 16 additions & 14 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
require 'bundler/setup'
namespace :profile do
require_relative "profile/profile"

task default: 'test:unit'
task test: 'test:unit'
task :normalize_url do |task|
require "twingly/url/normalizer"

require 'rake/testtask'
namespace :test do
Rake::TestTask.new(:unit) do |test|
test.pattern = "test/unit/*_test.rb"
test.libs << 'lib'
test.libs << 'test'
Profile.measure "normalizing a short URL", 1000 do
Twingly::URL::Normalizer.normalize_url('http://www.duh.se/')
end
end
end

begin
require "rspec/core/rake_task"

task default: "spec"

Rake::TestTask.new(:profile) do |test|
test.pattern = "test/profile/*_test.rb"
test.libs << 'lib'
test.libs << 'test'
test.libs << 'test/lib'
RSpec::Core::RakeTask.new(:spec) do |task|
task.pattern = "spec/lib/**/*_spec.rb"
end
rescue LoadError
end
4 changes: 2 additions & 2 deletions lib/twingly/url.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

PublicSuffix::List.private_domains = false

SCHEMES = %w(http https)

module Twingly
module URL
module_function

UrlObject = Struct.new(:url, :domain) do
SCHEMES = %w(http https)

def valid?
url && domain && SCHEMES.include?(url.normalized_scheme)
end
Expand Down
23 changes: 23 additions & 0 deletions profile/profile.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
require "ruby-prof"

# Thin wrapper around ruby-prof for ad-hoc profiling (used from the Rake
# profile tasks).
class Profile
  # Profiles the given block with RubyProf, invoking it +count+ times, then
  # writes the RubyProf::MultiPrinter reports into the "tmp" directory
  # (relative to the current working directory) and lists that directory's
  # entries on stdout.
  #
  # @param name [String] label used only in the printed summary
  # @param count [Integer] number of times the block is called
  # @yield the code to measure
  def self.measure(name, count, &block)
    RubyProf.start

    count.times do
      block.call
    end

    result = RubyProf.stop
    result_directory = "tmp"
    # File.exists? was deprecated and removed in Ruby 3.2. Dir.exist? is the
    # supported spelling and checks specifically for a directory, which is
    # what Dir.mkdir needs.
    Dir.mkdir(result_directory) unless Dir.exist?(result_directory)
    printer = RubyProf::MultiPrinter.new(result)
    printer.print(path: result_directory)

    puts "Measured #{name} #{count} times"
    puts "Generated reports:"
    # Rejecting names that end with "." drops the "." and ".." entries.
    Dir.entries(result_directory).reject { |entry| entry.end_with?(".") }.each do |file|
      puts " #{result_directory}/#{file}"
    end
  end
end
33 changes: 33 additions & 0 deletions spec/lib/twingly/url/hasher_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
require "spec_helper"

# Pins the value each Hasher method produces for one fixed URL.
describe Twingly::URL::Hasher do
  # Single fixture shared by every example below.
  let(:url) { "http://blog.twingly.com/" }

  describe ".taskdb_hash" do
    # NOTE(review): the expected value is 30 hex characters, whereas a full MD5
    # hexdigest is 32 - presumably the hasher shortens it; confirm.
    it "returns a MD5 hexdigest" do
      expect(described_class.taskdb_hash(url)).to eq("B1E2D5AECF6649C2E44D17AEA3E0F4")
    end
  end

  describe ".blogstream_hash" do
    it "returns a MD5 hexdigest" do
      expect(described_class.blogstream_hash(url)).to eq("B1E2D5AECF6649C2E44D17AEA3E0F4")
    end
  end

  describe ".documentdb_hash" do
    it "returns a SHA256 unsigned long, native endian digest" do
      expect(described_class.documentdb_hash(url)).to eq(15340752212397415993)
    end
  end

  describe ".autopingdb_hash" do
    it "returns a SHA256 64-bit signed, native endian digest" do
      # Parentheses are required here: `eq -310...` makes Ruby emit an
      # "ambiguous first argument" warning.
      expect(described_class.autopingdb_hash(url)).to eq(-3105991861312135623)
    end
  end

  describe ".pingloggerdb_hash" do
    it "returns a SHA256 64-bit unsigned, native endian digest" do
      expect(described_class.pingloggerdb_hash(url)).to eq(15340752212397415993)
    end
  end
end
125 changes: 125 additions & 0 deletions spec/lib/twingly/url/normalization_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
require "spec_helper"

# Behavior specs for Twingly::URL::Normalizer: bulk normalization, URL
# extraction and single-URL normalization.
describe Twingly::URL::Normalizer do
  # Short alias for the module under test. No space before the parenthesis:
  # `let (:normalizer)` is parsed as a grouped expression and is flagged by
  # linters.
  let(:normalizer) { described_class }

  describe ".normalize" do
    it "accepts a String" do
      expect { normalizer.normalize("") }.not_to raise_error
    end

    it "accepts an Array" do
      expect { normalizer.normalize([]) }.not_to raise_error
    end

    it "handles URL with ] in it" do
      url = "http://www.iwaseki.co.jp/cgi/yybbs/yybbs.cgi/%DEuropean]buy"
      expect { normalizer.normalize(url) }.not_to raise_error
    end

    it "handles URL with reference to another URL in it" do
      url = "http://news.google.com/news/url?sa=t&fd=R&usg=AFQjCNGc4A_sfGS6fMMqggiK_8h6yk2miw&url=http:%20%20%20//fansided.com/2013/08/02/nike-decides-to-drop-milwaukee-brewers-ryan-braun"
      expect { normalizer.normalize(url) }.not_to raise_error
    end

    it "handles URL with umlauts in host" do
      url = "http://www.åäö.se/"
      expect(normalizer.normalize(url)).to eq([url])
    end

    it "handles URL with umlauts in path" do
      url = "http://www.aoo.se/öö"
      expect(normalizer.normalize(url)).to eq([url])
    end

    it "does not blow up when there's only protocol in the text" do
      url = "http://"
      expect { normalizer.normalize(url) }.not_to raise_error
    end

    it "does not blow up when there's no URL in the text" do
      url = "Just some text"
      expect { normalizer.normalize(url) }.not_to raise_error
    end

    it "does not create URLs for normal words" do
      url = "This is, just, some words. Yay!"
      expect(normalizer.normalize(url)).to eq([])
    end
  end

  describe ".extract_urls" do
    it "detects two urls in a String" do
      urls = "http://blog.twingly.com/ http://twingly.com/"
      response = normalizer.extract_urls(urls)

      expect(response.size).to eq(2)
    end

    it "detects two urls in an Array" do
      urls = %w(http://blog.twingly.com/ http://twingly.com/)
      response = normalizer.extract_urls(urls)

      expect(response.size).to eq(2)
    end

    it "always returns an Array" do
      response = normalizer.extract_urls(nil)

      expect(response).to be_instance_of(Array)
    end
  end

  describe ".normalize_url" do
    it "adds www if host is missing a subdomain" do
      url = "http://twingly.com/"

      expect(normalizer.normalize_url(url)).to eq("http://www.twingly.com/")
    end

    it "does not add www if the host has a subdomain" do
      url = "http://blog.twingly.com/"

      expect(normalizer.normalize_url(url)).to eq(url)
    end

    it "keeps www if the host already has it" do
      url = "http://www.twingly.com/"

      expect(normalizer.normalize_url(url)).to eq(url)
    end

    it "adds a trailing slash if missing" do
      url = "http://www.twingly.com"
      expected = "http://www.twingly.com/"

      expect(normalizer.normalize_url(url)).to eq(expected)
    end

    it "is able to normalize a url without protocol" do
      url = "www.twingly.com/"
      expected = "http://www.twingly.com/"

      expect(normalizer.normalize_url(url)).to eq(expected)
    end

    it "does not return broken URLs" do
      url = "http://www.twingly."

      expect(normalizer.normalize_url(url)).to be_nil
    end

    it "does not add www. to blogspot blogs" do
      url = "http://jlchen1026.blogspot.com/"

      expect(normalizer.normalize_url(url)).to eq(url)
    end

    it "downcases the URL" do
      url = "http://www.Twingly.com/"
      expected = url.downcase

      expect(normalizer.normalize_url(url)).to eq(expected)
    end
  end
end
37 changes: 37 additions & 0 deletions spec/lib/twingly/url/url_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
require "spec_helper"

# Specs for the top-level Twingly::URL helpers: parsing must never raise, and
# only http/https URLs count as valid.
describe Twingly::URL do
  describe ".parse" do
    %w(http://http http:/// http:// http:/ http: htttp a 1 ?).each do |invalid_url|
      it "handles the invalid url '#{invalid_url}'" do
        expect { described_class.parse(invalid_url) }.not_to raise_error
      end
    end

    # Exercises valid? on the object returned by .parse.
    describe ".valid?" do
      %w(ftp://blog.twingly.com/ blablahttp://blog.twingly.com/).each do |invalid_url|
        # The URL is part of the description so each generated example is
        # distinguishable in the output.
        it "returns false for the non-http(s) url '#{invalid_url}'" do
          expect(described_class.parse(invalid_url).valid?).to be false
        end
      end

      %w(http://blog.twingly.com/ hTTP://blog.twingly.com/ https://blog.twingly.com).each do |valid_url|
        it "returns true for the valid url '#{valid_url}'" do
          expect(described_class.parse(valid_url).valid?).to be true
        end
      end
    end
  end

  describe ".validate" do
    it "returns true for a valid url" do
      expect(described_class.validate("http://blog.twingly.com/")).to be true
    end

    %w(http:// feedville.com,2007-06-19:/blends/16171).each do |invalid_url|
      it "returns false for the invalid url '#{invalid_url}'" do
        expect(described_class.validate(invalid_url)).to be_falsey
      end
    end
  end
end
80 changes: 80 additions & 0 deletions spec/lib/twingly/url/utilities_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
require "spec_helper"

# Specs for Twingly::URL::Utilities.remove_scheme: only a leading http/https
# scheme (in any letter case) is stripped; everything else is left untouched.
describe Twingly::URL::Utilities do
  # Every example below calls remove_scheme, so the group is named after it
  # (it was previously mislabelled ".normalize").
  describe ".remove_scheme" do
    it "does not remove scheme from non HTTP(S) URLs" do
      url = "gopher://www.duh.se/"

      expect(described_class.remove_scheme(url)).to eq(url)
    end

    it "removes scheme from mixed case HTTP URL" do
      url = "HttP://www.duh.se/"
      expected = "//www.duh.se/"

      expect(described_class.remove_scheme(url)).to eq(expected)
    end

    it "removes scheme from mixed case HTTPS URL" do
      url = "hTTpS://www.duh.se/"
      expected = "//www.duh.se/"

      expect(described_class.remove_scheme(url)).to eq(expected)
    end

    it "removes scheme from lowercase HTTP URL" do
      url = "http://www.duh.se/"
      expected = "//www.duh.se/"

      expect(described_class.remove_scheme(url)).to eq(expected)
    end

    it "removes scheme from lowercase HTTPS URL" do
      url = "https://www.duh.se/"
      expected = "//www.duh.se/"

      expect(described_class.remove_scheme(url)).to eq(expected)
    end

    it "removes scheme from uppercase HTTP URL" do
      url = "HTTP://WWW.DUH.SE/"
      expected = "//WWW.DUH.SE/"

      expect(described_class.remove_scheme(url)).to eq(expected)
    end

    it "removes scheme from uppercase HTTPS URL" do
      url = "HTTPS://WWW.DUH.SE/"
      expected = "//WWW.DUH.SE/"

      expect(described_class.remove_scheme(url)).to eq(expected)
    end

    it "removes scheme from URL with non ASCII characters" do
      url = "http://www.thecloset.gr/people/bloggers-pick-ιωάννα-τσιγαρίδα"
      expected = "//www.thecloset.gr/people/bloggers-pick-ιωάννα-τσιγαρίδα"

      expect(described_class.remove_scheme(url)).to eq(expected)
    end

    it "only removes scheme from HTTP URL" do
      url = "http://feedjira.herokuapp.com/?url=http://developer.twingly.com/feed.xml"
      expected = "//feedjira.herokuapp.com/?url=http://developer.twingly.com/feed.xml"

      expect(described_class.remove_scheme(url)).to eq(expected)
    end

    it "only removes scheme from HTTPS URL" do
      url = "https://feedjira.herokuapp.com/?url=https://signalvnoise.com/posts.rss"
      expected = "//feedjira.herokuapp.com/?url=https://signalvnoise.com/posts.rss"

      expect(described_class.remove_scheme(url)).to eq(expected)
    end

    it "does not remove scheme from non HTTP(S) URLs with parameter" do
      url = "ftp://ftp.example.com/?url=https://www.example.com/"

      expect(described_class.remove_scheme(url)).to eq(url)
    end
  end
end
18 changes: 18 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require "twingly/url"
require "twingly/url/hasher"
require "twingly/url/normalizer"
require "twingly/url/utilities"

# Suite-wide RSpec settings.
RSpec.configure do |rspec|
  # Expectation framework options.
  rspec.expect_with(:rspec) do |expectation_config|
    expectation_config.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # Mocking framework options.
  rspec.mock_with(:rspec) do |mock_config|
    mock_config.verify_partial_doubles = true
  end

  # Run examples in random order.
  rspec.order = :random

  # Seed Ruby's global random number generator with RSpec's seed.
  Kernel.srand(rspec.seed)
end
11 changes: 0 additions & 11 deletions test/test_helper.rb

This file was deleted.

Loading

0 comments on commit 86f5286

Please sign in to comment.