Merge pull request #36 from twingly/rspec

Change from minitest to rspec
twingly · Sep 8, 2015 · 86f5286 · 86f5286
2 parents 7ea8c42 + 8b0ab5d
commit 86f5286
Show file tree

Hide file tree

Showing 15 changed files with 338 additions and 326 deletions.
diff --git a/.rspec b/.rspec
@@ -0,0 +1,3 @@
+--color
+--format documentation
+--require spec_helper
diff --git a/Rakefile b/Rakefile
@@ -1,20 +1,22 @@
-require 'bundler/setup'
+namespace :profile do
+  require_relative "profile/profile"
 
-task default: 'test:unit'
-task test:    'test:unit'
+  task :normalize_url do |task|
+    require "twingly/url/normalizer"
 
-require 'rake/testtask'
-namespace :test do
-  Rake::TestTask.new(:unit) do |test|
-    test.pattern = "test/unit/*_test.rb"
-    test.libs << 'lib'
-    test.libs << 'test'
+    Profile.measure "normalizing a short URL", 1000 do
+      Twingly::URL::Normalizer.normalize_url('http://www.duh.se/')
+    end
   end
+end
+
+begin
+  require "rspec/core/rake_task"
+
+  task default: "spec"
 
-  Rake::TestTask.new(:profile) do |test|
-    test.pattern = "test/profile/*_test.rb"
-    test.libs << 'lib'
-    test.libs << 'test'
-    test.libs << 'test/lib'
+  RSpec::Core::RakeTask.new(:spec) do |task|
+    task.pattern = "spec/lib/**/*_spec.rb"
   end
+rescue LoadError
 end
diff --git a/lib/twingly/url.rb b/lib/twingly/url.rb
@@ -3,13 +3,13 @@
 
 PublicSuffix::List.private_domains = false
 
-SCHEMES = %w(http https)
-
 module Twingly
   module URL
     module_function
 
     UrlObject = Struct.new(:url, :domain) do
+      SCHEMES = %w(http https)
+
       def valid?
         url && domain && SCHEMES.include?(url.normalized_scheme)
       end

diff --git a/profile/profile.rb b/profile/profile.rb
@@ -0,0 +1,23 @@
+require "ruby-prof"
+
+class Profile
+  def self.measure(name, count, &block)
+    RubyProf.start
+
+    count.times do
+      block.call
+    end
+
+    result = RubyProf.stop
+    result_directory = "tmp"
+    Dir.mkdir(result_directory) unless File.exists?(result_directory)
+    printer = RubyProf::MultiPrinter.new(result)
+    printer.print(path: result_directory)
+
+    puts "Measured #{name} #{count} times"
+    puts "Generated reports:"
+    Dir.entries(result_directory).reject { |entry| entry.end_with?(".") }.each do |file|
+      puts "  #{result_directory}/#{file}"
+    end
+  end
+end
diff --git a/spec/lib/twingly/url/hasher_spec.rb b/spec/lib/twingly/url/hasher_spec.rb
@@ -0,0 +1,33 @@
+require "spec_helper"
+
+describe Twingly::URL::Hasher do
+  describe ".taskdb_hash" do
+    it "returns a MD5 hexdigest" do
+      expect(Twingly::URL::Hasher.taskdb_hash("http://blog.twingly.com/")).to eq "B1E2D5AECF6649C2E44D17AEA3E0F4"
+    end
+  end
+
+  describe ".blogstream_hash" do
+    it "returns a MD5 hexdigest" do
+      expect(Twingly::URL::Hasher.blogstream_hash("http://blog.twingly.com/")).to eq "B1E2D5AECF6649C2E44D17AEA3E0F4"
+    end
+  end
+
+  describe ".documentdb_hash" do
+    it "returns a SHA256 unsigned long, native endian digest" do
+      expect(Twingly::URL::Hasher.documentdb_hash("http://blog.twingly.com/")).to eq 15340752212397415993
+    end
+  end
+
+  describe ".autopingdb_hash" do
+    it "returns a SHA256 64-bit signed, native endian digest" do
+      expect(Twingly::URL::Hasher.autopingdb_hash("http://blog.twingly.com/")).to eq -3105991861312135623
+    end
+  end
+
+  describe ".pingloggerdb_hash" do
+    it "returns a SHA256 64-bit unsigned, native endian digest" do
+      expect(Twingly::URL::Hasher.pingloggerdb_hash("http://blog.twingly.com/")).to eq 15340752212397415993
+    end
+  end
+end
diff --git a/spec/lib/twingly/url/normalization_spec.rb b/spec/lib/twingly/url/normalization_spec.rb
@@ -0,0 +1,125 @@
+require "spec_helper"
+
+describe Twingly::URL::Normalizer do
+  # The module under test; every example below calls its module functions.
+  let(:normalizer) { described_class }
+
+  describe ".normalize" do
+    it "accepts a String" do
+      expect { normalizer.normalize("") }.not_to raise_error
+    end
+
+    it "accepts an Array" do
+      expect { normalizer.normalize([]) }.not_to raise_error
+    end
+
+    it "handles URL with ] in it" do
+      url = "http://www.iwaseki.co.jp/cgi/yybbs/yybbs.cgi/%DEuropean]buy"
+      expect { normalizer.normalize(url) }.not_to raise_error
+    end
+
+    it "handles URL with reference to another URL in it" do
+      url = "http://news.google.com/news/url?sa=t&fd=R&usg=AFQjCNGc4A_sfGS6fMMqggiK_8h6yk2miw&url=http:%20%20%20//fansided.com/2013/08/02/nike-decides-to-drop-milwaukee-brewers-ryan-braun"
+      expect { normalizer.normalize(url) }.not_to raise_error
+    end
+
+    it "handles URL with umlauts in host" do
+      url = "http://www.åäö.se/"
+      expect(normalizer.normalize(url)).to eq([url])
+    end
+
+    it "handles URL with umlauts in path" do
+      url = "http://www.aoo.se/öö"
+      expect(normalizer.normalize(url)).to eq([url])
+    end
+
+    it "does not blow up when there's only protocol in the text" do
+      url = "http://"
+      expect { normalizer.normalize(url) }.not_to raise_error
+    end
+
+    it "does not blow up when there's no URL in the text" do
+      url = "Just some text"
+      expect { normalizer.normalize(url) }.not_to raise_error
+    end
+
+    it "does not create URLs for normal words" do
+      url = "This is, just, some words. Yay!"
+      expect(normalizer.normalize(url)).to eq([])
+    end
+  end
+
+  describe ".extract_urls" do
+    it "detects two urls in a String" do
+      urls = "http://blog.twingly.com/ http://twingly.com/"
+      response = normalizer.extract_urls(urls)
+
+      expect(response.size).to eq(2)
+    end
+
+    it "detects two urls in an Array" do
+      urls = %w(http://blog.twingly.com/ http://twingly.com/)
+      response = normalizer.extract_urls(urls)
+
+      expect(response.size).to eq(2)
+    end
+
+    it "always returns an Array" do
+      response = normalizer.extract_urls(nil)
+
+      expect(response).to be_instance_of(Array)
+    end
+  end
+
+  describe ".normalize_url" do
+    it "adds www if host is missing a subdomain" do
+      url = "http://twingly.com/"
+
+      expect(normalizer.normalize_url(url)).to eq("http://www.twingly.com/")
+    end
+
+    it "does not add www if the host has a subdomain" do
+      url = "http://blog.twingly.com/"
+
+      expect(normalizer.normalize_url(url)).to eq(url)
+    end
+
+    it "keeps www if the host already has it" do
+      url = "http://www.twingly.com/"
+
+      expect(normalizer.normalize_url(url)).to eq(url)
+    end
+
+    it "adds a trailing slash if missing" do
+      url = "http://www.twingly.com"
+      expected = "http://www.twingly.com/"
+
+      expect(normalizer.normalize_url(url)).to eq(expected)
+    end
+
+    it "is able to normalize a url without protocol" do
+      url = "www.twingly.com/"
+      expected = "http://www.twingly.com/"
+
+      expect(normalizer.normalize_url(url)).to eq(expected)
+    end
+
+    it "does not return broken URLs" do
+      url = "http://www.twingly."
+
+      expect(normalizer.normalize_url(url)).to eq(nil)
+    end
+
+    it "does not add www. to blogspot blogs" do
+      url = "http://jlchen1026.blogspot.com/"
+
+      expect(normalizer.normalize_url(url)).to eq(url)
+    end
+
+    it "downcases the URL" do
+      url = "http://www.Twingly.com/"
+      expected = url.downcase
+
+      expect(normalizer.normalize_url(url)).to eq(expected)
+    end
+  end
+end
diff --git a/spec/lib/twingly/url/url_spec.rb b/spec/lib/twingly/url/url_spec.rb
@@ -0,0 +1,37 @@
+require "spec_helper"
+
+describe Twingly::URL do
+  describe ".parse" do
+    %w(http://http http:/// http:// http:/ http: htttp a 1 ?).each do |invalid_url|
+      it "handles the invalid url '#{invalid_url}'" do
+        expect { described_class.parse(invalid_url) }.not_to raise_error
+      end
+    end
+
+    describe ".valid?" do
+      %w(ftp://blog.twingly.com/ blablahttp://blog.twingly.com/).each do |invalid_url|
+        it "returns false for non-http and https" do
+          expect(described_class.parse(invalid_url).valid?).to be false
+        end
+      end
+
+      %w(http://blog.twingly.com/ hTTP://blog.twingly.com/ https://blog.twingly.com).each do |valid_url|
+        it "returns true for the valid url '#{valid_url}" do
+          expect(described_class.parse(valid_url).valid?).to be true
+        end
+      end
+    end
+  end
+
+  describe ".validate" do
+    it "returns true for a valid url" do
+      expect(described_class.validate("http://blog.twingly.com/")).to be true
+    end
+
+    %w(http:// feedville.com,2007-06-19:/blends/16171).each do |invalid_url|
+      it "returns false for the invalid url '#{invalid_url}'" do
+        expect(described_class.validate(invalid_url)).to be_falsey
+      end
+    end
+  end
+end
diff --git a/spec/lib/twingly/url/utilities_spec.rb b/spec/lib/twingly/url/utilities_spec.rb
@@ -0,0 +1,80 @@
+require "spec_helper"
+
+describe Twingly::URL::Utilities do
+  describe ".normalize" do
+    it "does not remove scheme from non HTTP(S) URLs" do
+      url = "gopher://www.duh.se/"
+
+      expect(described_class.remove_scheme(url)).to eq(url)
+    end
+
+    it "removes scheme from mixed case HTTP URL" do
+      url = "HttP://www.duh.se/"
+      expected = "//www.duh.se/"
+
+      expect(described_class.remove_scheme(url)).to eq(expected)
+    end
+
+    it "removes scheme from mixed case HTTPS URL" do
+      url = "hTTpS://www.duh.se/"
+      expected = "//www.duh.se/"
+
+      expect(described_class.remove_scheme(url)).to eq(expected)
+    end
+
+    it "removes scheme from lowercase HTTP URL" do
+      url = "http://www.duh.se/"
+      expected = "//www.duh.se/"
+
+      expect(described_class.remove_scheme(url)).to eq(expected)
+    end
+
+    it "removes scheme from lowercase HTTPS URL" do
+      url = "https://www.duh.se/"
+      expected = "//www.duh.se/"
+
+      expect(described_class.remove_scheme(url)).to eq(expected)
+    end
+
+    it "removes scheme from uppercase HTTP URL" do
+      url = "HTTP://WWW.DUH.SE/"
+      expected = "//WWW.DUH.SE/"
+
+      expect(described_class.remove_scheme(url)).to eq(expected)
+    end
+
+    it "removes scheme from uppercase HTTPS URL" do
+      url = "HTTPS://WWW.DUH.SE/"
+      expected = "//WWW.DUH.SE/"
+
+      expect(described_class.remove_scheme(url)).to eq(expected)
+    end
+
+    it "removes scheme from URL with non ASCII characters" do
+      url = "http://www.thecloset.gr/people/bloggers-pick-ιωάννα-τσιγαρίδα"
+      expected = "//www.thecloset.gr/people/bloggers-pick-ιωάννα-τσιγαρίδα"
+
+      expect(described_class.remove_scheme(url)).to eq(expected)
+    end
+
+    it "only removes scheme from HTTP URL" do
+      url = "http://feedjira.herokuapp.com/?url=http://developer.twingly.com/feed.xml"
+      expected = "//feedjira.herokuapp.com/?url=http://developer.twingly.com/feed.xml"
+
+      expect(described_class.remove_scheme(url)).to eq(expected)
+    end
+
+    it "only removes scheme from HTTPS URL" do
+      url = "https://feedjira.herokuapp.com/?url=https://signalvnoise.com/posts.rss"
+      expected = "//feedjira.herokuapp.com/?url=https://signalvnoise.com/posts.rss"
+
+      expect(described_class.remove_scheme(url)).to eq(expected)
+    end
+
+    it "does not remove scheme from non HTTP(S) URLs with parameter" do
+      url = "ftp://ftp.example.com/?url=https://www.example.com/"
+
+      expect(described_class.remove_scheme(url)).to eq(url)
+    end
+  end
+end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
@@ -0,0 +1,18 @@
+require "twingly/url"
+require "twingly/url/hasher"
+require "twingly/url/normalizer"
+require "twingly/url/utilities"
+
+RSpec.configure do |config|
+  config.expect_with :rspec do |expectations|
+    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
+  end
+
+  config.mock_with :rspec do |mocks|
+    mocks.verify_partial_doubles = true
+  end
+
+  config.order = :random
+
+  Kernel.srand config.seed
+end
diff --git a/test/test_helper.rb b/test/test_helper.rb