require 'rubygems'
require 'cgi'
require 'json'

# Yahoo! BOSS backend for RubyWebSearch (from the patch
# "Added Yahoo! BOSS, requires :api_key param").
#
# NOTE(review): the HTTP calls use ::Curl::Easy, which is not required here;
# presumably the rest of the library loads curb -- confirm.
module RubyWebSearch

  class Yahoo

    # Runs a search, fetching every result page in its own thread.
    #
    # ==== Params
    # options<Hash>:: see Yahoo::Query#initialize
    #
    # ==== Returns
    # Yahoo::Response:: the collected results, trimmed to the requested size
    def self.search(options={})
      ::RubyWebSearch::Yahoo::Query.new(options).execute
    end

    # Same as search, but pages are fetched one after another on the calling
    # thread, reusing a single Curl handle.
    def self.unthreaded_search(options={})
      ::RubyWebSearch::Yahoo::Query.new(options).execute_unthreaded
    end

    class Query
      attr_accessor :query, :start_index, :result_size, :filter, :country_code, :language_code
      attr_accessor :safe_search, :type, :custom_search_engine_id, :version, :referer, :request_url
      attr_accessor :size, :cursor, :custom_request_url, :response, :api_key

      class Error < StandardError; end

      SEARCH_BASE_URLS = { :web   => "http://boss.yahooapis.com/ysearch/web",
                           :news  => "http://boss.yahooapis.com/ysearch/news",
                           :image => "http://boss.yahooapis.com/ysearch/images",
                         }

      # Number of results asked for per request; also the pagination step.
      PAGE_SIZE = 10

      #
      # You can overwrite the query building process by passing the request url to use.
      #
      # ==== Params
      # query                     the search terms (required unless a custom url is given)
      # api_key                   BOSS application id (required unless a custom url is given)
      # custom_request_url        complete url to request instead of building one.
      #                           For backward compatibility :request_url wins when
      #                           both options are present.
      # start_index               zero-based offset of the first result. Default: 0
      # size                      number of results wanted. Default: 5.
      #                           Requests for 6 to 9 results are rounded up to 10.
      # result_size               legacy option. When set (it is forced to "large"
      #                           for size > 5) the page size is sent explicitly.
      # type                      :web, :news or :image. Default: :web
      # country_code,
      # language_code             sent as region/lang, only when both are given
      # version                   API version used in the url path. Default: "1"
      # referer                   Referer header sent with every request
      # filter, safe_search,
      # custom_search_engine_id   stored on the query but not sent to the API
      #
      # ==== Raises
      # Yahoo::Query::Error:: when the query or the api key is missing
      #
      def initialize(options={})
        @referer = options[:referer] || "http://github.com/mattetti/"
        if options[:custom_request_url]
          # The url used to be read from :request_url only, which silently
          # dropped a url passed as :custom_request_url.
          @custom_request_url = options[:request_url] || options[:custom_request_url]
        else
          @query = options[:query]
          raise Error, "You need to pass a query" unless @query
          @cursor                   = (options[:start_index] || 0).to_i
          @start_index              = @cursor
          @result_size              = options[:result_size]
          @filter                   = options[:filter]
          @type                     = (options[:type] || :web).to_sym
          @country_code             = options[:country_code]
          @language_code            = options[:language_code]
          @safe_search              = options[:safe_search]
          @custom_search_engine_id  = options[:custom_search_engine_id]
          @version                  = options[:version] || "1"
          @api_key                  = options[:api_key]
          raise Error, "You need to pass an api key" unless @api_key
          @size                     = options[:size] || 5
          @result_size              = "large" if size > 5 # increase the result set size to avoid making too many requests
          @size                     = PAGE_SIZE if (@result_size == "large" && size < PAGE_SIZE)
        end
        @response ||= Response.new(:query => (query || custom_request_url), :size => size)
      end

      # Builds the url for the page starting at the current cursor and
      # remembers it in request_url.
      #
      # ==== Returns
      # String:: the custom url when one was given, the built url otherwise
      def build_request
        return custom_request_url if custom_request_url

        @request_url = request_url_for(cursor)
        puts request_url if $RUBY_WEB_SEARCH_DEBUG
        request_url
      end

      # Builds the urls of every page needed to collect +size+ results,
      # starting at the current cursor. The cursor is left untouched, so
      # calling this method several times returns the same urls.
      #
      # ==== Returns
      # Array<String>:: one url per page (PAGE_SIZE results each)
      def build_requests
        return [custom_request_url] if custom_request_url

        page_count = (size / PAGE_SIZE.to_f).ceil
        requests   = (0...page_count).map { |page| request_url_for(cursor + page * PAGE_SIZE) }

        puts requests.inspect if $RUBY_WEB_SEARCH_DEBUG
        requests
      end

      # Makes the request to Yahoo! on the calling thread.
      # If a larger set was requested than what is returned, more requests
      # are made until the correct amount is available or a request brings
      # no new result (so a short result set cannot loop forever).
      #
      # ==== Returns
      # Yahoo::Response
      def execute_unthreaded
        @curl_request ||= ::Curl::Easy.new { |curl| curl.headers["Referer"] = referer }
        loop do
          @curl_request.url = build_request
          @curl_request.perform

          already_collected = response.results.size
          response.process(parse_body(@curl_request.body_str))
          received = response.results.size - already_collected

          # A custom url is requested exactly once; an empty page means
          # there is nothing more to fetch.
          break if custom_request_url || received.zero?
          @cursor += received
          break if response.results.size >= size
          puts "cursor: #{cursor} requested results size: #{size}" if $RUBY_WEB_SEARCH_DEBUG
        end
        response.limit(size)
      end

      # Makes the requests to Yahoo!, one thread per page, and merges the
      # pages in order.
      #
      # ==== Returns
      # Yahoo::Response
      def execute
        threads = build_requests.map do |req|
          Thread.new do
            curl_request = ::Curl::Easy.new(req) { |curl| curl.headers["Referer"] = referer }
            curl_request.perform
            parse_body(curl_request.body_str)
          end
        end
        # Thread#value joins the thread and re-raises anything it raised.
        threads.each { |thread| response.process(thread.value) }
        response.limit(size)
      end

      private

      # Builds the request url for the page whose first result is +start+.
      def request_url_for(start)
        base = SEARCH_BASE_URLS[type]
        raise Error, "Unknown search type: #{type.inspect}" unless base

        params = ["appid=#{CGI.escape(api_key.to_s)}"]
        # BOSS expects a number here; the "small"/"large" labels are not valid counts.
        params << "count=#{PAGE_SIZE}" if result_size
        params << "start=#{start}" if start > 0
        if language_code && country_code
          params << "lang=#{CGI.escape(language_code.to_s)}"
          params << "region=#{CGI.escape(country_code.to_s)}"
        end

        "#{base}/v#{version}/#{CGI.escape(query.to_s)}?#{params.join('&')}"
      end

      # Parses a response body; an empty body yields an empty hash.
      # JSON.parse is used rather than JSON.load because the body is remote data.
      def parse_body(body)
        return {} if body.nil? || body.strip.empty?
        JSON.parse(body)
      end

    end #of Query


    class Response
      attr_reader :results, :status, :query, :size, :estimated_result_count

      # Keys under which the "ysearchresponse" payload is searched for results.
      RESULTSET_KEYS = %w(resultset_web resultset_news resultset_images)

      # ==== Params
      # raw_response<Hash>:: either a parsed API response (string keys) or
      #                      the :query / :size the response is created for
      def initialize(raw_response={})
        @results = []
        process(raw_response) if raw_response && !raw_response.empty?
      end

      # Merges a parsed API response into this object: remembers the status
      # code and, when the status is 200, appends the results and records
      # the total hit count of the first successful page.
      # Anything that is not a Hash is ignored.
      #
      # ==== Returns
      # Yahoo::Response:: self
      def process(raw_response={})
        return self unless raw_response.is_a?(Hash)

        @query   ||= raw_response[:query]
        @size    ||= raw_response[:size]
        @results ||= []

        payload = raw_response["ysearchresponse"]
        return self unless payload.is_a?(Hash)

        @status = payload["responsecode"].to_i
        return self unless status == 200

        resultset_key = RESULTSET_KEYS.find { |key| payload[key] }
        return self unless resultset_key

        @estimated_result_count ||= payload["totalhits"]
        @results += payload[resultset_key].map do |r|
          {
            :title      => r["title"],
            :url        => r["clickurl"],
            :cache_url  => r["cacheUrl"],
            :content    => r["abstract"],
            :domain     => r["url"]
          }
        end
        self
      end

      # Keeps at most +req_size+ results; nil keeps everything.
      #
      # ==== Returns
      # Yahoo::Response:: self
      def limit(req_size)
        @results = @results[0...req_size] if req_size
        self
      end

    end #of Response

  end #of Yahoo

end