feat: guess part of speech

vpukhanov · Jul 28, 2024 · 7ca4260 · 7ca4260
1 parent cd2528d
commit 7ca4260
Show file tree

Hide file tree

Showing 8 changed files with 109 additions and 6 deletions.
diff --git a/.rubocop.yml b/.rubocop.yml
@@ -14,5 +14,11 @@ Rails/RefuteMethods:
     - "spec/**/*"
 
 Style/StringLiterals:
+  Enabled: true
+  EnforcedStyle: double_quotes
   Include:
+    - "app/**/*"
+    - "config/**/*"
+    - "lib/**/*"
     - "spec/**/*"
+    - "Gemfile"
diff --git a/Gemfile b/Gemfile
@@ -47,6 +47,9 @@ gem "bootsnap", require: false
 # Use Active Storage variants [https://guides.rubyonrails.org/active_storage_overview.html#transforming-images]
 # gem "image_processing", "~> 1.2"
 
+# Classify terms' part-of-speech
+gem "engtagger"
+
 group :development, :test do
   # See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem
   gem "debug", platforms: %i[ mri windows ]

diff --git a/Gemfile.lock b/Gemfile.lock
@@ -105,6 +105,8 @@ GEM
       reline (>= 0.3.8)
     diff-lcs (1.5.1)
     drb (2.2.1)
+    engtagger (0.4.1)
+      lru_redux
     erubi (1.13.0)
     globalid (1.2.1)
       activesupport (>= 6.1)
@@ -128,6 +130,7 @@ GEM
     loofah (2.22.0)
       crass (~> 1.0.2)
       nokogiri (>= 1.12.0)
+    lru_redux (1.1.0)
     mail (2.8.1)
       mini_mime (>= 0.1.1)
       net-imap
@@ -343,6 +346,7 @@ DEPENDENCIES
   bootsnap
   capybara
   debug
+  engtagger
   importmap-rails
   jbuilder
   puma (>= 5.0)

diff --git a/app/assets/stylesheets/application.tailwind.css b/app/assets/stylesheets/application.tailwind.css
@@ -6,4 +6,12 @@
   a {
     @apply text-blue-600 hover:text-blue-800;
   }
+
+  .tooltip {
+    @apply invisible absolute;
+  }
+
+  .has-tooltip:hover .tooltip {
+    @apply visible z-50;
+  }
 }
diff --git a/app/models/term.rb b/app/models/term.rb
@@ -6,6 +6,7 @@ class Term
   attribute :definition, :string
   attribute :example, :string
   attribute :author, :string
+  attribute :part_of_speech, :string
 
   def initialize(attributes = {})
     super

diff --git a/app/services/part_of_speech_classifier.rb b/app/services/part_of_speech_classifier.rb
@@ -0,0 +1,69 @@
+require "engtagger"
+
+# Guesses a human-readable part of speech for a single-word term by running
+# it (and, for noun readings, its definition) through EngTagger.
+class PartOfSpeechClassifier
+  # Maps lowercase tagger tag names to the label shown to the user.
+  #
+  # NOTE(review): "det", "prps" and "pp" are believed to be the names
+  # EngTagger actually emits for determiners, possessive pronouns and
+  # sentence-ending punctuation — confirm against the gem's tag set. The
+  # Penn-style "dt", "prp$" and "." keys are kept for backward compatibility;
+  # "prp$" and "." cannot be produced by TAG_PATTERN, which only captures
+  # word characters.
+  POS_TAG_HUMANIZED = {
+    "nn" => "noun",
+    "nns" => "noun",
+    "nnp" => "noun",
+    "nnps" => "noun",
+    "vb" => "verb",
+    "vbd" => "verb",
+    "vbg" => "verb",
+    "vbn" => "verb",
+    "vbp" => "verb",
+    "vbz" => "verb",
+    "jj" => "adjective",
+    "jjr" => "adjective",
+    "jjs" => "adjective",
+    "rb" => "adverb",
+    "rbr" => "adverb",
+    "rbs" => "adverb",
+    "in" => "preposition",
+    "cc" => "conjunction",
+    "dt" => "determiner",
+    "det" => "determiner",
+    "cd" => "number",
+    "prp" => "pronoun",
+    "prp$" => "pronoun",
+    "prps" => "pronoun",
+    "rp" => "particle",
+    "fw" => "misc",
+    "sym" => "misc",
+    "." => "punctuation",
+    "pp" => "punctuation"
+  }.freeze
+
+  # Label returned when the tag is missing or not listed in POS_TAG_HUMANIZED.
+  FALLBACK_LABEL = "misc."
+
+  # Captures the name of an opening tag such as "<nn>"; closing tags
+  # ("</nn>") do not match because of the slash.
+  TAG_PATTERN = /<(\w+)>/
+
+  # Convenience wrapper that builds a classifier and classifies once.
+  #
+  # @param word [String, nil] the term to classify
+  # @param definition [String, nil] the term's definition, used as context
+  # @return [String, nil] a label, or nil when +word+ is not a single word
+  def self.classify(word, definition)
+    new.classify(word, definition)
+  end
+
+  def initialize
+    @tagger = EngTagger.new
+  end
+
+  # Classifies +word+, consulting +definition+ only when the word on its own
+  # is tagged as a plain or proper noun.
+  #
+  # @param word [String, nil] the term to classify
+  # @param definition [String, nil] the term's definition, used as context
+  # @return [String, nil] a label from POS_TAG_HUMANIZED, FALLBACK_LABEL for
+  #   an unknown or missing tag, or nil when +word+ is nil, blank or
+  #   contains more than one whitespace-separated token
+  def classify(word, definition)
+    return nil unless single_word?(word)
+
+    pos = first_tag(word)
+
+    # A noun tag for the bare word is not treated as a clear answer, so the
+    # word is re-tagged with the definition appended for context.
+    pos = first_tag("#{word} #{definition}") if %w[nn nnp].include?(pos)
+
+    humanize_tag(pos)
+  end
+
+  private
+
+  # Returns the name of the first tag the tagger assigns to +text+, or nil
+  # when no tag is found. +to_s+ guards against a nil result from the tagger
+  # (presumably returned for text it rejects — verify against the gem).
+  def first_tag(text)
+    @tagger.add_tags(text).to_s[TAG_PATTERN, 1]
+  end
+
+  # True when +word+ consists of exactly one whitespace-separated token;
+  # nil is treated as an empty string.
+  def single_word?(word)
+    word.to_s.split.size == 1
+  end
+
+  # Translates a tagger tag into its display label.
+  def humanize_tag(tag)
+    POS_TAG_HUMANIZED.fetch(tag, FALLBACK_LABEL)
+  end
+end
diff --git a/app/services/term_fetcher.rb b/app/services/term_fetcher.rb
@@ -8,14 +8,17 @@ def initialize(word)
   end
 
+  # Looks up @word via UrbanDictionaryService, takes the first definition
+  # returned, guesses its part of speech and wraps the result in a Term.
+  #
+  # NOTE(review): if the service returns an empty list, `definition` is nil
+  # and the hash lookups below raise NoMethodError — confirm the caller
+  # handles the "no results" case.
   def fetch
-    definitions = UrbanDictionaryService.define(@word)
-    first_definition = definitions.first
+    all_definitions = UrbanDictionaryService.define(@word)
+    definition = all_definitions.first
+
+    # The classifier returns nil for multi-word terms, in which case the
+    # Term is built without a part of speech.
+    part_of_speech = PartOfSpeechClassifier.classify(definition["word"], definition["definition"])
 
     Term.new(
-      word: first_definition["word"],
-      definition: first_definition["definition"],
-      example: first_definition["example"],
-      author: first_definition["author"]
+      word: definition["word"],
+      definition: definition["definition"],
+      example: definition["example"],
+      author: definition["author"],
+      part_of_speech: part_of_speech
     )
   end
 end
diff --git a/app/views/terms/show.html.erb b/app/views/terms/show.html.erb
@@ -1,6 +1,15 @@
 <div class="max-w-3xl mx-auto py-12">
   <article>
     <h1 class="text-5xl font-bold"><%= @term.word %></h1>
+    <% if @term.part_of_speech.present? %>
+      <p class="text-xl italic mt-2">
+        <%= @term.part_of_speech %>
+        <span class="has-tooltip inline-block ml-2 not-italic">
+          <span class="tooltip rounded shadow-lg py-2 px-4 bg-gray-100 text-gray-800 mt-8">Our best guess</span>
+          <i class="fa-solid fa-wand-magic-sparkles"></i>
+        </span>
+      </p>
+    <% end %>
 
     <% if @term.definition.present? %>
       <p class="text-xl leading-relaxed mt-8">