Skip to content

Commit

Permalink
feat: guess part of speech
Browse files Browse the repository at this point in the history
  • Loading branch information
vpukhanov committed Jul 28, 2024
1 parent cd2528d commit 7ca4260
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 6 deletions.
6 changes: 6 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,11 @@ Rails/RefuteMethods:
- "spec/**/*"

Style/StringLiterals:
Enabled: true
EnforcedStyle: double_quotes
Include:
- "app/**/*"
- "config/**/*"
- "lib/**/*"
- "spec/**/*"
- "Gemfile"
3 changes: 3 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ gem "bootsnap", require: false
# Use Active Storage variants [https://guides.rubyonrails.org/active_storage_overview.html#transforming-images]
# gem "image_processing", "~> 1.2"

# Classify terms' part-of-speech
gem "engtagger"

group :development, :test do
# See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem
gem "debug", platforms: %i[ mri windows ]
Expand Down
4 changes: 4 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ GEM
reline (>= 0.3.8)
diff-lcs (1.5.1)
drb (2.2.1)
engtagger (0.4.1)
lru_redux
erubi (1.13.0)
globalid (1.2.1)
activesupport (>= 6.1)
Expand All @@ -128,6 +130,7 @@ GEM
loofah (2.22.0)
crass (~> 1.0.2)
nokogiri (>= 1.12.0)
lru_redux (1.1.0)
mail (2.8.1)
mini_mime (>= 0.1.1)
net-imap
Expand Down Expand Up @@ -343,6 +346,7 @@ DEPENDENCIES
bootsnap
capybara
debug
engtagger
importmap-rails
jbuilder
puma (>= 5.0)
Expand Down
8 changes: 8 additions & 0 deletions app/assets/stylesheets/application.tailwind.css
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,12 @@
a {
@apply text-blue-600 hover:text-blue-800;
}

/* Tooltip bubble: not visible by default and absolutely positioned. */
.tooltip {
@apply invisible absolute;
}

/* Show the tooltip (with a raised z-index) while an enclosing .has-tooltip element is hovered. */
.has-tooltip:hover .tooltip {
@apply visible z-50;
}
}
1 change: 1 addition & 0 deletions app/models/term.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class Term
attribute :definition, :string
attribute :example, :string
attribute :author, :string
attribute :part_of_speech, :string

def initialize(attributes = {})
super
Expand Down
69 changes: 69 additions & 0 deletions app/services/part_of_speech_classifier.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
require "engtagger"

# Guesses a human-readable part of speech for a single-word term.
#
# The term is tagged on its own first. When that only yields a generic noun
# tag, the definition is appended for context and the text is tagged again;
# the first tag of the result (the one attached to the term) is used.
class PartOfSpeechClassifier
  # Maps lower-cased tagger tags to display labels.
  #
  # Both the Penn Treebank spellings ("dt", "prp$", ".") and the spellings
  # EngTagger documents for its own output ("det", "prps", "pp", ...) are
  # listed, so determiners, possessive pronouns and punctuation no longer
  # fall through to the fallback label.
  POS_TAG_HUMANIZED = {
    "nn" => "noun",
    "nns" => "noun",
    "nnp" => "noun",
    "nnps" => "noun",
    "vb" => "verb",
    "vbd" => "verb",
    "vbg" => "verb",
    "vbn" => "verb",
    "vbp" => "verb",
    "vbz" => "verb",
    "md" => "verb",
    "jj" => "adjective",
    "jjr" => "adjective",
    "jjs" => "adjective",
    "rb" => "adverb",
    "rbr" => "adverb",
    "rbs" => "adverb",
    "wrb" => "adverb",
    "in" => "preposition",
    "to" => "preposition",
    "cc" => "conjunction",
    "dt" => "determiner",
    "det" => "determiner",
    "pdt" => "determiner",
    "wdt" => "determiner",
    "cd" => "number",
    "prp" => "pronoun",
    "prp$" => "pronoun",
    "prps" => "pronoun",
    "wp" => "pronoun",
    "wps" => "pronoun",
    "ex" => "pronoun",
    "uh" => "interjection",
    "rp" => "particle",
    "fw" => "misc",
    "sym" => "misc",
    "." => "punctuation",
    "pp" => "punctuation",
    "ppc" => "punctuation",
    "ppd" => "punctuation",
    "ppl" => "punctuation",
    "ppr" => "punctuation",
    "pps" => "punctuation",
    "lrb" => "punctuation",
    "rrb" => "punctuation"
  }.freeze

  # Label used for any tag missing from POS_TAG_HUMANIZED (same spelling as
  # the "misc" entries in the table, so the UI shows one consistent label).
  FALLBACK_LABEL = "misc"

  # Tags considered too generic when the word is tagged without context.
  CONTEXT_DEPENDENT_TAGS = %w[nn nnp].freeze

  # Captures the name of an opening tag in the tagger's markup, e.g. "<nn>".
  OPENING_TAG = /<(\w+)>/

  # Convenience wrapper: builds a classifier and classifies in one call.
  #
  # @param word [String, nil] the term to classify
  # @param definition [String, nil] the term's definition, used as context
  # @return [String, nil] see #classify
  def self.classify(word, definition)
    new.classify(word, definition)
  end

  def initialize
    @tagger = EngTagger.new
  end

  # @param word [String, nil] the term to classify
  # @param definition [String, nil] the term's definition, used as context
  # @return [String, nil] a label from POS_TAG_HUMANIZED, FALLBACK_LABEL for
  #   unrecognised tags, or nil when +word+ is not exactly one word
  def classify(word, definition)
    return unless single_word?(word)

    # Tag the word alone first; fall back to "word + definition" only when
    # the isolated tag is a generic noun tag.
    tag = first_tag(word)
    tag = first_tag("#{word} #{definition}") if CONTEXT_DEPENDENT_TAGS.include?(tag)

    humanize_tag(tag)
  end

  private

  # True when +word+ holds exactly one whitespace-delimited token.
  # nil is treated as an empty string instead of raising NoMethodError.
  def single_word?(word)
    word.to_s.split.size == 1
  end

  # Returns the name of the first tag the tagger emits for +text+, or nil
  # when the tagger output contains no tag (or is nil).
  def first_tag(text)
    @tagger.add_tags(text).to_s[OPENING_TAG, 1]
  end

  # Translates a raw tag into its display label.
  def humanize_tag(tag)
    POS_TAG_HUMANIZED.fetch(tag, FALLBACK_LABEL)
  end
end
15 changes: 9 additions & 6 deletions app/services/term_fetcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ def initialize(word)
end

# Looks up @word via UrbanDictionaryService, takes the first definition
# returned, guesses its part of speech and wraps everything in a Term.
#
# @return [Term] a term built from the first definition's "word",
#   "definition", "example" and "author" entries plus the guessed
#   part of speech
def fetch
  all_definitions = UrbanDictionaryService.define(@word)
  # NOTE(review): if the service returns an empty collection, `first` is nil
  # and the lookups below raise NoMethodError — confirm how callers expect
  # a missing term to be reported before adding a guard here.
  definition = all_definitions.first

  part_of_speech = PartOfSpeechClassifier.classify(definition["word"], definition["definition"])

  Term.new(
    word: definition["word"],
    definition: definition["definition"],
    example: definition["example"],
    author: definition["author"],
    part_of_speech: part_of_speech
  )
end
end
9 changes: 9 additions & 0 deletions app/views/terms/show.html.erb
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
<div class="max-w-3xl mx-auto py-12">
<article>
<h1 class="text-5xl font-bold"><%= @term.word %></h1>
<% if @term.part_of_speech.present? %>
<p class="text-xl italic mt-2">
<%= @term.part_of_speech %>
<span class="has-tooltip inline-block ml-2 not-italic">
<span class="tooltip rounded shadow-lg py-2 px-4 bg-gray-100 text-gray-800 mt-8">Our best guess</span>
<i class="fa-solid fa-wand-magic-sparkles"></i>
</span>
</p>
<% end %>

<% if @term.definition.present? %>
<p class="text-xl leading-relaxed mt-8">
Expand Down

0 comments on commit 7ca4260

Please sign in to comment.