Skip to content

Commit

Permalink
Introduce Homonyms
Browse files Browse the repository at this point in the history
This concept has been introduced in order to avoid
duplicate function definitions.
  • Loading branch information
kevin-johnson-shopify committed Dec 6, 2019
1 parent 6c384f2 commit a5c3c69
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 45 deletions.
Binary file modified gherkin-languages.terms
Binary file not shown.
5 changes: 5 additions & 0 deletions lib/mix/gherkin_languages.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ defmodule Mix.Tasks.GherkinLanguages do
use Mix.Task
alias Gherkin.Scanner.LanguageSupport
@shortdoc "Converts `gherkin-languages.json` to palatable format"
@doc """
Entry point of the task.

When called with the single argument `--no-save`, the parsed languages are
printed without truncation and nothing else is done. For any other
arguments `LanguageSupport.unload/0` is invoked.
"""
def run(["--no-save"]) do
  IO.inspect(LanguageSupport.parse(), limit: :infinity)
end

def run(_args), do: LanguageSupport.unload()
Expand Down
210 changes: 182 additions & 28 deletions lib/scanner/lib/language_supports.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,60 +2,214 @@ defmodule Gherkin.Scanner.LanguageSupport do
@gherkin_languages "gherkin-languages"
@gherkin_languages_source "#{@gherkin_languages}.json"
@gherkin_languages_resource "#{@gherkin_languages}.terms"
@homonyms ["Агар ", "* ", "अनी ", "Tha ", "Þá ", "Ða ", "Þa "]
@moduledoc_homonyms @homonyms |> Enum.map(&(" * '#{&1}'")) |> Enum.join("\n")

@moduledoc """
Normalizes each entry under '#{@gherkin_languages_source}' to
following template:
The main purpose of this module is to facilitate full international
language support by normalizing each entry under
'#{@gherkin_languages_source}' to the following format:
```elixir
%{
feature: [],
rule: [],
background: [],
scenario_outline: [],
example: [],
given: [],
when: [],
then: [],
but: [],
and: [],
examples: [],
# Top Level Gherkin Keywords
feature: ["Feature", "Business Need", "Ability"],
rule: ["Rule"],
background: ["Background"],
scenario_outline: ["Scenario Outline", "Scenario Template"],
example: ["Example", "Scenario"],
examples: ["Examples", "Scenarios"],
# Step Level Gherkin Keywords
given: ["Given "],
when: ["When "],
then: ["Then "],
and: ["And "],
but: ["But "],
# Meta
name: "English",
native: "English",
direction: :ltr,
name: "",
native: "",
homonyms: %{
"* " => %{
given: :when,
when: :then,
then: :and,
and: :and,
but: :but,
default: :given,
}
},
}
and persists the same as '#{@gherkin_languages_resource}'
```
and persisting the same as '#{@gherkin_languages_resource}'.
The `# Meta` section comprises the keys `:name` and `:native`, which
are part and parcel of the #{@gherkin_languages_source} standard. The
other keys are newly introduced:
* `:direction` designates whether it pertains to a `:ltr` (Left to Right)
or `:rtl` (Right to Left) language. This can be derived thanks to
the contents under `:native`.
* `:homonyms` represent the various keywords that are the same
across languages, such as "* " to mean any of the Step Level Gherkin
Keywords, or within a language, such as `"Tha "` for Old English to
mean `When` and `Then`. Currently the existing `homonyms` are:
#{@moduledoc_homonyms}
Each homonym has a sequence of keywords that it can logically resolve
to. For the above sample presented, this would mean that the
following feature:
```cucumber
Feature: Some Feature
Scenario: Some Scenario
* A
* B
* C
* D
```
could be interpreted as:
```cucumber
Feature: Some Feature
Scenario: Some Scenario
Given A
When B
Then C
And D
```
"""
@doc """
Returns the file name of the JSON source: '#{@gherkin_languages_source}'.
"""
def gherkin_languages_source do
  @gherkin_languages_source
end

@doc """
Returns the file name of the binary resource: '#{@gherkin_languages_resource}'.
"""
def gherkin_languages_resource do
  @gherkin_languages_resource
end

@doc """
Shorthand for `load/0`: returns the terms stored under
'#{@gherkin_languages_resource}'.
"""
def all do
  load()
end

@doc """
Runs `parse/0`, serializes the result with `:erlang.term_to_binary/1` and
writes it to '#{@gherkin_languages_resource}'.

Raises if the file cannot be written.
"""
def unload do
  serialized = :erlang.term_to_binary(parse())
  File.write!(@gherkin_languages_resource, serialized)
end

@doc """
Reads '#{@gherkin_languages_resource}' and decodes its binary content back
into terms with `:erlang.binary_to_term/1`.

Raises if the file cannot be read.
"""
def load do
  binary = File.read!(@gherkin_languages_resource)
  :erlang.binary_to_term(binary)
end

@doc """
Parses the content of '#{@gherkin_languages_source}' into the desired
format.
"""
def parse do
# Read the JSON source and decode it into maps; the outer reduce then builds
# one normalized entry per `{language, translations}` pair.
@gherkin_languages_source
|> File.read!()
|> :jiffy.decode([:return_maps, :copy_strings])
|> Enum.reduce(%{}, fn {language, translations}, a ->
normalized_translations =
Enum.reduce(translations, %{}, fn {key, val}, a ->
Map.put(a, handle_key(key), val)
# First pass over the translations: "name" and "native" are copied as-is.
# Every other key is normalized via handle_key/1, its values are
# de-duplicated and split into the words listed in @homonyms and the
# remaining words. The remaining words are stored under the normalized key,
# the homonyms (or :none) under [:homonyms, normalized_key].
{%{homonyms: homonyms}, remainder} =
Enum.reduce(translations, %{}, fn
{"name", val}, a -> Map.put(a, :name, val)
{"native", val}, a -> Map.put(a, :native, val)
{key, vals}, a -> normalized_key = handle_key(key)
{homonyms, remainder} = vals
|> Enum.uniq
|> seperate_out_homonyms(@homonyms)

a
|> Map.put(normalized_key, remainder)
|> put_in([Access.key(:homonyms, %{}), normalized_key], homonyms)
end)
# The direction is set to :ltr for every language here.
|> Map.put(:direction, :ltr)
# Separate the collected :homonyms from everything else.
|> Map.split([:homonyms])

# Second pass: invert `keyword => [homonym]` into
# `homonym => %{keyword => next keyword}`. Keywords marked :none contribute
# nothing.
normalized_homonyms =
homonyms
|> Enum.reduce(%{}, fn
{_, :none}, a -> a
{keyword, homonyms_for_keyword}, a -> Enum.reduce(homonyms_for_keyword, a, fn homonym, a ->
put_in(a, [Access.key(homonym, %{}), keyword], next_keyword(keyword, homonym, homonyms))
end)
end)
# Third pass: add a :default entry to each homonym, namely the first of
# :given, :when, :then, :and, :but that is present in its keyword sequence.
|> Enum.reduce(%{}, fn {homonym, keywords_sequence}, a ->
default_homonym = cond do
keywords_sequence[:given] -> :given
keywords_sequence[:when] -> :when
keywords_sequence[:then] -> :then
keywords_sequence[:and] -> :and
keywords_sequence[:but] -> :but
true -> raise "Developer Error. Keywords Sequence Has No Members"
end

put_in(a, [Access.key(homonym, keywords_sequence), :default], default_homonym)
end)

# Re-attach the normalized homonyms and file the entry under its language.
normalized_translations = Map.put(remainder, :homonyms, normalized_homonyms)
Map.put(a, language, normalized_translations)
end)
end

def unload do
content = parse() |> :erlang.term_to_binary()
File.write!(@gherkin_languages_resource, content)
# Splits `words` into `{homonyms_found, other_words}`; a word counts as a
# homonym when it is a member of `homonyms`. When no word matched, the first
# element is the atom `:none` rather than an empty list.
defp seperate_out_homonyms(words, homonyms) do
  case Enum.split_with(words, &(&1 in homonyms)) do
    {[], ^words} -> {:none, words}
    {matched, unmatched} -> {matched, unmatched}
  end
end

def load do
@gherkin_languages_resource
|> File.read!()
|> :erlang.binary_to_term()
# Determines which step keyword `homonym` resolves to when it directly
# follows a step of kind `keyword`.
#
# `homonyms` maps each normalized keyword to the list of homonyms found among
# its translations. `parse/0` stores the atom `:none` (not a list) for
# keywords without homonyms, and a keyword may also be absent; both cases are
# treated as "no homonyms" instead of being handed to `in`, which would raise
# for a non-enumerable such as `:none`.
#
# For each preceding keyword the candidates are tried in order and the first
# one that lists `homonym` wins; `:given` is the fallback.
defp next_keyword(:given, homonym, homonyms),
  do: first_keyword_listing(homonym, homonyms, [:when, :then, :and])

defp next_keyword(:when, homonym, homonyms),
  do: first_keyword_listing(homonym, homonyms, [:then, :and])

defp next_keyword(:then, homonym, homonyms),
  do: first_keyword_listing(homonym, homonyms, [:and])

defp next_keyword(:and, homonym, homonyms),
  do: first_keyword_listing(homonym, homonyms, [:and])

defp next_keyword(:but, homonym, homonyms),
  do: first_keyword_listing(homonym, homonyms, [:but, :and])

# Returns the first of `candidates` whose homonym list contains `homonym`,
# or `:given` when none does.
defp first_keyword_listing(homonym, homonyms, candidates) do
  Enum.find(candidates, :given, fn keyword ->
    case homonyms[keyword] do
      listed when is_list(listed) -> homonym in listed
      _none_or_missing -> false
    end
  end)
end

# Maps a key of the JSON source to the atom used in the normalized format:
# "scenarioOutline" becomes :scenario_outline, "scenario" becomes :example,
# and any other key is converted to an atom unchanged.
defp handle_key(key) do
  case key do
    "scenarioOutline" -> :scenario_outline
    "scenario" -> :example
    other -> String.to_atom(other)
  end
end

def all, do: load()
end
79 changes: 62 additions & 17 deletions lib/scanner/scanner.ex
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ defmodule Gherkin.Scanner do
end

@languages Gherkin.Scanner.LanguageSupport.all()
# @languages []

Enum.each(@languages, fn {language,
%{
Expand All @@ -71,8 +72,34 @@ defmodule Gherkin.Scanner do
but: but_phrasals,
and: and_phrasals,
examples: examples_phrasals,
direction: language_direction
direction: language_direction,
homonyms: homonym_phrasals,
}} ->

# Generates one `map_to_token/5` clause per homonym of the current language.
# A homonym can stand for several step keywords, so the generated clause
# chooses the keyword at run time based on the token returned by
# `Context.peek/1`.
Enum.each(homonym_phrasals, fn {phrasal, next_in_sequence_lookup} ->
# Separate the :default entry from the `keyword => next keyword` lookup.
{%{default: default_homonym}, next_in_sequence_lookup} = Map.split(next_in_sequence_lookup, [:default])

def map_to_token(
unquote(language),
<<unquote(phrasal), rest::binary>>,
index,
column,
context = %Context{}
) do
# NOTE(review): presumably the most recently pushed token, whose second
# element is its keyword — confirm against Context.peek/1. Any other shape
# fails this match.
{:token, prev_keyword, _, _, _} = Context.peek(context)

# The lookup map is embedded into the generated clause; when the previous
# keyword has no entry, the homonym's default keyword is used.
unquote(Macro.escape(next_in_sequence_lookup))
|> Map.get(prev_keyword, unquote(default_homonym))
|> case do
:given -> handle_given(unquote(language_direction), unquote(phrasal), rest, index, column, context)
:when -> handle_when(unquote(language_direction), unquote(phrasal), rest, index, column, context)
:then -> handle_then(unquote(language_direction), unquote(phrasal), rest, index, column, context)
:and -> handle_and(unquote(language_direction), unquote(phrasal), rest, index, column, context)
:but -> handle_but(unquote(language_direction), unquote(phrasal), rest, index, column, context)
end
end
end)

Enum.each(feature_phrasals, fn phrasal ->
def map_to_token(
unquote(language),
Expand Down Expand Up @@ -123,38 +150,34 @@ defmodule Gherkin.Scanner do
column,
context = %Context{}
) do
_language_direction = unquote(language_direction)

token = Token.given(index, column, unquote(phrasal), rest)
{token, Context.push(context, token)}
handle_given(unquote(language_direction), unquote(phrasal), rest, index, column, context)
end
end)

Enum.each(when_phrasals, fn phrasal ->
# IO.puts(":when, #{language}, #{phrasal}")
def map_to_token(
unquote(language),
<<unquote(phrasal), rest::binary>>,
index,
column,
context = %Context{}
) do
_language_direction = unquote(language_direction)
token = Token._when(index, column, unquote(phrasal), rest)
{token, Context.push(context, token)}

handle_when(unquote(language_direction), unquote(phrasal), rest, index, column, context)
end
end)

Enum.each(then_phrasals, fn phrasal ->
# IO.puts(":then, #{language}, #{phrasal}")
def map_to_token(
unquote(language),
<<unquote(phrasal), rest::binary>>,
index,
column,
context = %Context{}
) do
_language_direction = unquote(language_direction)
token = Token.then(index, column, unquote(phrasal), rest)
{token, Context.push(context, token)}
handle_then(unquote(language_direction), unquote(phrasal), rest, index, column, context)
end
end)

Expand All @@ -166,9 +189,7 @@ defmodule Gherkin.Scanner do
column,
context = %Context{}
) do
_language_direction = unquote(language_direction)
token = Token.but(index, column, unquote(phrasal), rest)
{token, Context.push(context, token)}
handle_but(unquote(language_direction), unquote(phrasal), rest, index, column, context)
end
end)

Expand All @@ -180,9 +201,7 @@ defmodule Gherkin.Scanner do
column,
context = %Context{}
) do
_language_direction = unquote(language_direction)
token = Token._and(index, column, unquote(phrasal), rest)
{token, Context.push(context, token)}
handle_and(unquote(language_direction), unquote(phrasal), rest, index, column, context)
end
end)

Expand Down Expand Up @@ -376,6 +395,32 @@ defmodule Gherkin.Scanner do
{Token.comment(index, column, "#", line_with_white_spaces_at_end_preserved), context}
end

@doc """
Builds a `given` token from `phrasal` and `rest` at `index`/`column` and
returns `{token, context}` with the token pushed onto `context`. The
language direction argument is currently ignored.
"""
def handle_given(_language_direction, phrasal, rest, index, column, context),
  do: push_step_token(Token.given(index, column, phrasal, rest), context)

@doc """
Same as `handle_given/6`, but builds the token with `Token._when/4`.
"""
def handle_when(_language_direction, phrasal, rest, index, column, context),
  do: push_step_token(Token._when(index, column, phrasal, rest), context)

@doc """
Same as `handle_given/6`, but builds the token with `Token.then/4`.
"""
def handle_then(_language_direction, phrasal, rest, index, column, context),
  do: push_step_token(Token.then(index, column, phrasal, rest), context)

@doc """
Same as `handle_given/6`, but builds the token with `Token._and/4`.
"""
def handle_and(_language_direction, phrasal, rest, index, column, context),
  do: push_step_token(Token._and(index, column, phrasal, rest), context)

@doc """
Same as `handle_given/6`, but builds the token with `Token.but/4`.
"""
def handle_but(_language_direction, phrasal, rest, index, column, context),
  do: push_step_token(Token.but(index, column, phrasal, rest), context)

# Pairs a freshly built step token with the context it has been pushed onto.
defp push_step_token(step_token, context) do
  {step_token, Context.push(context, step_token)}
end


defp handle_plain_text("", index, column) do
Token.empty(index, column)
end
Expand Down

0 comments on commit a5c3c69

Please sign in to comment.