Skip to content

Commit

Permalink
[4.0] Upgrade php-stemmer 2.0 (#27568)
Browse files Browse the repository at this point in the history
Refactoring stemmer integration by reducing the separate classes and
using the stemmer factory to create a stemmer object.
This reduces maintenance because no change in Joomla! is required to
support new languages added by the upstream php-stemmer class
and we reduce complexity in Joomla! code base.
  • Loading branch information
HLeithner authored and wilsonge committed Jan 24, 2020
1 parent ce5e57a commit 66730a3
Show file tree
Hide file tree
Showing 17 changed files with 389 additions and 766 deletions.
59 changes: 50 additions & 9 deletions administrator/components/com_finder/Indexer/Language.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
defined('_JEXEC') or die;

use Joomla\String\StringHelper;
use Wamania\Snowball\NotFoundException;
use Wamania\Snowball\StemmerFactory;
use Wamania\Snowball\Stemmer\Stemmer;

/**
* Language support class for the Finder indexer package.
Expand Down Expand Up @@ -44,6 +47,42 @@ class Language
*/
public $spacer = ' ';

/**
* The stemmer object.
*
* @var Stemmer
* @since 4.0.0
*/
protected $stemmer = null;

/**
 * Method to construct the language object.
 *
 * Resolves the language code for this instance and then asks the stemmer
 * factory for a matching stemmer. When the factory reports that no stemmer
 * exists for the language, the exception is swallowed on purpose and the
 * stemmer property is left as it was.
 *
 * @param   string  $locale  Optional language code. When given it replaces the language
 *                           already set on the object; when omitted and no language is
 *                           set, the generic '*' handler is used.
 *
 * @since   4.0.0
 */
public function __construct($locale = null)
{
	if ($locale !== null)
	{
		// An explicitly requested locale takes precedence over the class default
		$this->language = $locale;
	}
	elseif ($this->language === null)
	{
		// Use our generic language handler if no language is set
		$this->language = '*';
	}

	try
	{
		$this->stemmer = StemmerFactory::create($this->language);
	}
	catch (NotFoundException $exception)
	{
		// We don't have a stemmer for the language, so none is assigned
	}
}

/**
* Method to get a language support object.
*
Expand All @@ -60,24 +99,21 @@ public static function getInstance($language)
return self::$instances[$language];
}

if ($language == '*')
{
self::$instances[$language] = new self;
$locale = '*';

return self::$instances[$language];
if ($language !== '*')
{
$locale = Helper::getPrimaryLanguage($language);
$class = '\\Joomla\\Component\\Finder\\Administrator\\Indexer\\Language\\' . ucfirst($locale);
}

$locale = Helper::getPrimaryLanguage($language);
$class = '\\Joomla\\Component\\Finder\\Administrator\\Indexer\\Language\\' . ucfirst($locale);

if (class_exists($class))
{
self::$instances[$language] = new $class;
}
else
{
self::$instances[$language] = new self;
self::$instances[$language]->language = $locale;
self::$instances[$language] = new self($locale);
}

return self::$instances[$language];
Expand Down Expand Up @@ -137,6 +173,11 @@ public function tokenise($input)
*/
public function stem($token)
{
	// Without a stemmer object the token is handed back unchanged
	if ($this->stemmer === null)
	{
		return $token;
	}

	// Delegate the actual stemming to the stemmer object
	return $this->stemmer->stem($token);
}
}
62 changes: 0 additions & 62 deletions administrator/components/com_finder/Indexer/Language/Da.php

This file was deleted.

62 changes: 0 additions & 62 deletions administrator/components/com_finder/Indexer/Language/De.php

This file was deleted.

12 changes: 11 additions & 1 deletion administrator/components/com_finder/Indexer/Language/El.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,23 @@ class El extends Language
*/
public $language = 'el';

/**
* Method to construct the language object.
*
* Intentionally empty: it overrides the parent constructor because this
* class does not need to load an external stemmer.
*
* @param string $locale Not used by this constructor.
*
* @since 4.0.0
*/
public function __construct($locale = null)
{
// Override parent constructor since we don't need to load an external stemmer
}

/**
* Method to tokenise a text string. It takes into account the odd punctuation commonly used in Greek text, mapping
* it to ASCII punctuation.
*
* Reference: http://www.teicrete.gr/users/kutrulis/Glosika/Stixi.htm
*
* @param string $input The input to tokenise.
* @param string $input The input to tokenise.
*
* @return array An array of term strings.
*
Expand Down
62 changes: 0 additions & 62 deletions administrator/components/com_finder/Indexer/Language/En.php

This file was deleted.

62 changes: 0 additions & 62 deletions administrator/components/com_finder/Indexer/Language/Es.php

This file was deleted.

Loading

0 comments on commit 66730a3

Please sign in to comment.