Skip to content

Commit 6e3c898

Browse files
authored
Merge pull request #5 from amaccis/fix_stemmer_utf8
Fix utf8 issue, stemming portuguese
2 parents 6bba977 + c4a20d8 commit 6e3c898

File tree

3 files changed

+6
-7
lines changed

3 files changed

+6
-7
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ jobs:
1010
build:
1111
runs-on: ubuntu-latest
1212
container:
13-
image: amaccis/php-libstemmer:2.1.0
13+
image: amaccis/php-libstemmer:2.2.0
1414
steps:
1515
- name: Checkout
1616
uses: actions/checkout@v2

src/Stemmer.php

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,11 @@ class Stemmer implements StemmerInterface
1717

1818
private CData $stemmer;
1919

20-
private string $algorithm;
21-
2220
public function __construct(string $algorithm)
2321
{
2422

2523
$this->libstemmer = new Libstemmer();
2624
$this->stemmer = $this->libstemmer->sbStemmerNew($algorithm);
27-
$this->algorithm = $algorithm;
2825

2926
}
3027

@@ -48,7 +45,7 @@ public static function algorithms(): array
4845
public function stemWord(string $word): string
4946
{
5047

51-
$stem = $this->libstemmer->sbStemmerStem($this->stemmer, utf8_encode($word));
48+
$stem = $this->libstemmer->sbStemmerStem($this->stemmer, $word);
5249
$size = $this->libstemmer->sbStemmerLength($this->stemmer);
5350

5451
return $this->libstemmer->toString($stem, $size);

tests/StemmerTest.php

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,8 @@ public function stemWordProvider(): array
4141

4242
return [
4343
['english', 'cycling', 'cycl'],
44-
['italian', 'camminare', 'cammin']
44+
['italian', 'camminare', 'cammin'],
45+
['portuguese', 'atribuição', 'atribuiçã']
4546
];
4647

4748
}
@@ -66,7 +67,8 @@ public function stemWordsProvider(): array
6667

6768
return [
6869
['english', ['cycling', 'doors'], ['cycl', 'door']],
69-
['italian', ['camminare', 'porte'], ['cammin', 'port']]
70+
['italian', ['camminare', 'porte'], ['cammin', 'port']],
71+
['portuguese', ['atribuição', 'obrigações'], ['atribuiçã', 'obrig']]
7072
];
7173

7274
}

0 commit comments

Comments
 (0)