3737import org .apache .lucene .analysis .cjk .CJKAnalyzer ;
3838import org .apache .lucene .analysis .cjk .CJKBigramFilter ;
3939import org .apache .lucene .analysis .cjk .CJKWidthFilter ;
40+ import org .apache .lucene .analysis .ckb .SoraniAnalyzer ;
4041import org .apache .lucene .analysis .ckb .SoraniNormalizationFilter ;
4142import org .apache .lucene .analysis .commongrams .CommonGramsFilter ;
4243import org .apache .lucene .analysis .core .DecimalDigitFilter ;
5253import org .apache .lucene .analysis .de .GermanAnalyzer ;
5354import org .apache .lucene .analysis .de .GermanNormalizationFilter ;
5455import org .apache .lucene .analysis .de .GermanStemFilter ;
56+ import org .apache .lucene .analysis .el .GreekAnalyzer ;
5557import org .apache .lucene .analysis .en .EnglishAnalyzer ;
5658import org .apache .lucene .analysis .en .KStemFilter ;
5759import org .apache .lucene .analysis .en .PorterStemFilter ;
60+ import org .apache .lucene .analysis .es .SpanishAnalyzer ;
5861import org .apache .lucene .analysis .eu .BasqueAnalyzer ;
62+ import org .apache .lucene .analysis .fa .PersianAnalyzer ;
5963import org .apache .lucene .analysis .fa .PersianNormalizationFilter ;
6064import org .apache .lucene .analysis .fi .FinnishAnalyzer ;
6165import org .apache .lucene .analysis .fr .FrenchAnalyzer ;
66+ import org .apache .lucene .analysis .ga .IrishAnalyzer ;
6267import org .apache .lucene .analysis .gl .GalicianAnalyzer ;
68+ import org .apache .lucene .analysis .hi .HindiAnalyzer ;
6369import org .apache .lucene .analysis .hi .HindiNormalizationFilter ;
70+ import org .apache .lucene .analysis .hu .HungarianAnalyzer ;
6471import org .apache .lucene .analysis .hy .ArmenianAnalyzer ;
72+ import org .apache .lucene .analysis .id .IndonesianAnalyzer ;
6573import org .apache .lucene .analysis .in .IndicNormalizationFilter ;
74+ import org .apache .lucene .analysis .it .ItalianAnalyzer ;
75+ import org .apache .lucene .analysis .lt .LithuanianAnalyzer ;
76+ import org .apache .lucene .analysis .lv .LatvianAnalyzer ;
6677import org .apache .lucene .analysis .miscellaneous .ASCIIFoldingFilter ;
6778import org .apache .lucene .analysis .miscellaneous .DisableGraphAttribute ;
6879import org .apache .lucene .analysis .miscellaneous .KeywordRepeatFilter ;
7990import org .apache .lucene .analysis .ngram .NGramTokenFilter ;
8091import org .apache .lucene .analysis .ngram .NGramTokenizer ;
8192import org .apache .lucene .analysis .nl .DutchAnalyzer ;
93+ import org .apache .lucene .analysis .no .NorwegianAnalyzer ;
8294import org .apache .lucene .analysis .path .PathHierarchyTokenizer ;
8395import org .apache .lucene .analysis .pattern .PatternTokenizer ;
8496import org .apache .lucene .analysis .payloads .DelimitedPayloadTokenFilter ;
8597import org .apache .lucene .analysis .payloads .TypeAsPayloadTokenFilter ;
98+ import org .apache .lucene .analysis .pt .PortugueseAnalyzer ;
8699import org .apache .lucene .analysis .reverse .ReverseStringFilter ;
100+ import org .apache .lucene .analysis .ro .RomanianAnalyzer ;
101+ import org .apache .lucene .analysis .ru .RussianAnalyzer ;
87102import org .apache .lucene .analysis .shingle .ShingleFilter ;
88103import org .apache .lucene .analysis .snowball .SnowballFilter ;
89104import org .apache .lucene .analysis .standard .ClassicFilter ;
90105import org .apache .lucene .analysis .standard .ClassicTokenizer ;
91106import org .apache .lucene .analysis .standard .StandardAnalyzer ;
92107import org .apache .lucene .analysis .standard .UAX29URLEmailTokenizer ;
108+ import org .apache .lucene .analysis .sv .SwedishAnalyzer ;
109+ import org .apache .lucene .analysis .th .ThaiAnalyzer ;
93110import org .apache .lucene .analysis .th .ThaiTokenizer ;
94111import org .apache .lucene .analysis .tr .ApostropheFilter ;
112+ import org .apache .lucene .analysis .tr .TurkishAnalyzer ;
95113import org .apache .lucene .analysis .util .ElisionFilter ;
96114import org .elasticsearch .common .logging .DeprecationLogger ;
97115import org .elasticsearch .common .logging .Loggers ;
@@ -130,6 +148,8 @@ public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAn
130148 analyzers .put ("standard_html_strip" , StandardHtmlStripAnalyzerProvider ::new );
131149 analyzers .put ("pattern" , PatternAnalyzerProvider ::new );
132150 analyzers .put ("snowball" , SnowballAnalyzerProvider ::new );
151+
152+ // Language analyzers:
133153 analyzers .put ("arabic" , ArabicAnalyzerProvider ::new );
134154 analyzers .put ("armenian" , ArmenianAnalyzerProvider ::new );
135155 analyzers .put ("basque" , BasqueAnalyzerProvider ::new );
@@ -147,6 +167,24 @@ public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAn
147167 analyzers .put ("french" , FrenchAnalyzerProvider ::new );
148168 analyzers .put ("galician" , GalicianAnalyzerProvider ::new );
149169 analyzers .put ("german" , GermanAnalyzerProvider ::new );
170+ analyzers .put ("greek" , GreekAnalyzerProvider ::new );
171+ analyzers .put ("hindi" , HindiAnalyzerProvider ::new );
172+ analyzers .put ("hungarian" , HungarianAnalyzerProvider ::new );
173+ analyzers .put ("indonesian" , IndonesianAnalyzerProvider ::new );
174+ analyzers .put ("irish" , IrishAnalyzerProvider ::new );
175+ analyzers .put ("italian" , ItalianAnalyzerProvider ::new );
176+ analyzers .put ("latvian" , LatvianAnalyzerProvider ::new );
177+ analyzers .put ("lithuanian" , LithuanianAnalyzerProvider ::new );
178+ analyzers .put ("norwegian" , NorwegianAnalyzerProvider ::new );
179+ analyzers .put ("persian" , PersianAnalyzerProvider ::new );
180+ analyzers .put ("portuguese" , PortugueseAnalyzerProvider ::new );
181+ analyzers .put ("romanian" , RomanianAnalyzerProvider ::new );
182+ analyzers .put ("russian" , RussianAnalyzerProvider ::new );
183+ analyzers .put ("sorani" , SoraniAnalyzerProvider ::new );
184+ analyzers .put ("spanish" , SpanishAnalyzerProvider ::new );
185+ analyzers .put ("swedish" , SwedishAnalyzerProvider ::new );
186+ analyzers .put ("turkish" , TurkishAnalyzerProvider ::new );
187+ analyzers .put ("thai" , ThaiAnalyzerProvider ::new );
150188 return analyzers ;
151189 }
152190
@@ -248,13 +286,15 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
248286 @ Override
249287 public List <PreBuiltAnalyzerProviderFactory > getPreBuiltAnalyzerProviderFactories () {
250288 List <PreBuiltAnalyzerProviderFactory > analyzers = new ArrayList <>();
251- analyzers .add (new PreBuiltAnalyzerProviderFactory ("standard_html_strip" , CachingStrategy .LUCENE ,
289+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("standard_html_strip" , CachingStrategy .ELASTICSEARCH ,
252290 () -> new StandardHtmlStripAnalyzer (CharArraySet .EMPTY_SET )));
253291 analyzers .add (new PreBuiltAnalyzerProviderFactory ("pattern" , CachingStrategy .ELASTICSEARCH ,
254292 () -> new PatternAnalyzer (Regex .compile ("\\ W+" /*PatternAnalyzer.NON_WORD_PATTERN*/ , null ), true ,
255293 CharArraySet .EMPTY_SET )));
256294 analyzers .add (new PreBuiltAnalyzerProviderFactory ("snowball" , CachingStrategy .LUCENE ,
257295 () -> new SnowballAnalyzer ("English" , StopAnalyzer .ENGLISH_STOP_WORDS_SET )));
296+
297+ // Language analyzers:
258298 analyzers .add (new PreBuiltAnalyzerProviderFactory ("arabic" , CachingStrategy .LUCENE , ArabicAnalyzer ::new ));
259299 analyzers .add (new PreBuiltAnalyzerProviderFactory ("armenian" , CachingStrategy .LUCENE , ArmenianAnalyzer ::new ));
260300 analyzers .add (new PreBuiltAnalyzerProviderFactory ("basque" , CachingStrategy .LUCENE , BasqueAnalyzer ::new ));
@@ -263,7 +303,7 @@ public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactorie
263303 analyzers .add (new PreBuiltAnalyzerProviderFactory ("bulgarian" , CachingStrategy .LUCENE , BulgarianAnalyzer ::new ));
264304 analyzers .add (new PreBuiltAnalyzerProviderFactory ("catalan" , CachingStrategy .LUCENE , CatalanAnalyzer ::new ));
265305 // chinese analyzer: only for old indices, best effort
266- analyzers .add (new PreBuiltAnalyzerProviderFactory ("chinese" , CachingStrategy .LUCENE , StandardAnalyzer ::new ));
306+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("chinese" , CachingStrategy .ONE , StandardAnalyzer ::new ));
267307 analyzers .add (new PreBuiltAnalyzerProviderFactory ("cjk" , CachingStrategy .LUCENE , CJKAnalyzer ::new ));
268308 analyzers .add (new PreBuiltAnalyzerProviderFactory ("czech" , CachingStrategy .LUCENE , CzechAnalyzer ::new ));
269309 analyzers .add (new PreBuiltAnalyzerProviderFactory ("danish" , CachingStrategy .LUCENE , DanishAnalyzer ::new ));
@@ -273,6 +313,24 @@ public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactorie
273313 analyzers .add (new PreBuiltAnalyzerProviderFactory ("french" , CachingStrategy .LUCENE , FrenchAnalyzer ::new ));
274314 analyzers .add (new PreBuiltAnalyzerProviderFactory ("galician" , CachingStrategy .LUCENE , GalicianAnalyzer ::new ));
275315 analyzers .add (new PreBuiltAnalyzerProviderFactory ("german" , CachingStrategy .LUCENE , GermanAnalyzer ::new ));
316+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("greek" , CachingStrategy .LUCENE , GreekAnalyzer ::new ));
317+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("hindi" , CachingStrategy .LUCENE , HindiAnalyzer ::new ));
318+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("hungarian" , CachingStrategy .LUCENE , HungarianAnalyzer ::new ));
319+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("indonesian" , CachingStrategy .LUCENE , IndonesianAnalyzer ::new ));
320+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("irish" , CachingStrategy .LUCENE , IrishAnalyzer ::new ));
321+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("italian" , CachingStrategy .LUCENE , ItalianAnalyzer ::new ));
322+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("latvian" , CachingStrategy .LUCENE , LatvianAnalyzer ::new ));
323+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("lithuanian" , CachingStrategy .LUCENE , LithuanianAnalyzer ::new ));
324+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("norwegian" , CachingStrategy .LUCENE , NorwegianAnalyzer ::new ));
325+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("persian" , CachingStrategy .LUCENE , PersianAnalyzer ::new ));
326+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("portuguese" , CachingStrategy .LUCENE , PortugueseAnalyzer ::new ));
327+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("romanian" , CachingStrategy .LUCENE , RomanianAnalyzer ::new ));
328+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("russian" , CachingStrategy .LUCENE , RussianAnalyzer ::new ));
329+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("sorani" , CachingStrategy .LUCENE , SoraniAnalyzer ::new ));
330+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("spanish" , CachingStrategy .LUCENE , SpanishAnalyzer ::new ));
331+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("swedish" , CachingStrategy .LUCENE , SwedishAnalyzer ::new ));
332+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("turkish" , CachingStrategy .LUCENE , TurkishAnalyzer ::new ));
333+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("thai" , CachingStrategy .LUCENE , ThaiAnalyzer ::new ));
276334 return analyzers ;
277335 }
278336
0 commit comments