From 3fa6517c34b4dd53ba3cd121216883d4f4f5d854 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:32:51 +0700 Subject: [PATCH 01/92] Add model 2023-07-30-albert_embeddings_ALR_BERT_ro --- ...023-07-30-albert_embeddings_ALR_BERT_ro.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_ALR_BERT_ro.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_ALR_BERT_ro.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_ALR_BERT_ro.md new file mode 100644 index 00000000000000..08882527743322 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_ALR_BERT_ro.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Romanian ALBERT Embeddings (from dragosnicolae555) +author: John Snow Labs +name: albert_embeddings_ALR_BERT +date: 2023-07-30 +tags: [albert, embeddings, ro, open_source, onnx] +task: Embeddings +language: ro +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `ALR_BERT` is a Romanian model originally trained by `dragosnicolae555`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_ALR_BERT_ro_5.0.0_3.0_1690752767725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_ALR_BERT_ro_5.0.0_3.0_1690752767725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_ALR_BERT","ro") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["Îmi place Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_ALR_BERT","ro") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("Îmi place Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("ro.embed.ALR_BERT").predict("""Îmi place Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_ALR_BERT| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|ro| +|Size:|51.7 MB| +|Case sensitive:|false| \ No newline at end of file From cde32e0a93d9bae6f3054d734f06dce5f84a0f52 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:33:51 +0700 Subject: [PATCH 02/92] Add model 2023-07-30-albert_embeddings_albert_base_japanese_v1_ja --- ...t_embeddings_albert_base_japanese_v1_ja.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_japanese_v1_ja.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_japanese_v1_ja.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_japanese_v1_ja.md new file mode 100644 index 00000000000000..e97c48ba8709fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_japanese_v1_ja.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Japanese ALBERT Embeddings (from ken11) +author: John Snow Labs +name: albert_embeddings_albert_base_japanese_v1 +date: 2023-07-30 +tags: [albert, embeddings, ja, open_source, onnx] +task: Embeddings +language: ja +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `albert-base-japanese-v1` is a Japanese model originally trained by `ken11`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_japanese_v1_ja_5.0.0_3.0_1690752780150.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_japanese_v1_ja_5.0.0_3.0_1690752780150.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_base_japanese_v1","ja") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["私はSpark NLPを愛しています"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_base_japanese_v1","ja") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("私はSpark NLPを愛しています").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("ja.embed.albert_base_japanese_v1").predict("""私はSpark NLPを愛しています""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_albert_base_japanese_v1| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|ja| +|Size:|42.8 MB| +|Case sensitive:|false| \ No newline at end of file From 32c799ee0e99a928fc8c5d5e01d9d024be304af9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:34:51 +0700 Subject: [PATCH 03/92] Add model 2023-07-30-albert_embeddings_albert_large_arabic_ar --- ...lbert_embeddings_albert_large_arabic_ar.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_arabic_ar.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_arabic_ar.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_arabic_ar.md new file mode 100644 index 00000000000000..2e1bcfa05cb027 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_arabic_ar.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Arabic ALBERT Embeddings (Large) +author: John Snow Labs +name: albert_embeddings_albert_large_arabic +date: 2023-07-30 +tags: [albert, embeddings, ar, open_source, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `albert-large-arabic` is an Arabic model originally trained by `asafaya`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_arabic_ar_5.0.0_3.0_1690752835564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_arabic_ar_5.0.0_3.0_1690752835564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_large_arabic","ar") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["أنا أحب شرارة NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_large_arabic","ar") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("أنا أحب شرارة NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("ar.embed.albert_large_arabic").predict("""أنا أحب شرارة NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_albert_large_arabic| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|ar| +|Size:|62.8 MB| +|Case sensitive:|false| \ No newline at end of file From 7cb92b3f3b2ed4712c4a3b00ab9d56bd56343f11 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:35:51 +0700 Subject: [PATCH 04/92] Add model 2023-07-30-albert_embeddings_albert_fa_base_v2_fa --- ...-albert_embeddings_albert_fa_base_v2_fa.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_base_v2_fa.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_base_v2_fa.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_base_v2_fa.md new file mode 100644 index 00000000000000..313848f8456401 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_base_v2_fa.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Persian ALBERT Embeddings (from m3hrdadfi) +author: John Snow Labs +name: albert_embeddings_albert_fa_base_v2 +date: 2023-07-30 +tags: [albert, embeddings, fa, open_source, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `albert-fa-base-v2` is a Persian model originally trained by `m3hrdadfi`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_base_v2_fa_5.0.0_3.0_1690752839758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_base_v2_fa_5.0.0_3.0_1690752839758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_fa_base_v2","fa") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["من عاشق جرقه NLP هستم"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_fa_base_v2","fa") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("من عاشق جرقه NLP هستم").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fa.embed.albert").predict("""من عاشق جرقه NLP هستم""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_albert_fa_base_v2| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|fa| +|Size:|66.3 MB| +|Case sensitive:|false| \ No newline at end of file From 7748145b843cb1b3414de5063e551988fb4d2ee0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:36:51 +0700 Subject: [PATCH 05/92] Add model 2023-07-30-albert_embeddings_albert_german_ner_de --- ...-albert_embeddings_albert_german_ner_de.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_german_ner_de.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_german_ner_de.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_german_ner_de.md new file mode 100644 index 00000000000000..0791ee2829fadc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_german_ner_de.md @@ -0,0 +1,99 @@ +--- +layout: model +title: German ALBERT Embeddings (from abhilash1910) +author: John Snow Labs +name: albert_embeddings_albert_german_ner +date: 2023-07-30 +tags: [albert, embeddings, de, open_source, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `albert-german-ner` is a German model originally trained by `abhilash1910`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_german_ner_de_5.0.0_3.0_1690752850054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_german_ner_de_5.0.0_3.0_1690752850054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_german_ner","de") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["Ich liebe Funken NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_german_ner","de") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("Ich liebe Funken NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("de.embed.albert_german_ner").predict("""Ich liebe Funken NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_albert_german_ner| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|de| +|Size:|42.0 MB| +|Case sensitive:|false| \ No newline at end of file From 827cd1daca7b7f26f7f3d1dca8a66eac18414800 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:37:52 +0700 Subject: [PATCH 06/92] Add model 2023-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa --- ...rt_embeddings_albert_fa_zwnj_base_v2_fa.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa.md new file mode 100644 index 00000000000000..789c042888482b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Persian (Farsi) ALBERT Embeddings +author: John Snow Labs +name: albert_embeddings_albert_fa_zwnj_base_v2 +date: 2023-07-30 +tags: [albert, embeddings, fa, open_source, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `albert-fa-zwnj-base-v2` is a Persian model originally trained by `HooshvareLab`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_zwnj_base_v2_fa_5.0.0_3.0_1690752897049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_zwnj_base_v2_fa_5.0.0_3.0_1690752897049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_fa_zwnj_base_v2","fa") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["من عاشق جرقه NLP هستم"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_fa_zwnj_base_v2","fa") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("من عاشق جرقه NLP هستم").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fa.embed.albert_fa_zwnj_base_v2").predict("""من عاشق جرقه NLP هستم""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_albert_fa_zwnj_base_v2| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|fa| +|Size:|41.9 MB| +|Case sensitive:|false| \ No newline at end of file From 4d93517718a3522bb64ba13f4eac6baddbea792b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:38:52 +0700 Subject: [PATCH 07/92] Add model 2023-07-30-albert_embeddings_marathi_albert_mr --- ...-30-albert_embeddings_marathi_albert_mr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_mr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_mr.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_mr.md new file mode 100644 index 00000000000000..7c918883dfd0a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_mr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Marathi ALBERT Embeddings (v1) +author: John Snow Labs +name: albert_embeddings_marathi_albert +date: 2023-07-30 +tags: [albert, embeddings, mr, open_source, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `marathi-albert` is a Marathi model originally trained by `l3cube-pune`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_mr_5.0.0_3.0_1690752853424.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_mr_5.0.0_3.0_1690752853424.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_marathi_albert","mr") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["मला स्पार्क एनएलपी आवडते"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_marathi_albert","mr") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("मला स्पार्क एनएलपी आवडते").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("mr.embed.albert").predict("""मला स्पार्क एनएलपी आवडते""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_marathi_albert| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|mr| +|Size:|42.6 MB| +|Case sensitive:|false| \ No newline at end of file From f77835a20176b6c49e90278148becc5bd6f3d85a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:39:52 +0700 Subject: [PATCH 08/92] Add model 2023-07-30-albert_embeddings_albert_tiny_bahasa_cased_ms --- ..._embeddings_albert_tiny_bahasa_cased_ms.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_tiny_bahasa_cased_ms.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_tiny_bahasa_cased_ms.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_tiny_bahasa_cased_ms.md new file mode 100644 index 00000000000000..4736fea6022af0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_tiny_bahasa_cased_ms.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Malay ALBERT Embeddings (Tiny) +author: John Snow Labs +name: albert_embeddings_albert_tiny_bahasa_cased +date: 2023-07-30 +tags: [albert, embeddings, ms, open_source, onnx] +task: Embeddings +language: ms +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `albert-tiny-bahasa-cased` is a Malay model originally trained by `malay-huggingface`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_tiny_bahasa_cased_ms_5.0.0_3.0_1690752867859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_tiny_bahasa_cased_ms_5.0.0_3.0_1690752867859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_tiny_bahasa_cased","ms") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["Saya suka Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_tiny_bahasa_cased","ms") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("Saya suka Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("ms.embed.albert_tiny_bahasa_cased").predict("""Saya suka Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_albert_tiny_bahasa_cased| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|ms| +|Size:|21.3 MB| +|Case sensitive:|false| \ No newline at end of file From 5569cf966d705911356bdb58908635e84cec6f13 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:40:52 +0700 Subject: [PATCH 09/92] Add model 2023-07-30-albert_embeddings_albert_base_bahasa_cased_ms --- ..._embeddings_albert_base_bahasa_cased_ms.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_bahasa_cased_ms.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_bahasa_cased_ms.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_bahasa_cased_ms.md new file mode 100644 index 00000000000000..83553b8908a5f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_bahasa_cased_ms.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Malay ALBERT Embeddings (Base) +author: John Snow Labs +name: albert_embeddings_albert_base_bahasa_cased +date: 2023-07-30 +tags: [albert, embeddings, ms, open_source, onnx] +task: Embeddings +language: ms +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `albert-base-bahasa-cased` is a Malay model originally trained by `malay-huggingface`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_bahasa_cased_ms_5.0.0_3.0_1690753174981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_bahasa_cased_ms_5.0.0_3.0_1690753174981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_base_bahasa_cased","ms") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["Saya suka Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_base_bahasa_cased","ms") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("Saya suka Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("ms.embed.albert_base_bahasa_cased").predict("""Saya suka Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_albert_base_bahasa_cased| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|ms| +|Size:|42.9 MB| +|Case sensitive:|false| \ No newline at end of file From 167c3961dae5c7b80256a620c4b7d0cd1b8ece49 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:41:52 +0700 Subject: [PATCH 10/92] Add model 2023-07-30-albert_embeddings_fralbert_base_fr --- ...7-30-albert_embeddings_fralbert_base_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_fralbert_base_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_fralbert_base_fr.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_fralbert_base_fr.md new file mode 100644 index 00000000000000..a72bb4a60dd4e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_fralbert_base_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French ALBERT Embeddings (from qwant) +author: John Snow Labs +name: albert_embeddings_fralbert_base +date: 2023-07-30 +tags: [albert, embeddings, fr, open_source, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `fralbert-base` is a French model originally trained by `qwant`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_fralbert_base_fr_5.0.0_3.0_1690752813444.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_fralbert_base_fr_5.0.0_3.0_1690752813444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_fralbert_base","fr") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_fralbert_base","fr") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.albert").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_fralbert_base| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|fr| +|Size:|43.0 MB| +|Case sensitive:|false| \ No newline at end of file From d135b9e6e4a915a4de86ebe94336c0edf8125f67 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:42:52 +0700 Subject: [PATCH 11/92] Add model 2023-07-30-albert_embeddings_marathi_albert_v2_mr --- ...-albert_embeddings_marathi_albert_v2_mr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_v2_mr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_v2_mr.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_v2_mr.md new file mode 100644 index 00000000000000..c640824c2d9fcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_v2_mr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Marathi ALBERT Embeddings (v2) +author: John Snow Labs +name: albert_embeddings_marathi_albert_v2 +date: 2023-07-30 +tags: [albert, embeddings, mr, open_source, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `marathi-albert-v2` is a Marathi model originally trained by `l3cube-pune`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_v2_mr_5.0.0_3.0_1690753295251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_v2_mr_5.0.0_3.0_1690753295251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_marathi_albert_v2","mr") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["मला स्पार्क एनएलपी आवडते"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_marathi_albert_v2","mr") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("मला स्पार्क एनएलपी आवडते").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("mr.embed.albert_v2").predict("""मला स्पार्क एनएलपी आवडते""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_marathi_albert_v2| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|mr| +|Size:|125.5 MB| +|Case sensitive:|false| \ No newline at end of file From d2cfe6a7a512f9b0a5024ddb32f073c6239b59c1 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:43:52 +0700 Subject: [PATCH 12/92] Add model 2023-07-30-albert_embeddings_albert_base_arabic_ar --- ...albert_embeddings_albert_base_arabic_ar.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_arabic_ar.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_arabic_ar.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_arabic_ar.md new file mode 100644 index 00000000000000..a843406cdddce0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_arabic_ar.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Arabic ALBERT Embeddings (Base) +author: John Snow Labs +name: albert_embeddings_albert_base_arabic +date: 2023-07-30 +tags: [albert, embeddings, ar, open_source, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `albert-base-arabic` is an Arabic model originally trained by `asafaya`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_arabic_ar_5.0.0_3.0_1690753212237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_arabic_ar_5.0.0_3.0_1690753212237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_base_arabic","ar") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["أنا أحب شرارة NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_base_arabic","ar") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("أنا أحب شرارة NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("ar.embed.albert").predict("""أنا أحب شرارة NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_albert_base_arabic| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|ar| +|Size:|42.0 MB| +|Case sensitive:|false| \ No newline at end of file From 6c6beef90e14aa2b65efb3ebb48cace97504b20f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:44:52 +0700 Subject: [PATCH 13/92] Add model 2023-07-30-albert_embeddings_albert_large_bahasa_cased_ms --- ...embeddings_albert_large_bahasa_cased_ms.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_bahasa_cased_ms.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_bahasa_cased_ms.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_bahasa_cased_ms.md new file mode 100644 index 00000000000000..e960c133c8e959 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_bahasa_cased_ms.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Malay ALBERT Embeddings (Large) +author: John Snow Labs +name: albert_embeddings_albert_large_bahasa_cased +date: 2023-07-30 +tags: [albert, embeddings, ms, open_source, onnx] +task: Embeddings +language: ms +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `albert-large-bahasa-cased` is a Malay model originally trained by `malay-huggingface`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_bahasa_cased_ms_5.0.0_3.0_1690753388948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_bahasa_cased_ms_5.0.0_3.0_1690753388948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ +.setInputCol("text") \ +.setOutputCol("document") + +tokenizer = Tokenizer() \ +.setInputCols("document") \ +.setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_large_bahasa_cased","ms") \ +.setInputCols(["document", "token"]) \ +.setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["Saya suka Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols(Array("document")) +.setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_embeddings_albert_large_bahasa_cased","ms") +.setInputCols(Array("document", "token")) +.setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("Saya suka Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("ms.embed.albert").predict("""Saya suka Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_embeddings_albert_large_bahasa_cased| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[bert]| +|Language:|ms| +|Size:|63.6 MB| +|Case sensitive:|false| \ No newline at end of file From 42e5f55a2cb1274d4b4dd65de6b7b32ab70ac876 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:58:10 +0700 Subject: [PATCH 14/92] Add model 2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr --- ...ddings_das22_10_camembert_pretrained_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md new file mode 100644 index 00000000000000..55d03088434fd5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from HueyNemud) +author: John Snow Labs +name: camembert_embeddings_das22_10_camembert_pretrained +date: 2023-07-30 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `das22-10-camembert_pretrained` is a French model originally trained by `HueyNemud`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.0_3.0_1690754281409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.0_3.0_1690754281409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_das22_10_camembert_pretrained","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_das22_10_camembert_pretrained","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.by_hueynemud").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_das22_10_camembert_pretrained| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|412.8 MB| +|Case sensitive:|true| \ No newline at end of file From f0f5261b43468f86e18ef77dd63c34cdd54d9f19 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 04:59:10 +0700 Subject: [PATCH 15/92] Add model 2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr --- ...embeddings_zhenghuabin_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md new file mode 100644 index 00000000000000..02316cb380ead8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from zhenghuabin) +author: John Snow Labs +name: camembert_embeddings_zhenghuabin_generic_model +date: 2023-07-30 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model` is a French model originally trained by `zhenghuabin`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.0_3.0_1690754329730.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.0_3.0_1690754329730.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_zhenghuabin_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_zhenghuabin_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_zhenghuabin").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_zhenghuabin_generic_model| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 15e0255e2aa4197684ebb4d987a41614b547f324 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 05:14:06 +0700 Subject: [PATCH 16/92] Add model 2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr --- ...0-camembert_embeddings_das22_10_camembert_pretrained_fr.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md index 55d03088434fd5..0260c7f15208a9 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.0_3.0_1690754281409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.0_3.0_1690754281409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.0_3.0_1690755233634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.0_3.0_1690755233634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use From ed624b59948962580f0b6c1e263687c426bf22be Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 05:15:06 +0700 Subject: [PATCH 17/92] Add model 2023-07-30-camembert_embeddings_camembert_mlm_fr --- ...0-camembert_embeddings_camembert_mlm_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_mlm_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_mlm_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_mlm_fr.md new file mode 100644 index 00000000000000..972f46e2c90b10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_mlm_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from Jodsa) +author: John Snow Labs +name: camembert_embeddings_camembert_mlm +date: 2023-07-30 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `camembert_mlm` is a French model originally trained by `Jodsa`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_mlm_fr_5.0.0_3.0_1690755250840.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_mlm_fr_5.0.0_3.0_1690755250840.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_camembert_mlm","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_camembert_mlm","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.by_jodsa").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_camembert_mlm| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|417.9 MB| +|Case sensitive:|true| \ No newline at end of file From 3bef5bf6270a5edb4b8e3168a0073eed2da4a662 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 05:16:06 +0700 Subject: [PATCH 18/92] Add model 2023-07-30-camembert_embeddings_edge2992_generic_model_fr --- ...rt_embeddings_edge2992_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_edge2992_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_edge2992_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_edge2992_generic_model_fr.md new file mode 100644 index 00000000000000..0cd2a8e0f22f39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_edge2992_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from edge2992) +author: John Snow Labs +name: camembert_embeddings_edge2992_generic_model +date: 2023-07-30 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `edge2992`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_edge2992_generic_model_fr_5.0.0_3.0_1690755314295.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_edge2992_generic_model_fr_5.0.0_3.0_1690755314295.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_edge2992_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_edge2992_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_edge2992").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_edge2992_generic_model| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 9cc51533633e8d6458ce967a5787f5ce23e409f5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 05:17:06 +0700 Subject: [PATCH 19/92] Add model 2023-07-30-camembert_embeddings_elusive_magnolia_generic_model_fr --- ...dings_elusive_magnolia_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elusive_magnolia_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elusive_magnolia_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elusive_magnolia_generic_model_fr.md new file mode 100644 index 00000000000000..01f5d026f5be1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elusive_magnolia_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from elusive-magnolia) +author: John Snow Labs +name: camembert_embeddings_elusive_magnolia_generic_model +date: 2023-07-30 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `elusive-magnolia`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elusive_magnolia_generic_model_fr_5.0.0_3.0_1690755320528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elusive_magnolia_generic_model_fr_5.0.0_3.0_1690755320528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_elusive_magnolia_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_elusive_magnolia_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_elusive_magnolia").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_elusive_magnolia_generic_model| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 939c740108d5a65ad752c122cd94fa08f4e5560c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 05:18:06 +0700 Subject: [PATCH 20/92] Add model 2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr --- ...07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md index 02316cb380ead8..bccae72328ab59 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.0_3.0_1690754329730.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.0_3.0_1690754329730.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.0_3.0_1690755345824.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.0_3.0_1690755345824.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use From 17ba326baeaade7355c7614d29b46d0e0b55d76f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 05:19:06 +0700 Subject: [PATCH 21/92] Add model 2023-07-30-camembert_embeddings_camembert_aux_amandes_mt --- ...ert_embeddings_camembert_aux_amandes_mt.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_aux_amandes_mt.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_aux_amandes_mt.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_aux_amandes_mt.md new file mode 100644 index 00000000000000..5129e15a7eef7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_aux_amandes_mt.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Maltese CamemBert Embeddings (from fenrhjen) +author: John Snow Labs +name: camembert_embeddings_camembert_aux_amandes +date: 2023-07-30 +tags: [mt, open_source, camembert, embeddings, onnx] +task: Embeddings +language: mt +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `camembert_aux_amandes` is a Maltese model originally trained by `fenrhjen`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_aux_amandes_mt_5.0.0_3.0_1690755523370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_aux_amandes_mt_5.0.0_3.0_1690755523370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_camembert_aux_amandes","mt") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["I Love Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_camembert_aux_amandes","mt") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("I Love Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("mt.embed.camembert").predict("""I Love Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_camembert_aux_amandes| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|mt| +|Size:|412.4 MB| +|Case sensitive:|true| \ No newline at end of file From f49a9bc24a0062d5f2f5d19e24cc1a19c583d735 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 05:20:21 +0700 Subject: [PATCH 22/92] Add model 2023-07-30-camembert_embeddings_elliotsmith_generic_model_fr --- ...embeddings_elliotsmith_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elliotsmith_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elliotsmith_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elliotsmith_generic_model_fr.md new file mode 100644 index 00000000000000..d84271e9b0df5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elliotsmith_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from elliotsmith) +author: John Snow Labs +name: camembert_embeddings_elliotsmith_generic_model +date: 2023-07-30 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `elliotsmith`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elliotsmith_generic_model_fr_5.0.0_3.0_1690755611886.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elliotsmith_generic_model_fr_5.0.0_3.0_1690755611886.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_elliotsmith_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_elliotsmith_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_elliotsmith").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_elliotsmith_generic_model| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 5c9a8d66eba0ad8b69b5610e8b61fc3b667080e2 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Mon, 31 Jul 2023 05:21:22 +0700 Subject: [PATCH 23/92] Add model 2023-07-30-camembert_embeddings_dianeshan_generic_model_fr --- ...t_embeddings_dianeshan_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_dianeshan_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_dianeshan_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_dianeshan_generic_model_fr.md new file mode 100644 index 00000000000000..2e647b9f30743a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_dianeshan_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from dianeshan) +author: John Snow Labs +name: camembert_embeddings_dianeshan_generic_model +date: 2023-07-30 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `dianeshan`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_dianeshan_generic_model_fr_5.0.0_3.0_1690755640623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_dianeshan_generic_model_fr_5.0.0_3.0_1690755640623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_dianeshan_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_dianeshan_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_dianeshan").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_dianeshan_generic_model| +|Compatibility:|Spark NLP 5.0.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 9ed8525b335c888a4f4138e570371f8cfc68cfa9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 02:07:15 +0500 Subject: [PATCH 24/92] fixed wrong version --- .../2023-07-30-albert_embeddings_ALR_BERT_ro.md | 8 ++++---- .../2023-07-30-albert_embeddings_albert_base_arabic_ar.md | 8 ++++---- ...07-30-albert_embeddings_albert_base_bahasa_cased_ms.md | 8 ++++---- ...-07-30-albert_embeddings_albert_base_japanese_v1_ja.md | 8 ++++---- .../2023-07-30-albert_embeddings_albert_fa_base_v2_fa.md | 8 ++++---- ...3-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa.md | 8 ++++---- .../2023-07-30-albert_embeddings_albert_german_ner_de.md | 8 ++++---- ...2023-07-30-albert_embeddings_albert_large_arabic_ar.md | 8 ++++---- ...7-30-albert_embeddings_albert_large_bahasa_cased_ms.md | 8 ++++---- ...07-30-albert_embeddings_albert_tiny_bahasa_cased_ms.md | 8 ++++---- .../2023-07-30-albert_embeddings_fralbert_base_fr.md | 8 ++++---- .../2023-07-30-albert_embeddings_marathi_albert_mr.md | 8 ++++---- .../2023-07-30-albert_embeddings_marathi_albert_v2_mr.md | 8 ++++---- ...07-30-camembert_embeddings_camembert_aux_amandes_mt.md | 8 ++++---- .../2023-07-30-camembert_embeddings_camembert_mlm_fr.md | 8 ++++---- ...membert_embeddings_das22_10_camembert_pretrained_fr.md | 8 ++++---- ...-30-camembert_embeddings_dianeshan_generic_model_fr.md | 8 ++++---- ...7-30-camembert_embeddings_edge2992_generic_model_fr.md | 8 ++++---- ...0-camembert_embeddings_elliotsmith_generic_model_fr.md | 8 ++++---- ...embert_embeddings_elusive_magnolia_generic_model_fr.md | 8 ++++---- ...0-camembert_embeddings_zhenghuabin_generic_model_fr.md | 8 
++++---- 21 files changed, 84 insertions(+), 84 deletions(-) diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_ALR_BERT_ro.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_ALR_BERT_ro.md index 08882527743322..86db1efa595bc0 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_ALR_BERT_ro.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_ALR_BERT_ro.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, ro, open_source, onnx] task: Embeddings language: ro -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_ALR_BERT_ro_5.0.0_3.0_1690752767725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_ALR_BERT_ro_5.0.0_3.0_1690752767725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_ALR_BERT_ro_5.0.2_3.0_1690752767725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_ALR_BERT_ro_5.0.2_3.0_1690752767725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("ro.embed.ALR_BERT").predict("""Îmi place Spark NLP""") {:.table-model} |---|---| |Model Name:|albert_embeddings_ALR_BERT| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_arabic_ar.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_arabic_ar.md index 
a843406cdddce0..22896e9a9cd377 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_arabic_ar.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_arabic_ar.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, ar, open_source, onnx] task: Embeddings language: ar -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_arabic_ar_5.0.0_3.0_1690753212237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_arabic_ar_5.0.0_3.0_1690753212237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_arabic_ar_5.0.2_3.0_1690753212237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_arabic_ar_5.0.2_3.0_1690753212237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("ar.embed.albert").predict("""أنا أحب شرارة NLP""") {:.table-model} |---|---| |Model Name:|albert_embeddings_albert_base_arabic| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_bahasa_cased_ms.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_bahasa_cased_ms.md index 83553b8908a5f7..f3fca4de758ac0 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_bahasa_cased_ms.md +++ 
b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_bahasa_cased_ms.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, ms, open_source, onnx] task: Embeddings language: ms -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_bahasa_cased_ms_5.0.0_3.0_1690753174981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_bahasa_cased_ms_5.0.0_3.0_1690753174981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_bahasa_cased_ms_5.0.2_3.0_1690753174981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_bahasa_cased_ms_5.0.2_3.0_1690753174981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("ms.embed.albert_base_bahasa_cased").predict("""Saya suka Spark NLP""") {:.table-model} |---|---| |Model Name:|albert_embeddings_albert_base_bahasa_cased| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_japanese_v1_ja.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_japanese_v1_ja.md index e97c48ba8709fb..ad1185dcccdb33 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_japanese_v1_ja.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_base_japanese_v1_ja.md @@ 
-7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, ja, open_source, onnx] task: Embeddings language: ja -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_japanese_v1_ja_5.0.0_3.0_1690752780150.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_japanese_v1_ja_5.0.0_3.0_1690752780150.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_japanese_v1_ja_5.0.2_3.0_1690752780150.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_base_japanese_v1_ja_5.0.2_3.0_1690752780150.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("ja.embed.albert_base_japanese_v1").predict("""私はSpark NLPを愛し {:.table-model} |---|---| |Model Name:|albert_embeddings_albert_base_japanese_v1| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_base_v2_fa.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_base_v2_fa.md index 313848f8456401..db63deaaa6b3dd 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_base_v2_fa.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_base_v2_fa.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, fa, open_source, onnx] task: Embeddings language: fa -edition: Spark NLP 
5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_base_v2_fa_5.0.0_3.0_1690752839758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_base_v2_fa_5.0.0_3.0_1690752839758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_base_v2_fa_5.0.2_3.0_1690752839758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_base_v2_fa_5.0.2_3.0_1690752839758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fa.embed.albert").predict("""من عاشق جرقه NLP هستم""") {:.table-model} |---|---| |Model Name:|albert_embeddings_albert_fa_base_v2| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa.md index 789c042888482b..6537edaada379d 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_fa_zwnj_base_v2_fa.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, fa, open_source, onnx] task: Embeddings language: fa -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to 
Hugging Face, adapted and import {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_zwnj_base_v2_fa_5.0.0_3.0_1690752897049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_zwnj_base_v2_fa_5.0.0_3.0_1690752897049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_zwnj_base_v2_fa_5.0.2_3.0_1690752897049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_fa_zwnj_base_v2_fa_5.0.2_3.0_1690752897049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fa.embed.albert_fa_zwnj_base_v2").predict("""من عاشق جرقه NL {:.table-model} |---|---| |Model Name:|albert_embeddings_albert_fa_zwnj_base_v2| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_german_ner_de.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_german_ner_de.md index 0791ee2829fadc..038a9d98ca32ff 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_german_ner_de.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_german_ner_de.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, de, open_source, onnx] task: Embeddings language: de -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} 
-[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_german_ner_de_5.0.0_3.0_1690752850054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_german_ner_de_5.0.0_3.0_1690752850054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_german_ner_de_5.0.2_3.0_1690752850054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_german_ner_de_5.0.2_3.0_1690752850054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("de.embed.albert_german_ner").predict("""Ich liebe Funken NLP""") {:.table-model} |---|---| |Model Name:|albert_embeddings_albert_german_ner| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_arabic_ar.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_arabic_ar.md index 2e1bcfa05cb027..45627e3aa3b848 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_arabic_ar.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_arabic_ar.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, ar, open_source, onnx] task: Embeddings language: ar -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} 
-[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_arabic_ar_5.0.0_3.0_1690752835564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_arabic_ar_5.0.0_3.0_1690752835564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_arabic_ar_5.0.2_3.0_1690752835564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_arabic_ar_5.0.2_3.0_1690752835564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("ar.embed.albert_large_arabic").predict("""أنا أحب شرارة NLP {:.table-model} |---|---| |Model Name:|albert_embeddings_albert_large_arabic| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_bahasa_cased_ms.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_bahasa_cased_ms.md index e960c133c8e959..b1d5a55edb5f34 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_bahasa_cased_ms.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_large_bahasa_cased_ms.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, ms, open_source, onnx] task: Embeddings language: ms -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} 
-[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_bahasa_cased_ms_5.0.0_3.0_1690753388948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_bahasa_cased_ms_5.0.0_3.0_1690753388948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_bahasa_cased_ms_5.0.2_3.0_1690753388948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_large_bahasa_cased_ms_5.0.2_3.0_1690753388948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("ms.embed.albert").predict("""Saya suka Spark NLP""") {:.table-model} |---|---| |Model Name:|albert_embeddings_albert_large_bahasa_cased| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_tiny_bahasa_cased_ms.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_tiny_bahasa_cased_ms.md index 4736fea6022af0..a50ff855bec557 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_tiny_bahasa_cased_ms.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_albert_tiny_bahasa_cased_ms.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, ms, open_source, onnx] task: Embeddings language: ms -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} 
-[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_tiny_bahasa_cased_ms_5.0.0_3.0_1690752867859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_tiny_bahasa_cased_ms_5.0.0_3.0_1690752867859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_tiny_bahasa_cased_ms_5.0.2_3.0_1690752867859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_albert_tiny_bahasa_cased_ms_5.0.2_3.0_1690752867859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("ms.embed.albert_tiny_bahasa_cased").predict("""Saya suka Spark NLP""") {:.table-model} |---|---| |Model Name:|albert_embeddings_albert_tiny_bahasa_cased| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_fralbert_base_fr.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_fralbert_base_fr.md index a72bb4a60dd4e2..36baf514c0219f 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_fralbert_base_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_fralbert_base_fr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, fr, open_source, onnx] task: Embeddings language: fr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} 
-[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_fralbert_base_fr_5.0.0_3.0_1690752813444.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_fralbert_base_fr_5.0.0_3.0_1690752813444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_fralbert_base_fr_5.0.2_3.0_1690752813444.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_fralbert_base_fr_5.0.2_3.0_1690752813444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fr.embed.albert").predict("""J'adore Spark Nlp""") {:.table-model} |---|---| |Model Name:|albert_embeddings_fralbert_base| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_mr.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_mr.md index 7c918883dfd0a0..f1e0b9468b9294 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_mr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_mr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, mr, open_source, onnx] task: Embeddings language: mr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_mr_5.0.0_3.0_1690752853424.zip){:.button.button-orange.button-orange-trans.arr.button-icon} 
-[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_mr_5.0.0_3.0_1690752853424.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_mr_5.0.2_3.0_1690752853424.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_mr_5.0.2_3.0_1690752853424.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("mr.embed.albert").predict("""मला स्पार्क एनए {:.table-model} |---|---| |Model Name:|albert_embeddings_marathi_albert| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_v2_mr.md b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_v2_mr.md index c640824c2d9fcb..401cb9ec3030b0 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_v2_mr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-albert_embeddings_marathi_albert_v2_mr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [albert, embeddings, mr, open_source, onnx] task: Embeddings language: mr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained ALBERT Embeddings model, uploaded to Hugging Face, adapted and import {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_v2_mr_5.0.0_3.0_1690753295251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_v2_mr_5.0.0_3.0_1690753295251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_v2_mr_5.0.2_3.0_1690753295251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_embeddings_marathi_albert_v2_mr_5.0.2_3.0_1690753295251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("mr.embed.albert_v2").predict("""मला स्पार्क एन {:.table-model} |---|---| |Model Name:|albert_embeddings_marathi_albert_v2| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_aux_amandes_mt.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_aux_amandes_mt.md index 5129e15a7eef7e..2de5a5480b5f52 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_aux_amandes_mt.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_aux_amandes_mt.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [mt, open_source, camembert, embeddings, onnx] task: Embeddings language: mt -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_aux_amandes_mt_5.0.0_3.0_1690755523370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_aux_amandes_mt_5.0.0_3.0_1690755523370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_aux_amandes_mt_5.0.2_3.0_1690755523370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_aux_amandes_mt_5.0.2_3.0_1690755523370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("mt.embed.camembert").predict("""I Love Spark NLP""") {:.table-model} |---|---| |Model Name:|camembert_embeddings_camembert_aux_amandes| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_mlm_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_mlm_fr.md index 972f46e2c90b10..bf7603675b6c3e 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_mlm_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_camembert_mlm_fr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [fr, open_source, camembert, embeddings, onnx] task: Embeddings language: fr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_mlm_fr_5.0.0_3.0_1690755250840.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_mlm_fr_5.0.0_3.0_1690755250840.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_mlm_fr_5.0.2_3.0_1690755250840.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_camembert_mlm_fr_5.0.2_3.0_1690755250840.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fr.embed.camembert.by_jodsa").predict("""J'adore Spark NLP""") {:.table-model} |---|---| |Model Name:|camembert_embeddings_camembert_mlm| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md index 0260c7f15208a9..88001282ca4e52 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_das22_10_camembert_pretrained_fr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [fr, open_source, camembert, embeddings, onnx] task: Embeddings language: fr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.0_3.0_1690755233634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.0_3.0_1690755233634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.2_3.0_1690755233634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_das22_10_camembert_pretrained_fr_5.0.2_3.0_1690755233634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fr.embed.camembert.by_hueynemud").predict("""J'adore Spark NLP""") {:.table-model} |---|---| |Model Name:|camembert_embeddings_das22_10_camembert_pretrained| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_dianeshan_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_dianeshan_generic_model_fr.md index 2e647b9f30743a..2b802a94df3c0a 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_dianeshan_generic_model_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_dianeshan_generic_model_fr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [fr, open_source, camembert, embeddings, onnx] task: Embeddings language: fr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_dianeshan_generic_model_fr_5.0.0_3.0_1690755640623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_dianeshan_generic_model_fr_5.0.0_3.0_1690755640623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_dianeshan_generic_model_fr_5.0.2_3.0_1690755640623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_dianeshan_generic_model_fr_5.0.2_3.0_1690755640623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fr.embed.camembert.generic.by_dianeshan").predict("""J'adore Spark NLP {:.table-model} |---|---| |Model Name:|camembert_embeddings_dianeshan_generic_model| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_edge2992_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_edge2992_generic_model_fr.md index 0cd2a8e0f22f39..1696cd080f1959 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_edge2992_generic_model_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_edge2992_generic_model_fr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [fr, open_source, camembert, embeddings, onnx] task: Embeddings language: fr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_edge2992_generic_model_fr_5.0.0_3.0_1690755314295.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_edge2992_generic_model_fr_5.0.0_3.0_1690755314295.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_edge2992_generic_model_fr_5.0.2_3.0_1690755314295.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_edge2992_generic_model_fr_5.0.2_3.0_1690755314295.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fr.embed.camembert.generic.by_edge2992").predict("""J'adore Spark NLP" {:.table-model} |---|---| |Model Name:|camembert_embeddings_edge2992_generic_model| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elliotsmith_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elliotsmith_generic_model_fr.md index d84271e9b0df5c..04b6b4dccd2773 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elliotsmith_generic_model_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elliotsmith_generic_model_fr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [fr, open_source, camembert, embeddings, onnx] task: Embeddings language: fr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elliotsmith_generic_model_fr_5.0.0_3.0_1690755611886.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elliotsmith_generic_model_fr_5.0.0_3.0_1690755611886.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elliotsmith_generic_model_fr_5.0.2_3.0_1690755611886.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elliotsmith_generic_model_fr_5.0.2_3.0_1690755611886.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fr.embed.camembert.generic.by_elliotsmith").predict("""J'adore Spark N {:.table-model} |---|---| |Model Name:|camembert_embeddings_elliotsmith_generic_model| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elusive_magnolia_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elusive_magnolia_generic_model_fr.md index 01f5d026f5be1a..9b0644bf825cea 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elusive_magnolia_generic_model_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_elusive_magnolia_generic_model_fr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [fr, open_source, camembert, embeddings, onnx] task: Embeddings language: fr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elusive_magnolia_generic_model_fr_5.0.0_3.0_1690755320528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elusive_magnolia_generic_model_fr_5.0.0_3.0_1690755320528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elusive_magnolia_generic_model_fr_5.0.2_3.0_1690755320528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_elusive_magnolia_generic_model_fr_5.0.2_3.0_1690755320528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fr.embed.camembert.generic.by_elusive_magnolia").predict("""J'adore Sp {:.table-model} |---|---| |Model Name:|camembert_embeddings_elusive_magnolia_generic_model| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| diff --git a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md index bccae72328ab59..5bfe25af51d77e 100644 --- a/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-30-camembert_embeddings_zhenghuabin_generic_model_fr.md @@ -7,7 +7,7 @@ date: 2023-07-30 tags: [fr, open_source, camembert, embeddings, onnx] task: Embeddings language: fr -edition: Spark NLP 5.0.0 +edition: Spark NLP 5.0.2 spark_version: 3.0 supported: true engine: onnx @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.0_3.0_1690755345824.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.0_3.0_1690755345824.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.2_3.0_1690755345824.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_zhenghuabin_generic_model_fr_5.0.2_3.0_1690755345824.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use @@ -89,7 +89,7 @@ nlu.load("fr.embed.camembert.generic.by_zhenghuabin").predict("""J'adore Spark N {:.table-model} |---|---| |Model Name:|camembert_embeddings_zhenghuabin_generic_model| -|Compatibility:|Spark NLP 5.0.0+| +|Compatibility:|Spark NLP 5.0.2+| |License:|Open Source| |Edition:|Official| |Input Labels:|[sentence, token]| From 574699e223d58c4f33b6ebbd20df5ff42fc5e629 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:14:35 +0700 Subject: [PATCH 25/92] Add model 2023-07-31-camembert_embeddings_ankitkupadhyay_generic_model_fr --- ...eddings_ankitkupadhyay_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ankitkupadhyay_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ankitkupadhyay_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ankitkupadhyay_generic_model_fr.md new file mode 100644 index 00000000000000..7a4d6aab6d24ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ankitkupadhyay_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from ankitkupadhyay) +author: John Snow Labs +name: camembert_embeddings_ankitkupadhyay_generic_model +date: 2023-07-31 +tags: [fr, 
open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `ankitkupadhyay`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_ankitkupadhyay_generic_model_fr_5.0.2_3.0_1690838066435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_ankitkupadhyay_generic_model_fr_5.0.2_3.0_1690838066435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_ankitkupadhyay_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_ankitkupadhyay_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_ankitkupadhyay").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_ankitkupadhyay_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From b9d18d2059f7baa4003af967488527aaa9b6532a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:15:35 +0700 Subject: [PATCH 26/92] Add model 2023-07-31-camembert_embeddings_devtrent_generic_model_fr --- ...rt_embeddings_devtrent_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_devtrent_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_devtrent_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_devtrent_generic_model_fr.md new file mode 100644 index 00000000000000..9cb2ddef4dc357 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_devtrent_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from devtrent) +author: John Snow Labs +name: camembert_embeddings_devtrent_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `devtrent`.
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_devtrent_generic_model_fr_5.0.2_3.0_1690838103819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_devtrent_generic_model_fr_5.0.2_3.0_1690838103819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_devtrent_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_devtrent_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_devtrent").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_devtrent_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 61e1750d0d04b59643488a7f363adaa87c4ffdb2 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:18:39 +0700 Subject: [PATCH 27/92] Add model 2023-07-31-camembert_embeddings_eduardopds_generic_model_fr --- ..._embeddings_eduardopds_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_eduardopds_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_eduardopds_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_eduardopds_generic_model_fr.md new file mode 100644 index 00000000000000..b81bdd170972c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_eduardopds_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from eduardopds) +author: John Snow Labs +name: camembert_embeddings_eduardopds_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `eduardopds`.
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_eduardopds_generic_model_fr_5.0.2_3.0_1690838312633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_eduardopds_generic_model_fr_5.0.2_3.0_1690838312633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_eduardopds_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_eduardopds_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_eduardopds").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_eduardopds_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 4e3150c7ec15bf14ea892c1b01b979277c4ee61d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:24:02 +0700 Subject: [PATCH 28/92] Add model 2023-07-31-camembert_embeddings_adeiMousa_generic_model_fr --- ...t_embeddings_adeiMousa_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_adeiMousa_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_adeiMousa_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_adeiMousa_generic_model_fr.md new file mode 100644 index 00000000000000..9d644c783ef626 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_adeiMousa_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from adeiMousa) +author: John Snow Labs +name: camembert_embeddings_adeiMousa_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `adeiMousa`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_adeiMousa_generic_model_fr_5.0.2_3.0_1690838633068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_adeiMousa_generic_model_fr_5.0.2_3.0_1690838633068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_adeiMousa_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_adeiMousa_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_adeiMousa_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From d1a13b706409780d3593583a038efba5eeb3e464 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:25:02 +0700 Subject: [PATCH 29/92] Add model 2023-07-31-camembert_embeddings_ericchchiu_generic_model_fr --- ..._embeddings_ericchchiu_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ericchchiu_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ericchchiu_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ericchchiu_generic_model_fr.md new file mode 100644 index 00000000000000..32e911654222d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ericchchiu_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from ericchchiu) +author: John Snow Labs +name: camembert_embeddings_ericchchiu_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `ericchchiu`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_ericchchiu_generic_model_fr_5.0.2_3.0_1690838666122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_ericchchiu_generic_model_fr_5.0.2_3.0_1690838666122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_ericchchiu_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_ericchchiu_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_ericchchiu").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_ericchchiu_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 0887eb506bc48af40f04fd78800cbaf2fa039b70 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:26:02 +0700 Subject: [PATCH 30/92] Add model 2023-07-31-camembert_embeddings_Sebu_generic_model_fr --- ...embert_embeddings_Sebu_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Sebu_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Sebu_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Sebu_generic_model_fr.md new file mode 100644 index 00000000000000..916051f233bf31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Sebu_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from Sebu) +author: John Snow Labs +name: camembert_embeddings_Sebu_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `Sebu`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Sebu_generic_model_fr_5.0.2_3.0_1690838692036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Sebu_generic_model_fr_5.0.2_3.0_1690838692036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Sebu_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Sebu_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_Sebu_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From f8905a71b623550d1d287be73bc7f44a203c4acf Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:27:03 +0700 Subject: [PATCH 31/92] Add model 2023-07-31-camembert_embeddings_Weipeng_generic_model_fr --- ...ert_embeddings_Weipeng_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Weipeng_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Weipeng_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Weipeng_generic_model_fr.md new file mode 100644 index 00000000000000..edafd431387b1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Weipeng_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from Weipeng) +author: John Snow Labs +name: camembert_embeddings_Weipeng_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `Weipeng`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Weipeng_generic_model_fr_5.0.2_3.0_1690838714201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Weipeng_generic_model_fr_5.0.2_3.0_1690838714201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Weipeng_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Weipeng_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_Weipeng_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 9e81bb349d222693698c768e8c6eb1145765dbcf Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:28:02 +0700 Subject: [PATCH 32/92] Add model 2023-07-31-camembert_embeddings_codingJacob_generic_model_fr --- ...embeddings_codingJacob_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_codingJacob_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_codingJacob_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_codingJacob_generic_model_fr.md new file mode 100644 index 00000000000000..b325e41991bc1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_codingJacob_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from codingJacob) +author: John Snow Labs +name: camembert_embeddings_codingJacob_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `codingJacob`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_codingJacob_generic_model_fr_5.0.2_3.0_1690838696305.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_codingJacob_generic_model_fr_5.0.2_3.0_1690838696305.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_codingJacob_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_codingJacob_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_codingJacob_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 6c3267c4c010ed37c10d11b70134528a9d6a4e67 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:29:53 +0700 Subject: [PATCH 33/92] Add model 2023-07-31-camembert_embeddings_SummFinFR_fr --- ...07-31-camembert_embeddings_SummFinFR_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_SummFinFR_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_SummFinFR_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_SummFinFR_fr.md new file mode 100644 index 00000000000000..24347b800134c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_SummFinFR_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from Ghani-25) +author: John Snow Labs +name: camembert_embeddings_SummFinFR +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `SummFinFR` is a French model originally trained by `Ghani-25`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_SummFinFR_fr_5.0.2_3.0_1690838983983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_SummFinFR_fr_5.0.2_3.0_1690838983983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_SummFinFR","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_SummFinFR","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_SummFinFR| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|412.4 MB| +|Case sensitive:|true| \ No newline at end of file From 7d2cf8e884e6b428237e4d9b0176221012747ddd Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:30:53 +0700 Subject: [PATCH 34/92] Add model 2023-07-31-camembert_embeddings_MYX4567_generic_model_fr --- ...ert_embeddings_MYX4567_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_MYX4567_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_MYX4567_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_MYX4567_generic_model_fr.md new file mode 100644 index 00000000000000..323e24d5e285d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_MYX4567_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from MYX4567) +author: John Snow Labs +name: camembert_embeddings_MYX4567_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `MYX4567`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_MYX4567_generic_model_fr_5.0.2_3.0_1690839035578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_MYX4567_generic_model_fr_5.0.2_3.0_1690839035578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_MYX4567_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_MYX4567_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_MYX4567_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 16882729ba19f175cb557318c681c676a762618f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:35:21 +0700 Subject: [PATCH 35/92] Add model 2023-07-31-camembert_embeddings_Katster_generic_model_fr --- ...ert_embeddings_Katster_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Katster_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Katster_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Katster_generic_model_fr.md new file mode 100644 index 00000000000000..2521d52f8bcab8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Katster_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from Katster) +author: John Snow Labs +name: camembert_embeddings_Katster_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `Katster`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Katster_generic_model_fr_5.0.2_3.0_1690839314673.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Katster_generic_model_fr_5.0.2_3.0_1690839314673.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Katster_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Katster_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_Katster_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 10e443e5102d39fff1709dfeb35de18352c705d0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:37:09 +0700 Subject: [PATCH 36/92] Add model 2023-07-31-camembert_embeddings_MYX4567_generic_model_fr --- ...023-07-31-camembert_embeddings_MYX4567_generic_model_fr.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_MYX4567_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_MYX4567_generic_model_fr.md index 323e24d5e285d6..2fe8dc27a6876f 100644 --- a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_MYX4567_generic_model_fr.md +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_MYX4567_generic_model_fr.md @@ -28,8 +28,8 @@ Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_MYX4567_generic_model_fr_5.0.2_3.0_1690839035578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_MYX4567_generic_model_fr_5.0.2_3.0_1690839035578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_MYX4567_generic_model_fr_5.0.2_3.0_1690839423024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_MYX4567_generic_model_fr_5.0.2_3.0_1690839423024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use From 3fa03d548b10c0ec639717926d9b22349c491320 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:42:06 +0700 Subject: [PATCH 37/92] Add model 2023-07-31-camembert_embeddings_JonathanSum_generic_model_fr --- ...embeddings_JonathanSum_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_JonathanSum_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_JonathanSum_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_JonathanSum_generic_model_fr.md new file mode 100644 index 00000000000000..373bdacf419b42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_JonathanSum_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from JonathanSum) +author: John Snow Labs +name: camembert_embeddings_JonathanSum_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `JonathanSum`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_JonathanSum_generic_model_fr_5.0.2_3.0_1690839719364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_JonathanSum_generic_model_fr_5.0.2_3.0_1690839719364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_JonathanSum_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_JonathanSum_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_JonathanSum_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 7e9c8233f8fb9d0421a9ea49bd9dd452a38378af Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:43:06 +0700 Subject: [PATCH 38/92] Add model 2023-07-31-camembert_embeddings_Leisa_generic_model_fr --- ...mbert_embeddings_Leisa_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Leisa_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Leisa_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Leisa_generic_model_fr.md new file mode 100644 index 00000000000000..11fe2eba2f4b9e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Leisa_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from Leisa) +author: John Snow Labs +name: camembert_embeddings_Leisa_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `Leisa`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Leisa_generic_model_fr_5.0.2_3.0_1690839743810.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Leisa_generic_model_fr_5.0.2_3.0_1690839743810.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Leisa_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Leisa_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_Leisa_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From ad58b0e79cdeae45242b979c378cd0e36ead22ca Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:44:10 +0700 Subject: [PATCH 39/92] Add model 2023-07-31-camembert_embeddings_adam1224_generic_model_fr --- ...rt_embeddings_adam1224_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_adam1224_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_adam1224_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_adam1224_generic_model_fr.md new file mode 100644 index 00000000000000..5fa59759dab81d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_adam1224_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from adam1224) +author: John Snow Labs +name: camembert_embeddings_adam1224_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `adam1224`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_adam1224_generic_model_fr_5.0.2_3.0_1690839841421.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_adam1224_generic_model_fr_5.0.2_3.0_1690839841421.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_adam1224_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_adam1224_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_adam1224").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_adam1224_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 89410581853d72a941d7f25e5787de3dcb497b18 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:45:35 +0700 Subject: [PATCH 40/92] Add model 2023-07-31-camembert_embeddings_est_roberta_et --- ...-31-camembert_embeddings_est_roberta_et.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_est_roberta_et.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_est_roberta_et.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_est_roberta_et.md new file mode 100644 index 00000000000000..e6db30b8ea753c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_est_roberta_et.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Estonian CamemBert Embeddings (from EMBEDDIA) +author: John Snow Labs +name: camembert_embeddings_est_roberta +date: 2023-07-31 +tags: [et, open_source, camembert, embeddings, onnx] +task: Embeddings +language: et +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `est-roberta` is an Estonian model originally trained by `EMBEDDIA`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_est_roberta_et_5.0.2_3.0_1690839928128.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_est_roberta_et_5.0.2_3.0_1690839928128.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_est_roberta","et") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["Ma armastan sädet nlp"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_est_roberta","et") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("Ma armastan sädet nlp").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("et.embed.camembert").predict("""Ma armastan sädet nlp""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_est_roberta| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|et| +|Size:|277.9 MB| +|Case sensitive:|true| \ No newline at end of file From e50073c132b29466b30570f1ceb17d2f1976ea82 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:48:52 +0700 Subject: [PATCH 41/92] Add model 2023-07-31-camembert_embeddings_generic2_fr --- ...-07-31-camembert_embeddings_generic2_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_generic2_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_generic2_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_generic2_fr.md new file mode 100644 index 00000000000000..cadd0e57fa1d59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_generic2_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from hackertec) +author: John Snow Labs +name: camembert_embeddings_generic2 +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy2` is a French model originally trained by `hackertec`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_generic2_fr_5.0.2_3.0_1690840126577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_generic2_fr_5.0.2_3.0_1690840126577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_generic2","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_generic2","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_hackertec").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_generic2| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 2f1fe4d6bcb08eb9c2c62cd60096c98424e93ecc Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:55:58 +0700 Subject: [PATCH 42/92] Add model 2023-07-31-camembert_embeddings_ysharma_generic_model_2_fr --- ...t_embeddings_ysharma_generic_model_2_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ysharma_generic_model_2_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ysharma_generic_model_2_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ysharma_generic_model_2_fr.md new file mode 100644 index 00000000000000..d2bcaa21f544ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_ysharma_generic_model_2_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from ysharma) +author: John Snow Labs +name: camembert_embeddings_ysharma_generic_model_2 +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model-2` is a French model originally trained by `ysharma`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_ysharma_generic_model_2_fr_5.0.2_3.0_1690840552187.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_ysharma_generic_model_2_fr_5.0.2_3.0_1690840552187.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_ysharma_generic_model_2","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_ysharma_generic_model_2","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_ysharma").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_ysharma_generic_model_2| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 9ee72c6dd13ffb017785183da59a94c6a6bf412b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:56:58 +0700 Subject: [PATCH 43/92] Add model 2023-07-31-camembert_embeddings_DoyyingFace_generic_model_fr --- ...embeddings_DoyyingFace_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_DoyyingFace_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_DoyyingFace_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_DoyyingFace_generic_model_fr.md new file mode 100644 index 00000000000000..eb8cc1f3446f54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_DoyyingFace_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from DoyyingFace) +author: John Snow Labs +name: camembert_embeddings_DoyyingFace_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `DoyyingFace`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_DoyyingFace_generic_model_fr_5.0.2_3.0_1690840568040.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_DoyyingFace_generic_model_fr_5.0.2_3.0_1690840568040.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_DoyyingFace_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_DoyyingFace_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_DoyyingFace_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From fc188fc6a73e0fb8a204abd02aefe3268b5cc242 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:57:58 +0700 Subject: [PATCH 44/92] Add model 2023-07-31-camembert_embeddings_Henrywang_generic_model_fr --- ...t_embeddings_Henrywang_generic_model_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Henrywang_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Henrywang_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Henrywang_generic_model_fr.md new file mode 100644 index 00000000000000..850aecb10f995c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_Henrywang_generic_model_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from Henrywang) +author: John Snow Labs +name: camembert_embeddings_Henrywang_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `Henrywang`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Henrywang_generic_model_fr_5.0.2_3.0_1690840573449.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_Henrywang_generic_model_fr_5.0.2_3.0_1690840573449.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Henrywang_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_Henrywang_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_Henrywang_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 8e355ee6a08e7db737de633bb32e4e9873f034bc Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 04:58:58 +0700 Subject: [PATCH 45/92] Add model 2023-07-31-camembert_embeddings_xkang_generic_model_fr --- ...mbert_embeddings_xkang_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_xkang_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_xkang_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_xkang_generic_model_fr.md new file mode 100644 index 00000000000000..b0465bddfdb95c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_xkang_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from xkang) +author: John Snow Labs +name: camembert_embeddings_xkang_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `xkang`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_xkang_generic_model_fr_5.0.2_3.0_1690840602500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_xkang_generic_model_fr_5.0.2_3.0_1690840602500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_xkang_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_xkang_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_xkang").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_xkang_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 1c33bfd635222c3b00453c537524689ff77c7334 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:02:00 +0700 Subject: [PATCH 46/92] Add model 2023-07-31-camembert_embeddings_wangst_generic_model_fr --- ...bert_embeddings_wangst_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_wangst_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_wangst_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_wangst_generic_model_fr.md new file mode 100644 index 00000000000000..2e9322c530190b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_wangst_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from wangst) +author: John Snow Labs +name: camembert_embeddings_wangst_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `wangst`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_wangst_generic_model_fr_5.0.2_3.0_1690840911109.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_wangst_generic_model_fr_5.0.2_3.0_1690840911109.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_wangst_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_wangst_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_wangst").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_wangst_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 90c91450a7688cd58d09be9fbe3b079a338b893a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:05:22 +0700 Subject: [PATCH 47/92] Add model 2023-07-31-camembert_embeddings_seyfullah_generic_model_fr --- ...t_embeddings_seyfullah_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_seyfullah_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_seyfullah_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_seyfullah_generic_model_fr.md new file mode 100644 index 00000000000000..8282ef44896f1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_seyfullah_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from seyfullah) +author: John Snow Labs +name: camembert_embeddings_seyfullah_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `seyfullah`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_seyfullah_generic_model_fr_5.0.2_3.0_1690841115822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_seyfullah_generic_model_fr_5.0.2_3.0_1690841115822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_seyfullah_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_seyfullah_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_seyfullah").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_seyfullah_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 24c00f53f5677cff1c9807c906de80cc892fcdb4 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:06:22 +0700 Subject: [PATCH 48/92] Add model 2023-07-31-camembert_embeddings_tnagata_generic_model_fr --- ...ert_embeddings_tnagata_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_tnagata_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_tnagata_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_tnagata_generic_model_fr.md new file mode 100644 index 00000000000000..be15fa1d102c14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_tnagata_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from tnagata) +author: John Snow Labs +name: camembert_embeddings_tnagata_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `tnagata`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_tnagata_generic_model_fr_5.0.2_3.0_1690841148034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_tnagata_generic_model_fr_5.0.2_3.0_1690841148034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_tnagata_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_tnagata_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_tnagata").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_tnagata_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 755e135db9d845bb735f462b9e7799f944bc9c8c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:07:23 +0700 Subject: [PATCH 49/92] Add model 2023-07-31-camembert_embeddings_yancong_generic_model_fr --- ...ert_embeddings_yancong_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_yancong_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_yancong_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_yancong_generic_model_fr.md new file mode 100644 index 00000000000000..c11c87a7dc9850 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_yancong_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from yancong) +author: John Snow Labs +name: camembert_embeddings_yancong_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `yancong`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_yancong_generic_model_fr_5.0.2_3.0_1690841226605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_yancong_generic_model_fr_5.0.2_3.0_1690841226605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_yancong_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_yancong_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_yancong").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_yancong_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 107a644e6a962927e9a3970e78d751e09ff0496c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:10:57 +0700 Subject: [PATCH 50/92] Add model 2023-07-31-camembert_embeddings_safik_generic_model_fr --- ...mbert_embeddings_safik_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_safik_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_safik_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_safik_generic_model_fr.md new file mode 100644 index 00000000000000..30ff64246a20f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_safik_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from safik) +author: John Snow Labs +name: camembert_embeddings_safik_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `safik`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_safik_generic_model_fr_5.0.2_3.0_1690841450264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_safik_generic_model_fr_5.0.2_3.0_1690841450264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_safik_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_safik_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_safik").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_safik_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From c689ebb5830ce9c1c18d09bbcb7307d6446bf689 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:17:11 +0700 Subject: [PATCH 51/92] Add model 2023-07-31-camembert_embeddings_tpanza_generic_model_fr --- ...bert_embeddings_tpanza_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_tpanza_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_tpanza_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_tpanza_generic_model_fr.md new file mode 100644 index 00000000000000..af70dc70467023 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_tpanza_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from tpanza) +author: John Snow Labs +name: camembert_embeddings_tpanza_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `tpanza`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_tpanza_generic_model_fr_5.0.2_3.0_1690841821807.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_tpanza_generic_model_fr_5.0.2_3.0_1690841821807.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_tpanza_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_tpanza_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_tpanza").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_tpanza_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 83409d499daaacf98411e0e705b0e7834c96c9fc Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:18:23 +0700 Subject: [PATCH 52/92] Add model 2023-07-31-camembert_embeddings_peterhsu_generic_model_fr --- ...rt_embeddings_peterhsu_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_peterhsu_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_peterhsu_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_peterhsu_generic_model_fr.md new file mode 100644 index 00000000000000..64617bef18c08b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_peterhsu_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from peterhsu) +author: John Snow Labs +name: camembert_embeddings_peterhsu_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `peterhsu`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_peterhsu_generic_model_fr_5.0.2_3.0_1690841897272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_peterhsu_generic_model_fr_5.0.2_3.0_1690841897272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_peterhsu_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_peterhsu_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_peterhsu").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_peterhsu_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 0a17c4f0abdea6653f576e03beca7b875b809aab Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:21:24 +0700 Subject: [PATCH 53/92] Add model 2023-07-31-camembert_embeddings_pgperrone_generic_model_fr --- ...t_embeddings_pgperrone_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_pgperrone_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_pgperrone_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_pgperrone_generic_model_fr.md new file mode 100644 index 00000000000000..1ffb45542ea74f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_pgperrone_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from pgperrone) +author: John Snow Labs +name: camembert_embeddings_pgperrone_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `pgperrone`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_pgperrone_generic_model_fr_5.0.2_3.0_1690842077639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_pgperrone_generic_model_fr_5.0.2_3.0_1690842077639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_pgperrone_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_pgperrone_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_pgperrone").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_pgperrone_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 054ad09208ce54f20b44183da31ac5360a718666 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:23:49 +0700 Subject: [PATCH 54/92] Add model 2023-07-31-camembert_embeddings_osanseviero_generic_model_fr --- ...embeddings_osanseviero_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_osanseviero_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_osanseviero_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_osanseviero_generic_model_fr.md new file mode 100644 index 00000000000000..019c9a8252d9b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_osanseviero_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from osanseviero) +author: John Snow Labs +name: camembert_embeddings_osanseviero_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `osanseviero`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_osanseviero_generic_model_fr_5.0.2_3.0_1690842223769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_osanseviero_generic_model_fr_5.0.2_3.0_1690842223769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_osanseviero_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_osanseviero_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic_v2.by_osanseviero").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_osanseviero_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 59c8fb7175dce36ed712e9b4dd2a5108978d3e64 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Tue, 1 Aug 2023 05:25:24 +0700 Subject: [PATCH 55/92] Add model 2023-07-31-camembert_embeddings_lijingxin_generic_model_fr --- ...t_embeddings_lijingxin_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_lijingxin_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_lijingxin_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_lijingxin_generic_model_fr.md new file mode 100644 index 00000000000000..b844d5bc5c7473 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-07-31-camembert_embeddings_lijingxin_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from lijingxin) +author: John Snow Labs +name: camembert_embeddings_lijingxin_generic_model +date: 2023-07-31 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `lijingxin`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_lijingxin_generic_model_fr_5.0.2_3.0_1690842315235.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_lijingxin_generic_model_fr_5.0.2_3.0_1690842315235.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_lijingxin_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_lijingxin_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_lijingxin").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_lijingxin_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 6a16bdb8e8078cce19947e0273c11a6347ec8e37 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:29:47 +0700 Subject: [PATCH 56/92] Add model 2023-08-01-camembert_embeddings_kaushikacharya_generic_model_fr --- ...eddings_kaushikacharya_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_kaushikacharya_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_kaushikacharya_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_kaushikacharya_generic_model_fr.md new file mode 100644 index 00000000000000..2c98576373f2cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_kaushikacharya_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from kaushikacharya) +author: John Snow Labs +name: camembert_embeddings_kaushikacharya_generic_model +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `kaushikacharya`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_kaushikacharya_generic_model_fr_5.0.2_3.0_1690925377512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_kaushikacharya_generic_model_fr_5.0.2_3.0_1690925377512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_kaushikacharya_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_kaushikacharya_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_kaushikacharya").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_kaushikacharya_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From da6cea2e75fd3fff182143dbb1d3d593a68f12e5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:30:47 +0700 Subject: [PATCH 57/92] Add model 2023-08-01-camembert_embeddings_new_generic_model_fr --- ...membert_embeddings_new_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_new_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_new_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_new_generic_model_fr.md new file mode 100644 index 00000000000000..ba067640a36231 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_new_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from gulabpatel) +author: John Snow Labs +name: camembert_embeddings_new_generic_model +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `new-dummy-model` is a French model originally trained by `gulabpatel`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_new_generic_model_fr_5.0.2_3.0_1690925425451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_new_generic_model_fr_5.0.2_3.0_1690925425451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_new_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_new_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_gulabpatel").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_new_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 5dced864fc263aa80f0d2e5f6d07a427237689f7 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:31:47 +0700 Subject: [PATCH 58/92] Add model 2023-08-01-camembert_embeddings_mbateman_generic_model_fr --- ...rt_embeddings_mbateman_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_mbateman_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_mbateman_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_mbateman_generic_model_fr.md new file mode 100644 index 00000000000000..6e92ecc54ac197 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_mbateman_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from mbateman) +author: John Snow Labs +name: camembert_embeddings_mbateman_generic_model +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `mbateman`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_mbateman_generic_model_fr_5.0.2_3.0_1690925436014.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_mbateman_generic_model_fr_5.0.2_3.0_1690925436014.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_mbateman_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_mbateman_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_mbateman").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_mbateman_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From f16a0fbd3f56f3376b6c42ac232483a291a7f3b8 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:32:47 +0700 Subject: [PATCH 59/92] Add model 2023-08-01-camembert_embeddings_lijingxin_generic_model_2_fr --- ...embeddings_lijingxin_generic_model_2_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_lijingxin_generic_model_2_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_lijingxin_generic_model_2_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_lijingxin_generic_model_2_fr.md new file mode 100644 index 00000000000000..fe2ca4486b6513 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_lijingxin_generic_model_2_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from lijingxin) +author: John Snow Labs +name: camembert_embeddings_lijingxin_generic_model_2 +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model-2` is a French model originally trained by `lijingxin`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_lijingxin_generic_model_2_fr_5.0.2_3.0_1690925509326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_lijingxin_generic_model_2_fr_5.0.2_3.0_1690925509326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_lijingxin_generic_model_2","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_lijingxin_generic_model_2","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic_v2.by_lijingxin").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_lijingxin_generic_model_2| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 061ce2ed7fe4511298d94d6589b5e676c4643961 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:35:03 +0700 Subject: [PATCH 60/92] Add model 2023-08-01-camembert_embeddings_katrin_kc_generic_model_fr --- ...t_embeddings_katrin_kc_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_katrin_kc_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_katrin_kc_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_katrin_kc_generic_model_fr.md new file mode 100644 index 00000000000000..82d12b7109e107 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_katrin_kc_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from katrin-kc) +author: John Snow Labs +name: camembert_embeddings_katrin_kc_generic_model +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `katrin-kc`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_katrin_kc_generic_model_fr_5.0.2_3.0_1690925694402.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_katrin_kc_generic_model_fr_5.0.2_3.0_1690925694402.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_katrin_kc_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_katrin_kc_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_katrin_kc").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_katrin_kc_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 364a258c914cfdf8a873fabd3d874e96322ec2df Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:36:48 +0700 Subject: [PATCH 61/92] Add model 2023-08-01-camembert_embeddings_linyi_generic_model_fr --- ...mbert_embeddings_linyi_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_linyi_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_linyi_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_linyi_generic_model_fr.md new file mode 100644 index 00000000000000..d08e2fbe1cdacf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_linyi_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from linyi) +author: John Snow Labs +name: camembert_embeddings_linyi_generic_model +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `linyi`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_linyi_generic_model_fr_5.0.2_3.0_1690925802007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_linyi_generic_model_fr_5.0.2_3.0_1690925802007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_linyi_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_linyi_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_linyi").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_linyi_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 614636af5d2f43bdd706b252a1ff2c11d687ca6d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:39:32 +0700 Subject: [PATCH 62/92] Add model 2023-08-01-camembert_embeddings_lewtun_generic_model_fr --- ...bert_embeddings_lewtun_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_lewtun_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_lewtun_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_lewtun_generic_model_fr.md new file mode 100644 index 00000000000000..bc03b7b850c19a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_lewtun_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from lewtun) +author: John Snow Labs +name: camembert_embeddings_lewtun_generic_model +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `lewtun`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_lewtun_generic_model_fr_5.0.2_3.0_1690925963643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_lewtun_generic_model_fr_5.0.2_3.0_1690925963643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_lewtun_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_lewtun_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_lewtun").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_lewtun_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 35a809972baf46dd61aa0cef36f7b15b3e7f72b0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:41:09 +0700 Subject: [PATCH 63/92] Add model 2023-08-01-camembert_embeddings_joe8zhang_generic_model_fr --- ...t_embeddings_joe8zhang_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_joe8zhang_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_joe8zhang_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_joe8zhang_generic_model_fr.md new file mode 100644 index 00000000000000..0177700d202a9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_joe8zhang_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from joe8zhang) +author: John Snow Labs +name: camembert_embeddings_joe8zhang_generic_model +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `joe8zhang`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_joe8zhang_generic_model_fr_5.0.2_3.0_1690926063630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_joe8zhang_generic_model_fr_5.0.2_3.0_1690926063630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_joe8zhang_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_joe8zhang_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_joe8zhang").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_joe8zhang_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 0d63cc669378e0bc6f23090b76bfdf2367290f2c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:42:10 +0700 Subject: [PATCH 64/92] Add model 2023-08-01-camembert_embeddings_sloberta_sl --- ...-08-01-camembert_embeddings_sloberta_sl.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_sloberta_sl.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_sloberta_sl.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_sloberta_sl.md new file mode 100644 index 00000000000000..029dce1090a83b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_sloberta_sl.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Slovenian CamemBert Embeddings (from EMBEDDIA) +author: John Snow Labs +name: camembert_embeddings_sloberta +date: 2023-08-01 +tags: [sl, open_source, camembert, embeddings, onnx] +task: Embeddings +language: sl +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sloberta` is a Slovenian model originally trained by `EMBEDDIA`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_sloberta_sl_5.0.2_3.0_1690926104653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_sloberta_sl_5.0.2_3.0_1690926104653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_sloberta","sl") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["Obožujem Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_sloberta","sl") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("Obožujem Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("sl.embed.camembert").predict("""Obožujem Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_sloberta| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|sl| +|Size:|263.5 MB| +|Case sensitive:|true| \ No newline at end of file From bb679a561dfcda3f1138ac93a14ebf5810210c78 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:43:10 +0700 Subject: [PATCH 65/92] Add model 2023-08-01-camembert_embeddings_generic_model_test_fr --- ...embert_embeddings_generic_model_test_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_generic_model_test_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_generic_model_test_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_generic_model_test_fr.md new file mode 100644 index 00000000000000..f1276b1133a22d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_generic_model_test_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from osanseviero) +author: John Snow Labs +name: camembert_embeddings_generic_model_test +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model-test` is a French model originally trained by `osanseviero`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_generic_model_test_fr_5.0.2_3.0_1690926134164.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_generic_model_test_fr_5.0.2_3.0_1690926134164.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_generic_model_test","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_generic_model_test","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_osanseviero").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_generic_model_test| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From feaec8d465b8890e22487ddea605b10872c29a1d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:46:16 +0700 Subject: [PATCH 66/92] Add model 2023-08-01-camembert_embeddings_jcai1_generic_model_fr --- ...mbert_embeddings_jcai1_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_jcai1_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_jcai1_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_jcai1_generic_model_fr.md new file mode 100644 index 00000000000000..cba25095d6389f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_jcai1_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from jcai1) +author: John Snow Labs +name: camembert_embeddings_jcai1_generic_model +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `jcai1`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_jcai1_generic_model_fr_5.0.2_3.0_1690926370690.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_jcai1_generic_model_fr_5.0.2_3.0_1690926370690.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_jcai1_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_jcai1_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_jcai1").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_jcai1_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 4d16433f208a8b8d70e92a77958f1e4a7d697506 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:47:16 +0700 Subject: [PATCH 67/92] Add model 2023-08-01-camembert_embeddings_umberto_commoncrawl_cased_v1_it --- ...eddings_umberto_commoncrawl_cased_v1_it.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_umberto_commoncrawl_cased_v1_it.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_umberto_commoncrawl_cased_v1_it.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_umberto_commoncrawl_cased_v1_it.md new file mode 100644 index 00000000000000..0c3c83cb8e961b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_umberto_commoncrawl_cased_v1_it.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Italian CamemBert Embeddings (from Musixmatch) +author: John Snow Labs +name: camembert_embeddings_umberto_commoncrawl_cased_v1 +date: 2023-08-01 +tags: [it, open_source, camembert, embeddings, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `umberto-commoncrawl-cased-v1` is an Italian model originally trained by `Musixmatch`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_umberto_commoncrawl_cased_v1_it_5.0.2_3.0_1690926373074.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_umberto_commoncrawl_cased_v1_it_5.0.2_3.0_1690926373074.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_umberto_commoncrawl_cased_v1","it") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["Adoro Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_umberto_commoncrawl_cased_v1","it") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("Adoro Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("it.embed.camembert.cased").predict("""Adoro Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_umberto_commoncrawl_cased_v1| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|263.1 MB| +|Case sensitive:|true| \ No newline at end of file From 0dcc83ea2274bb4a7043010a846312b7e6f109d5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:48:16 +0700 Subject: [PATCH 68/92] Add model 2023-08-01-camembert_embeddings_DataikuNLP_camembert_base_fr --- ...embeddings_DataikuNLP_camembert_base_fr.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_DataikuNLP_camembert_base_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_DataikuNLP_camembert_base_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_DataikuNLP_camembert_base_fr.md new file mode 100644 index 00000000000000..2548f85d5d03da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_DataikuNLP_camembert_base_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French CamemBert Embeddings (from DataikuNLP) +author: John Snow Labs +name: camembert_embeddings_DataikuNLP_camembert_base +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `camembert-base` is a French model originally trained by `DataikuNLP`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_DataikuNLP_camembert_base_fr_5.0.2_3.0_1690926395140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_DataikuNLP_camembert_base_fr_5.0.2_3.0_1690926395140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_DataikuNLP_camembert_base","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_DataikuNLP_camembert_base","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_DataikuNLP_camembert_base| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From b57090538722e28876ea6b3c2436c86769a9b98e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:49:17 +0700 Subject: [PATCH 69/92] Add model 2023-08-01-camembert_embeddings_umberto_wikipedia_uncased_v1_it --- ...eddings_umberto_wikipedia_uncased_v1_it.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_umberto_wikipedia_uncased_v1_it.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_umberto_wikipedia_uncased_v1_it.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_umberto_wikipedia_uncased_v1_it.md new file mode 100644 index 00000000000000..f810799f3b227d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_umberto_wikipedia_uncased_v1_it.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Italian CamemBert Embeddings (from Musixmatch) +author: John Snow Labs +name: camembert_embeddings_umberto_wikipedia_uncased_v1 +date: 2023-08-01 +tags: [it, open_source, camembert, embeddings, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `umberto-wikipedia-uncased-v1` is an Italian model originally trained by `Musixmatch`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_umberto_wikipedia_uncased_v1_it_5.0.2_3.0_1690926515076.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_umberto_wikipedia_uncased_v1_it_5.0.2_3.0_1690926515076.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_umberto_wikipedia_uncased_v1","it") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["Adoro Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_umberto_wikipedia_uncased_v1","it") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("Adoro Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("it.embed.camembert.uncased").predict("""Adoro Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_umberto_wikipedia_uncased_v1| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|262.7 MB| +|Case sensitive:|false| \ No newline at end of file From 0f181ce60767ea6e58ee38f61163f9777b7dfde3 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:50:17 +0700 Subject: [PATCH 70/92] Add model 2023-08-01-camembert_base_oscar_4gb_fr --- .../2023-08-01-camembert_base_oscar_4gb_fr.md | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_base_oscar_4gb_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_oscar_4gb_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_oscar_4gb_fr.md new file mode 100644 index 00000000000000..c22aaa7c3cbdab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_oscar_4gb_fr.md @@ -0,0 +1,86 @@ +--- +layout: model +title: CamemBERT Base OSCAR +author: John Snow Labs +name: camembert_base_oscar_4gb +date: 2023-08-01 +tags: [fr, french, camembert, embeddings, oscar, open_source, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[CamemBERT](https://arxiv.org/abs/1911.03894) is a state-of-the-art language model for French based on the RoBERTa model. 
+For further information or requests, please go to [Camembert Website](https://camembert-model.fr/) + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_oscar_4gb_fr_5.0.2_3.0_1690926599842.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_oscar_4gb_fr_5.0.2_3.0_1690926599842.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = CamemBertEmbeddings.pretrained("camembert_base_oscar_4gb", "fr") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = CamemBertEmbeddings.pretrained("camembert_base_oscar_4gb", "fr") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert_oscar_4g").predict("""Put your text here.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_oscar_4gb| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|263.6 MB| +|Case sensitive:|true| + +## Benchmarking + +```bash + +| Model | #params | Arch. | Training data | +|--------------------------------|--------------------------------|-------|-----------------------------------| +| `camembert-base` | 110M | Base | OSCAR (138 GB of text) | +| `camembert/camembert-large` | 335M | Large | CCNet (135 GB of text) | +| `camembert/camembert-base-ccnet` | 110M | Base | CCNet (135 GB of text) | +| `camembert/camembert-base-wikipedia-4gb` | 110M | Base | Wikipedia (4 GB of text) | +| `camembert/camembert-base-oscar-4gb` | 110M | Base | Subsample of OSCAR (4 GB of text) | +| `camembert/camembert-base-ccnet-4gb` | 110M | Base | Subsample of CCNet (4 GB of text) | +``` \ No newline at end of file From abd307b1c370fd280158e361df408862395d75ee Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:51:17 +0700 Subject: [PATCH 71/92] Add model 2023-08-01-camembert_embeddings_distilcamembert_base_fr --- ...bert_embeddings_distilcamembert_base_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_distilcamembert_base_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_distilcamembert_base_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_distilcamembert_base_fr.md new file mode 100644 index 00000000000000..a0bf1c37dde147 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_distilcamembert_base_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from cmarkea) +author: John Snow Labs +name: camembert_embeddings_distilcamembert_base +date: 2023-08-01 +tags: [fr, open_source, 
camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilcamembert-base` is a French model originally trained by `cmarkea`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_distilcamembert_base_fr_5.0.2_3.0_1690926517410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_distilcamembert_base_fr_5.0.2_3.0_1690926517410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_distilcamembert_base","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_distilcamembert_base","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.distilled_base").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_distilcamembert_base| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|253.5 MB| +|Case sensitive:|true| \ No newline at end of file From 05aa01630f76c4a2a3cf5bc9cb06ad21f84a04b0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:53:21 +0700 Subject: [PATCH 72/92] Add model 2023-08-01-camembert_base_wikipedia_4gb_fr --- ...3-08-01-camembert_base_wikipedia_4gb_fr.md | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_base_wikipedia_4gb_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_wikipedia_4gb_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_wikipedia_4gb_fr.md new file mode 100644 index 00000000000000..ab8bfd83cf9b6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_wikipedia_4gb_fr.md @@ -0,0 +1,86 @@ +--- +layout: model +title: CamemBERT Base Wikipedia +author: John Snow Labs +name: camembert_base_wikipedia_4gb +date: 2023-08-01 +tags: [fr, french, embeddings, camembert, wikipedia, open_source, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[CamemBERT](https://arxiv.org/abs/1911.03894) is a state-of-the-art language model for French based on the RoBERTa model. 
+For further information or requests, please go to [Camembert Website](https://camembert-model.fr/) + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_wikipedia_4gb_fr_5.0.2_3.0_1690926794799.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_wikipedia_4gb_fr_5.0.2_3.0_1690926794799.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = CamemBertEmbeddings.pretrained("camembert_base_wikipedia_4gb", "fr") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = CamemBertEmbeddings.pretrained("camembert_base_wikipedia_4gb", "fr") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert_wiki_4g").predict("""Put your text here.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_wikipedia_4gb| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|263.1 MB| +|Case sensitive:|true| + +## Benchmarking + +```bash + +| Model | #params | Arch. | Training data | +|--------------------------------|--------------------------------|-------|-----------------------------------| +| `camembert-base` | 110M | Base | OSCAR (138 GB of text) | +| `camembert/camembert-large` | 335M | Large | CCNet (135 GB of text) | +| `camembert/camembert-base-ccnet` | 110M | Base | CCNet (135 GB of text) | +| `camembert/camembert-base-wikipedia-4gb` | 110M | Base | Wikipedia (4 GB of text) | +| `camembert/camembert-base-oscar-4gb` | 110M | Base | Subsample of OSCAR (4 GB of text) | +| `camembert/camembert-base-ccnet-4gb` | 110M | Base | Subsample of CCNet (4 GB of text) | +``` \ No newline at end of file From 70f3bbdcd346943cb1199b0e3b1176b3a2b0008b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:55:46 +0700 Subject: [PATCH 73/92] Add model 2023-08-01-camembert_base_ccnet_fr --- .../2023-08-01-camembert_base_ccnet_fr.md | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_fr.md new file mode 100644 index 00000000000000..df065c0b828dd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_fr.md @@ -0,0 +1,86 @@ +--- +layout: model +title: CamemBERT Base CCNet +author: John Snow Labs +name: camembert_base_ccnet +date: 2023-08-01 +tags: [fr, french, embeddings, camembert, ccnet, open_source, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: 
onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[CamemBERT](https://arxiv.org/abs/1911.03894) is a state-of-the-art language model for French based on the RoBERTa model. +For further information or requests, please go to [Camembert Website](https://camembert-model.fr/) + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_fr_5.0.2_3.0_1690926936025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_fr_5.0.2_3.0_1690926936025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = CamemBertEmbeddings.pretrained("camembert_base_ccnet", "fr") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = CamemBertEmbeddings.pretrained("camembert_base_ccnet", "fr") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert_base_ccnet").predict("""Put your text here.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_ccnet| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|263.6 MB| +|Case sensitive:|true| + +## Benchmarking + +```bash + +| Model | #params | Arch. | Training data | +|--------------------------------|--------------------------------|-------|-----------------------------------| +| `camembert-base` | 110M | Base | OSCAR (138 GB of text) | +| `camembert/camembert-large` | 335M | Large | CCNet (135 GB of text) | +| `camembert/camembert-base-ccnet` | 110M | Base | CCNet (135 GB of text) | +| `camembert/camembert-base-wikipedia-4gb` | 110M | Base | Wikipedia (4 GB of text) | +| `camembert/camembert-base-oscar-4gb` | 110M | Base | Subsample of OSCAR (4 GB of text) | +| `camembert/camembert-base-ccnet-4gb` | 110M | Base | Subsample of CCNet (4 GB of text) | +``` \ No newline at end of file From d1af927150251c4ee28afc40ac9718669cd59dc6 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:56:46 +0700 Subject: [PATCH 74/92] Add model 2023-08-01-camembert_base_oscar_4gb_fr --- .../ahmedlone127/2023-08-01-camembert_base_oscar_4gb_fr.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_oscar_4gb_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_oscar_4gb_fr.md index c22aaa7c3cbdab..20af11cc2052cd 100644 --- a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_oscar_4gb_fr.md +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_oscar_4gb_fr.md @@ -29,8 +29,8 @@ For further information or requests, please go to [Camembert Website](https://ca {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_oscar_4gb_fr_5.0.2_3.0_1690926599842.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_oscar_4gb_fr_5.0.2_3.0_1690926599842.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_oscar_4gb_fr_5.0.2_3.0_1690926989664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_oscar_4gb_fr_5.0.2_3.0_1690926989664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use From ffb1f02a298f566a171c90e7d1f30e57e919b611 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 04:57:46 +0700 Subject: [PATCH 75/92] Add model 2023-08-01-camembert_embeddings_hackertec_generic_fr --- ...membert_embeddings_hackertec_generic_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_hackertec_generic_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_hackertec_generic_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_hackertec_generic_fr.md new file mode 100644 index 00000000000000..8bf610174a3f4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_hackertec_generic_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from hackertec) +author: John Snow Labs +name: camembert_embeddings_hackertec_generic +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. 
`dummy` is a French model originally trained by `hackertec`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_hackertec_generic_fr_5.0.2_3.0_1690927057947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_hackertec_generic_fr_5.0.2_3.0_1690927057947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_hackertec_generic","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_hackertec_generic","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic_v2.by_hackertec").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_hackertec_generic| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From b3cd1b5e9d426c2d11f3de608523a60b90031643 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 05:01:55 +0700 Subject: [PATCH 76/92] Add model 2023-08-01-camembert_base_ccnet_fr --- .../_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_fr.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_fr.md index df065c0b828dd7..f6314446737cb8 100644 --- a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_fr.md +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_fr.md @@ -29,8 +29,8 @@ For further information or requests, please go to [Camembert Website](https://ca {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_fr_5.0.2_3.0_1690926936025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_fr_5.0.2_3.0_1690926936025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_fr_5.0.2_3.0_1690927305918.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_fr_5.0.2_3.0_1690927305918.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use From 4639142800102d7f852b2c8d18829674836c7d4f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 05:03:07 +0700 
Subject: [PATCH 77/92] Add model 2023-08-01-camembert_embeddings_h4d35_generic_model_fr --- ...mbert_embeddings_h4d35_generic_model_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_h4d35_generic_model_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_h4d35_generic_model_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_h4d35_generic_model_fr.md new file mode 100644 index 00000000000000..6041bc4bb5d976 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_h4d35_generic_model_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from h4d35) +author: John Snow Labs +name: camembert_embeddings_h4d35_generic_model +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy-model` is a French model originally trained by `h4d35`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_h4d35_generic_model_fr_5.0.2_3.0_1690927379924.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_h4d35_generic_model_fr_5.0.2_3.0_1690927379924.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_h4d35_generic_model","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_h4d35_generic_model","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.generic.by_h4d35").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_h4d35_generic_model| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| \ No newline at end of file From 4c1072f02e6a484352d41f53796be0b40e1365af Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 05:07:29 +0700 Subject: [PATCH 78/92] Add model 2023-08-01-camembert_embeddings_bertweetfr_base_fr --- ...camembert_embeddings_bertweetfr_base_fr.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_bertweetfr_base_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_bertweetfr_base_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_bertweetfr_base_fr.md new file mode 100644 index 00000000000000..4eee761df87a0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_embeddings_bertweetfr_base_fr.md @@ -0,0 +1,99 @@ +--- +layout: model +title: French CamemBert Embeddings (from Yanzhu) +author: John Snow Labs +name: camembert_embeddings_bertweetfr_base +date: 2023-08-01 +tags: [fr, open_source, camembert, embeddings, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBert Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertweetfr-base` is a French model originally trained by `Yanzhu`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_embeddings_bertweetfr_base_fr_5.0.2_3.0_1690927634160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_embeddings_bertweetfr_base_fr_5.0.2_3.0_1690927634160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_bertweetfr_base","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_embeddings_bertweetfr_base","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("J'adore Spark NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert.tweet.base").predict("""J'adore Spark NLP""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_embeddings_bertweetfr_base| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|412.8 MB| +|Case sensitive:|true| \ No newline at end of file From 75a8aa85d50107c72c27bca6f12dc15c8b896bcb Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 05:09:37 +0700 Subject: [PATCH 79/92] Add model 2023-08-01-camembert_base_ccnet_4gb_fr --- .../2023-08-01-camembert_base_ccnet_4gb_fr.md | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_4gb_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_4gb_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_4gb_fr.md new file mode 100644 index 00000000000000..bafa3216919d3d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_4gb_fr.md @@ -0,0 +1,86 @@ +--- +layout: model +title: CamemBERT Subsample of CCNet +author: John Snow Labs +name: camembert_base_ccnet_4gb +date: 2023-08-01 +tags: [fr, french, embeddings, camembert, ccnet, open_source, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[CamemBERT](https://arxiv.org/abs/1911.03894) is a state-of-the-art language model for French based on the RoBERTa model. 
+For further information or requests, please go to [Camembert Website](https://camembert-model.fr/) + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_4gb_fr_5.0.2_3.0_1690927765575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_4gb_fr_5.0.2_3.0_1690927765575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = CamemBertEmbeddings.pretrained("camembert_base_ccnet_4gb", "fr") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = CamemBertEmbeddings.pretrained("camembert_base_ccnet_4gb", "fr") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert_ccnet4g").predict("""Put your text here.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_ccnet_4gb| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|263.4 MB| +|Case sensitive:|true| + +## Benchmarking + +```bash + +| Model | #params | Arch. | Training data | +|--------------------------------|--------------------------------|-------|-----------------------------------| +| `camembert-base` | 110M | Base | OSCAR (138 GB of text) | +| `camembert/camembert-large` | 335M | Large | CCNet (135 GB of text) | +| `camembert/camembert-base-ccnet` | 110M | Base | CCNet (135 GB of text) | +| `camembert/camembert-base-wikipedia-4gb` | 110M | Base | Wikipedia (4 GB of text) | +| `camembert/camembert-base-oscar-4gb` | 110M | Base | Subsample of OSCAR (4 GB of text) | +| `camembert/camembert-base-ccnet-4gb` | 110M | Base | Subsample of CCNet (4 GB of text) | +``` \ No newline at end of file From eb6082cd0bf96d8f67845614b7022fae077e4d9a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 05:19:38 +0700 Subject: [PATCH 80/92] Add model 2023-08-01-camembert_base_ccnet_4gb_fr --- .../ahmedlone127/2023-08-01-camembert_base_ccnet_4gb_fr.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_4gb_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_4gb_fr.md index bafa3216919d3d..eb59efb3511280 100644 --- a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_4gb_fr.md +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_ccnet_4gb_fr.md @@ -29,8 +29,8 @@ For further information or requests, please go to [Camembert Website](https://ca {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_4gb_fr_5.0.2_3.0_1690927765575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 
URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_4gb_fr_5.0.2_3.0_1690927765575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_4gb_fr_5.0.2_3.0_1690928369186.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_ccnet_4gb_fr_5.0.2_3.0_1690928369186.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use From edc9bcd90b54a9e6a0424cd63bebe546f6eeef75 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 05:27:39 +0700 Subject: [PATCH 81/92] Add model 2023-08-01-xlmroberta_embeddings_fairlex_fscs_minilm_xx --- ...berta_embeddings_fairlex_fscs_minilm_xx.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-xlmroberta_embeddings_fairlex_fscs_minilm_xx.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-xlmroberta_embeddings_fairlex_fscs_minilm_xx.md b/docs/_posts/ahmedlone127/2023-08-01-xlmroberta_embeddings_fairlex_fscs_minilm_xx.md new file mode 100644 index 00000000000000..fc4285cef2e324 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-xlmroberta_embeddings_fairlex_fscs_minilm_xx.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Multilingual XLMRoBerta Embeddings (from coastalcph) +author: John Snow Labs +name: xlmroberta_embeddings_fairlex_fscs_minilm +date: 2023-08-01 +tags: [fr, de, it, open_source, xlm_roberta, embeddings, xx, fairlex, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRoBERTa Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. 
`fairlex-fscs-minilm` is a Multilingual model originally trained by `coastalcph`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_embeddings_fairlex_fscs_minilm_xx_5.0.2_3.0_1690928849227.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_embeddings_fairlex_fscs_minilm_xx_5.0.2_3.0_1690928849227.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlmroberta_embeddings_fairlex_fscs_minilm","xx") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlmroberta_embeddings_fairlex_fscs_minilm","xx") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("PUT YOUR STRING HERE").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("xx.embed.xlmr_roberta.mini_lm_mini").predict("""PUT YOUR STRING HERE""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_embeddings_fairlex_fscs_minilm| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|402.9 MB| +|Case sensitive:|true| \ No newline at end of file From f5e3a6871006b565522dc87fc8364f2c0548b5af Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 05:28:39 +0700 Subject: [PATCH 82/92] Add model 2023-08-01-xlmroberta_embeddings_fairlex_cail_minilm_zh --- ...berta_embeddings_fairlex_cail_minilm_zh.md | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-xlmroberta_embeddings_fairlex_cail_minilm_zh.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-xlmroberta_embeddings_fairlex_cail_minilm_zh.md b/docs/_posts/ahmedlone127/2023-08-01-xlmroberta_embeddings_fairlex_cail_minilm_zh.md new file mode 100644 index 00000000000000..0f58c4e10d68a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-xlmroberta_embeddings_fairlex_cail_minilm_zh.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Chinese XLMRoBerta Embeddings (from coastalcph) +author: John Snow Labs +name: xlmroberta_embeddings_fairlex_cail_minilm +date: 2023-08-01 +tags: [zh, open_source, xlm_roberta, embeddings, fairlex, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRoBERTa Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `fairlex-cail-minilm` is a Chinese model originally trained by `coastalcph`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_embeddings_fairlex_cail_minilm_zh_5.0.2_3.0_1690928905681.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_embeddings_fairlex_cail_minilm_zh_5.0.2_3.0_1690928905681.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlmroberta_embeddings_fairlex_cail_minilm","zh") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlmroberta_embeddings_fairlex_cail_minilm","zh") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("PUT YOUR STRING HERE").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("zh.embed.xlmr_roberta.mini_lm_mini").predict("""PUT YOUR STRING HERE""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_embeddings_fairlex_cail_minilm| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|zh| +|Size:|402.9 MB| +|Case sensitive:|true| \ No newline at end of file From 91c5ebe7f7f7d14f569720484fbf53a01d257142 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 06:46:23 +0700 Subject: [PATCH 83/92] Add model 2023-08-01-camembert_base_fr --- .../2023-08-01-camembert_base_fr.md | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_base_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_fr.md new file mode 100644 index 00000000000000..0a2792ccab32c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_fr.md @@ -0,0 +1,86 @@ +--- +layout: model +title: CamemBERT Base Model +author: John Snow Labs +name: camembert_base +date: 2023-08-01 +tags: [fr, french, embeddings, camembert, base, open_source, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[CamemBERT](https://arxiv.org/abs/1911.03894) is a state-of-the-art language model for French based on the RoBERTa model. 
+For further information or requests, please go to [Camembert Website](https://camembert-model.fr/) + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_fr_5.0.2_3.0_1690933576243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_fr_5.0.2_3.0_1690933576243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = CamemBertEmbeddings.pretrained("camembert_base", "fr") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = CamemBertEmbeddings.pretrained("camembert_base", "fr") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert_base").predict("""Put your text here.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.0 MB| +|Case sensitive:|true| + +## Benchmarking + +```bash + +| Model | #params | Arch. | Training data | +|--------------------------------|--------------------------------|-------|-----------------------------------| +| `camembert-base` | 110M | Base | OSCAR (138 GB of text) | +| `camembert/camembert-large` | 335M | Large | CCNet (135 GB of text) | +| `camembert/camembert-base-ccnet` | 110M | Base | CCNet (135 GB of text) | +| `camembert/camembert-base-wikipedia-4gb` | 110M | Base | Wikipedia (4 GB of text) | +| `camembert/camembert-base-oscar-4gb` | 110M | Base | Subsample of OSCAR (4 GB of text) | +| `camembert/camembert-base-ccnet-4gb` | 110M | Base | Subsample of CCNet (4 GB of text) | +``` \ No newline at end of file From af690863c7dbb7a8ffd42c3228e880dceaaced70 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 06:49:49 +0700 Subject: [PATCH 84/92] Add model 2023-08-01-camembert_base_opt_fr --- .../2023-08-01-camembert_base_opt_fr.md | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_base_opt_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_opt_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_opt_fr.md new file mode 100644 index 00000000000000..0f0f64d821060b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_opt_fr.md @@ -0,0 +1,86 @@ +--- +layout: model +title: CamemBERT Base Model +author: John Snow Labs +name: camembert_base_opt +date: 2023-08-01 +tags: [fr, french, embeddings, camembert, base, open_source, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: 
CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[CamemBERT](https://arxiv.org/abs/1911.03894) is a state-of-the-art language model for French based on the RoBERTa model. +For further information or requests, please go to [Camembert Website](https://camembert-model.fr/) + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_opt_fr_5.0.2_3.0_1690933783384.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_opt_fr_5.0.2_3.0_1690933783384.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = CamemBertEmbeddings.pretrained("camembert_base_opt", "fr") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = CamemBertEmbeddings.pretrained("camembert_base_opt", "fr") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert_base").predict("""Put your text here.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_opt| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|264.3 MB| +|Case sensitive:|true| + +## Benchmarking + +```bash + +| Model | #params | Arch. | Training data | +|--------------------------------|--------------------------------|-------|-----------------------------------| +| `camembert-base` | 110M | Base | OSCAR (138 GB of text) | +| `camembert/camembert-large` | 335M | Large | CCNet (135 GB of text) | +| `camembert/camembert-base-ccnet` | 110M | Base | CCNet (135 GB of text) | +| `camembert/camembert-base-wikipedia-4gb` | 110M | Base | Wikipedia (4 GB of text) | +| `camembert/camembert-base-oscar-4gb` | 110M | Base | Subsample of OSCAR (4 GB of text) | +| `camembert/camembert-base-ccnet-4gb` | 110M | Base | Subsample of CCNet (4 GB of text) | +``` \ No newline at end of file From 0b2b966f2dfbf8c65c3fc4265394c0239f6db257 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 06:51:12 +0700 Subject: [PATCH 85/92] Add model 2023-08-01-camembert_base_quantized_fr --- .../2023-08-01-camembert_base_quantized_fr.md | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-01-camembert_base_quantized_fr.md diff --git a/docs/_posts/ahmedlone127/2023-08-01-camembert_base_quantized_fr.md b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_quantized_fr.md new file mode 100644 index 00000000000000..64850ca0c2d212 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-01-camembert_base_quantized_fr.md @@ -0,0 +1,86 @@ +--- +layout: model +title: CamemBERT Base Model +author: John Snow Labs +name: camembert_base_quantized +date: 2023-08-01 +tags: [fr, french, embeddings, camembert, base, open_source, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: 
true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[CamemBERT](https://arxiv.org/abs/1911.03894) is a state-of-the-art language model for French based on the RoBERTa model. +For further information or requests, please go to [Camembert Website](https://camembert-model.fr/) + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_quantized_fr_5.0.2_3.0_1690933869613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_quantized_fr_5.0.2_3.0_1690933869613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = CamemBertEmbeddings.pretrained("camembert_base_quantized", "fr") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = CamemBertEmbeddings.pretrained("camembert_base_quantized", "fr") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu +nlu.load("fr.embed.camembert_base").predict("""Put your text here.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_quantized| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|107.9 MB| +|Case sensitive:|true| + +## Benchmarking + +```bash + +| Model | #params | Arch. | Training data | +|--------------------------------|--------------------------------|-------|-----------------------------------| +| `camembert-base` | 110M | Base | OSCAR (138 GB of text) | +| `camembert/camembert-large` | 335M | Large | CCNet (135 GB of text) | +| `camembert/camembert-base-ccnet` | 110M | Base | CCNet (135 GB of text) | +| `camembert/camembert-base-wikipedia-4gb` | 110M | Base | Wikipedia (4 GB of text) | +| `camembert/camembert-base-oscar-4gb` | 110M | Base | Subsample of OSCAR (4 GB of text) | +| `camembert/camembert-base-ccnet-4gb` | 110M | Base | Subsample of CCNet (4 GB of text) | +``` \ No newline at end of file From 07a9536249d45d13044458a5b76dad61238ad5ce Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 07:14:22 +0700 Subject: [PATCH 86/92] Add model 2023-08-02-albert_base_uncased_en --- .../2023-08-02-albert_base_uncased_en.md | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_en.md diff --git a/docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_en.md new file mode 100644 index 00000000000000..703963892f63d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_en.md @@ -0,0 +1,78 @@ +--- +layout: model +title: ALBERT Embeddings (Base Uncased) +author: John Snow Labs +name: albert_base_uncased +date: 2023-08-02 +tags: [open_source, en, english, embeddings, albert, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx
+annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +ALBERT is "A Lite" version of BERT, a popular unsupervised language representation learning algorithm. ALBERT uses parameter-reduction techniques that allow for large-scale configurations, overcome previous memory limitations, and achieve better behavior with respect to model degradation. The details are described in the paper "[ALBERT: A Lite BERT for Self-supervised Learning of Language Representations.](https://arxiv.org/abs/1909.11942)" + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_uncased_en_5.0.2_3.0_1690935260361.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_uncased_en_5.0.2_3.0_1690935260361.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = AlbertEmbeddings.pretrained("albert_base_uncased", "en") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = AlbertEmbeddings.pretrained("albert_base_uncased", "en") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu + +text = ["I love NLP"] +embeddings_df = nlu.load('en.embed.albert.base_uncased').predict(text, output_level='token') +embeddings_df +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_uncased| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|42.0 MB| +|Case sensitive:|false| + +## References + +[https://huggingface.co/albert-base-v2](https://huggingface.co/albert-base-v2) \ No newline at end of file From 9bfe82ba783b9d6a24a7d4ea08d11083451df930 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 07:15:22 +0700 Subject: [PATCH 87/92] Add model 2023-08-02-albert_base_uncased_opt_en --- .../2023-08-02-albert_base_uncased_opt_en.md | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_opt_en.md diff --git a/docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_opt_en.md b/docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_opt_en.md new file mode 100644 index 00000000000000..21794c5b8aa379 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_opt_en.md @@ -0,0 +1,78 @@ +--- +layout: model +title: ALBERT Embeddings (Base Uncased) +author: John Snow Labs +name: albert_base_uncased_opt +date: 2023-08-02 +tags: [open_source, en, english, embeddings, albert, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +ALBERT is "A Lite" version of BERT, a popular unsupervised language representation learning algorithm. ALBERT uses parameter-reduction techniques that allow for large-scale configurations, overcome previous memory limitations, and achieve better behavior with respect to model degradation.
The details are described in the paper "[ALBERT: A Lite BERT for Self-supervised Learning of Language Representations.](https://arxiv.org/abs/1909.11942)" + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_uncased_opt_en_5.0.2_3.0_1690935304465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_uncased_opt_en_5.0.2_3.0_1690935304465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = AlbertEmbeddings.pretrained("albert_base_uncased_opt", "en") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = AlbertEmbeddings.pretrained("albert_base_uncased_opt", "en") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu + +text = ["I love NLP"] +embeddings_df = nlu.load('en.embed.albert.base_uncased').predict(text, output_level='token') +embeddings_df +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_uncased_opt| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|115.0 MB| +|Case sensitive:|false| + +## References + +[https://huggingface.co/albert-base-v2](https://huggingface.co/albert-base-v2) \ No newline at end of file From 9ed1c86a413396c094e065f327cabb97c7b7461d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 07:16:22 +0700 Subject: [PATCH 88/92] Add model 2023-08-02-albert_base_uncased_quantized_en --- ...-08-02-albert_base_uncased_quantized_en.md | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_quantized_en.md diff --git a/docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_quantized_en.md b/docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_quantized_en.md new file mode 100644 index 00000000000000..1f373a267014d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-02-albert_base_uncased_quantized_en.md @@ -0,0 +1,78 @@ +--- +layout: model +title: ALBERT Embeddings (Base Uncased) +author: John Snow Labs +name: albert_base_uncased_quantized +date: 2023-08-02 +tags: [open_source, en, english, embeddings, albert, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +ALBERT is "A Lite" version of BERT, a popular unsupervised language representation learning algorithm. ALBERT uses parameter-reduction techniques that allow for large-scale configurations, overcome previous memory limitations, and achieve better behavior with respect to model degradation.
The details are described in the paper "[ALBERT: A Lite BERT for Self-supervised Learning of Language Representations.](https://arxiv.org/abs/1909.11942)" + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_uncased_quantized_en_5.0.2_3.0_1690935326685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_uncased_quantized_en_5.0.2_3.0_1690935326685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = AlbertEmbeddings.pretrained("albert_base_uncased_quantized", "en") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = AlbertEmbeddings.pretrained("albert_base_uncased_quantized", "en") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu + +text = ["I love NLP"] +embeddings_df = nlu.load('en.embed.albert.base_uncased').predict(text, output_level='token') +embeddings_df +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_uncased_quantized| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|46.0 MB| +|Case sensitive:|false| + +## References + +[https://huggingface.co/albert-base-v2](https://huggingface.co/albert-base-v2) \ No newline at end of file From fe011d1e7eb64f51edc909f74aab1dff079ae515 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 07:17:27 +0700 Subject: [PATCH 89/92] Add model 2023-08-02-albert_large_uncased_en --- .../2023-08-02-albert_large_uncased_en.md | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_en.md diff --git a/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_en.md b/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_en.md new file mode 100644 index 00000000000000..97756c809f0a5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_en.md @@ -0,0 +1,78 @@ +--- +layout: model +title: ALBERT Embeddings (Large Uncased) +author: John Snow Labs +name: albert_large_uncased +date: 2023-08-02 +tags: [open_source, en, english, embeddings, albert, large, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +ALBERT is "A Lite" version of BERT, a popular unsupervised language representation learning algorithm. ALBERT uses parameter-reduction techniques that allow for large-scale configurations, overcome previous memory limitations, and achieve better behavior with respect to model degradation.
The details are described in the paper "[ALBERT: A Lite BERT for Self-supervised Learning of Language Representations.](https://arxiv.org/abs/1909.11942)" + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_uncased_en_5.0.2_3.0_1690935444847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_uncased_en_5.0.2_3.0_1690935444847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = AlbertEmbeddings.pretrained("albert_large_uncased", "en") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = AlbertEmbeddings.pretrained("albert_large_uncased", "en") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu + +text = ["I love NLP"] +embeddings_df = nlu.load('en.embed.albert.large_uncased').predict(text, output_level='token') +embeddings_df +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_uncased| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|62.7 MB| +|Case sensitive:|false| + +## References + +[https://huggingface.co/albert-large-v2](https://huggingface.co/albert-large-v2) \ No newline at end of file From 5482cf155fae2c91868222ca0abf5590055b0af1 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 07:23:04 +0700 Subject: [PATCH 90/92] Add model 2023-08-02-albert_large_uncased_en --- .../_posts/ahmedlone127/2023-08-02-albert_large_uncased_en.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_en.md b/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_en.md index 97756c809f0a5f..2868cd5390206c 100644 --- a/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_en.md +++ b/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_en.md @@ -28,8 +28,8 @@ ALBERT is "A Lite" version of BERT, a popular unsupervised language representati {:.btn-box} -[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_uncased_en_5.0.2_3.0_1690935444847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} -[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_uncased_en_5.0.2_3.0_1690935444847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_uncased_en_5.0.2_3.0_1690935781574.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_uncased_en_5.0.2_3.0_1690935781574.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} ## How to use From dbee304f655ba6be75418a933323051c9964204c Mon 
Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 07:25:41 +0700 Subject: [PATCH 91/92] Add model 2023-08-02-albert_large_uncased_opt_en --- .../2023-08-02-albert_large_uncased_opt_en.md | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_opt_en.md diff --git a/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_opt_en.md b/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_opt_en.md new file mode 100644 index 00000000000000..53d69c5d209c22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_opt_en.md @@ -0,0 +1,78 @@ +--- +layout: model +title: ALBERT Embeddings (Large Uncased) +author: John Snow Labs +name: albert_large_uncased_opt +date: 2023-08-02 +tags: [open_source, en, english, embeddings, albert, large, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +ALBERT is "A Lite" version of BERT, a popular unsupervised language representation learning algorithm. ALBERT uses parameter-reduction techniques that allow for large-scale configurations, overcome previous memory limitations, and achieve better behavior with respect to model degradation. The details are described in the paper "[ALBERT: A Lite BERT for Self-supervised Learning of Language Representations.](https://arxiv.org/abs/1909.11942)" + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_uncased_opt_en_5.0.2_3.0_1690935934328.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_uncased_opt_en_5.0.2_3.0_1690935934328.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = AlbertEmbeddings.pretrained("albert_large_uncased_opt", "en") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = AlbertEmbeddings.pretrained("albert_large_uncased_opt", "en") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu + +text = ["I love NLP"] +embeddings_df = nlu.load('en.embed.albert.large_uncased').predict(text, output_level='token') +embeddings_df +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_uncased_opt| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|333.8 MB| +|Case sensitive:|false| + +## References + +[https://huggingface.co/albert-large-v2](https://huggingface.co/albert-large-v2) \ No newline at end of file From b055479c577f07cfc6aa956753482c619335467b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 2 Aug 2023 07:26:42 +0700 Subject: [PATCH 92/92] Add model 2023-08-02-albert_large_uncased_quantized_en --- ...08-02-albert_large_uncased_quantized_en.md | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_quantized_en.md diff --git a/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_quantized_en.md b/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_quantized_en.md new file mode 100644 index 00000000000000..0e7cdcec3cac9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-08-02-albert_large_uncased_quantized_en.md @@ -0,0 +1,78 @@ +--- +layout: model +title: ALBERT Embeddings (Large Uncased) +author: John Snow Labs +name: albert_large_uncased_quantized +date: 2023-08-02 +tags: [open_source, en, english, embeddings, albert, large, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.0.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +ALBERT is "A Lite" version of BERT, a popular unsupervised language representation learning algorithm. ALBERT uses parameter-reduction techniques that allow for large-scale configurations, overcome previous memory limitations, and achieve better behavior with respect to model degradation.
The details are described in the paper "[ALBERT: A Lite BERT for Self-supervised Learning of Language Representations.](https://arxiv.org/abs/1909.11942)" + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_uncased_quantized_en_5.0.2_3.0_1690935970820.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_uncased_quantized_en_5.0.2_3.0_1690935970820.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +embeddings = AlbertEmbeddings.pretrained("albert_large_uncased_quantized", "en") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +``` +```scala +val embeddings = AlbertEmbeddings.pretrained("albert_large_uncased_quantized", "en") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +``` + +{:.nlu-block} +```python +import nlu + +text = ["I love NLP"] +embeddings_df = nlu.load('en.embed.albert.large_uncased').predict(text, output_level='token') +embeddings_df +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_uncased_quantized| +|Compatibility:|Spark NLP 5.0.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|71.4 MB| +|Case sensitive:|false| + +## References + +[https://huggingface.co/albert-large-v2](https://huggingface.co/albert-large-v2) \ No newline at end of file