Merged
2 changes: 1 addition & 1 deletion docs/source/index.mdx
@@ -246,7 +246,7 @@ Flax), PyTorch, and/or TensorFlow.
| ProphetNet | ✅ | ❌ | ✅ | ❌ | ❌ |
| QDQBert | ❌ | ❌ | ✅ | ❌ | ❌ |
| RAG | ✅ | ❌ | ✅ | ✅ | ❌ |
| Realm | ✅ | ❌ | ✅ | ❌ | ❌ |
| Realm | ✅ | ✅ | ✅ | ❌ | ❌ |
| Reformer | ✅ | ✅ | ✅ | ❌ | ❌ |
| RemBERT | ✅ | ✅ | ✅ | ✅ | ❌ |
| RetriBERT | ✅ | ✅ | ✅ | ❌ | ❌ |
5 changes: 5 additions & 0 deletions docs/source/model_doc/realm.mdx
@@ -50,6 +50,11 @@ This model was contributed by `qqaatw <https://huggingface.co/qqaatw>`__. The or
- save_vocabulary
- batch_encode_candidates

## RealmTokenizerFast

[[autodoc]] RealmTokenizerFast
- batch_encode_candidates

## RealmRetriever

[[autodoc]] RealmRetriever
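As a sanity check on the new documentation entry, here is a minimal, illustrative sketch (not part of this PR) of how `batch_encode_candidates` is called on the fast tokenizer. The checkpoint id is the one used elsewhere in this PR and is assumed to host the tokenizer files.

```python
from transformers import RealmTokenizerFast

tokenizer = RealmTokenizerFast.from_pretrained("qqaatw/realm-cc-news-pretrained-encoder")

# batch_size = 2, num_candidates = 2
text = [["Hello world!", "Nice to meet you!"], ["The cute cat.", "The adorable dog."]]

# Unlike a plain __call__, batch_encode_candidates keeps the candidate axis:
# each returned tensor has shape (batch_size, num_candidates, max_length).
encoded = tokenizer.batch_encode_candidates(text, max_length=10, return_tensors="pt")
print(encoded.input_ids.shape)
```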
2 changes: 2 additions & 0 deletions src/transformers/__init__.py
@@ -419,6 +419,7 @@
# tokenizers-backed objects
if is_tokenizers_available():
    # Fast tokenizers
    _import_structure["models.realm"].append("RealmTokenizerFast")
    _import_structure["models.fnet"].append("FNetTokenizerFast")
    _import_structure["models.roformer"].append("RoFormerTokenizerFast")
    _import_structure["models.clip"].append("CLIPTokenizerFast")
@@ -2542,6 +2543,7 @@
from .models.mt5 import MT5TokenizerFast
from .models.openai import OpenAIGPTTokenizerFast
from .models.pegasus import PegasusTokenizerFast
from .models.realm import RealmTokenizerFast
from .models.reformer import ReformerTokenizerFast
from .models.rembert import RemBertTokenizerFast
from .models.retribert import RetriBertTokenizerFast
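Illustrative only (not part of the diff): once the lazy-import entry above is registered, the fast tokenizer becomes importable from the top-level `transformers` namespace whenever the `tokenizers` backend is installed, without pulling in torch or the REALM modeling code. A rough sketch of the guarded import:

```python
from transformers.file_utils import is_tokenizers_available

if is_tokenizers_available():
    # Resolved lazily through _import_structure["models.realm"].
    from transformers import RealmTokenizerFast

    print(RealmTokenizerFast.__name__)  # "RealmTokenizerFast"
```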
1 change: 1 addition & 0 deletions src/transformers/convert_slow_tokenizer.py
@@ -942,6 +942,7 @@ def converted(self) -> Tokenizer:
"MobileBertTokenizer": BertConverter,
"OpenAIGPTTokenizer": OpenAIGPTConverter,
"PegasusTokenizer": PegasusConverter,
"RealmTokenizer": BertConverter,
"ReformerTokenizer": ReformerConverter,
"RemBertTokenizer": RemBertConverter,
"RetriBertTokenizer": BertConverter,
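To illustrate what the new mapping entry does (a hedged sketch, not code from this PR): `convert_slow_tokenizer` looks up the converter by the slow tokenizer's class name, so registering `RealmTokenizer` against the existing `BertConverter` is enough to build a `tokenizers` backend for REALM's BERT-style WordPiece vocabulary. The checkpoint id below is assumed to host the slow tokenizer files.

```python
from transformers import RealmTokenizer
from transformers.convert_slow_tokenizer import convert_slow_tokenizer

slow = RealmTokenizer.from_pretrained("qqaatw/realm-cc-news-pretrained-encoder")

# Returns a tokenizers.Tokenizer built by BertConverter from the slow vocabulary.
backend = convert_slow_tokenizer(slow)
print(backend.encode("Hello world!").tokens)
```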
5 changes: 5 additions & 0 deletions src/transformers/models/realm/__init__.py
@@ -25,6 +25,8 @@
"tokenization_realm": ["RealmTokenizer"],
}

if is_tokenizers_available():
_import_structure["tokenization_realm_fast"] = ["RealmTokenizerFast"]

if is_torch_available():
_import_structure["modeling_realm"] = [
@@ -44,6 +46,9 @@
from .configuration_realm import REALM_PRETRAINED_CONFIG_ARCHIVE_MAP, RealmConfig
from .tokenization_realm import RealmTokenizer

if is_tokenizers_available():
    from .tokenization_realm_fast import RealmTokenizerFast

if is_torch_available():
    from .modeling_realm import (
        REALM_PRETRAINED_MODEL_ARCHIVE_LIST,
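A hedged parity check (illustrative, assuming both classes resolve from the same checkpoint and that it hosts the vocabulary): after the conditional imports above, the slow and fast tokenizers can be loaded side by side and should agree on ordinary text.

```python
from transformers import RealmTokenizer, RealmTokenizerFast

checkpoint = "qqaatw/realm-cc-news-pretrained-encoder"
slow = RealmTokenizer.from_pretrained(checkpoint)
fast = RealmTokenizerFast.from_pretrained(checkpoint)

sample = "What does the REALM retriever score?"
assert slow(sample)["input_ids"] == fast(sample)["input_ids"]
```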
16 changes: 8 additions & 8 deletions src/transformers/models/realm/configuration_realm.py
@@ -21,14 +21,14 @@
logger = logging.get_logger(__name__)

REALM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
"realm-cc-news-pretrained-embedder": "https://huggingface.co/qqaatw/realm-cc-news-pretrained-embedder/resolve/main/config.json",
"realm-cc-news-pretrained-encoder": "https://huggingface.co/qqaatw/realm-cc-news-pretrained-encoder/resolve/main/config.json",
"realm-cc-news-pretrained-scorer": "https://huggingface.co/qqaatw/realm-cc-news-pretrained-scorer/resolve/main/config.json",
"realm-cc-news-pretrained-openqa": "https://huggingface.co/qqaatw/realm-cc-news-pretrained-openqa/aresolve/main/config.json",
"realm-orqa-nq-openqa": "https://huggingface.co/qqaatw/realm-orqa-nq-openqa/resolve/main/config.json",
"realm-orqa-nq-reader": "https://huggingface.co/qqaatw/realm-orqa-nq-reader/resolve/main/config.json",
"realm-orqa-wq-openqa": "https://huggingface.co/qqaatw/realm-orqa-wq-openqa/resolve/main/config.json",
"realm-orqa-wq-reader": "https://huggingface.co/qqaatw/realm-orqa-wq-reader/resolve/main/config.json",
"qqaatw/realm-cc-news-pretrained-embedder": "https://huggingface.co/qqaatw/realm-cc-news-pretrained-embedder/resolve/main/config.json",
"qqaatw/realm-cc-news-pretrained-encoder": "https://huggingface.co/qqaatw/realm-cc-news-pretrained-encoder/resolve/main/config.json",
"qqaatw/realm-cc-news-pretrained-scorer": "https://huggingface.co/qqaatw/realm-cc-news-pretrained-scorer/resolve/main/config.json",
"qqaatw/realm-cc-news-pretrained-openqa": "https://huggingface.co/qqaatw/realm-cc-news-pretrained-openqa/aresolve/main/config.json",
"qqaatw/realm-orqa-nq-openqa": "https://huggingface.co/qqaatw/realm-orqa-nq-openqa/resolve/main/config.json",
"qqaatw/realm-orqa-nq-reader": "https://huggingface.co/qqaatw/realm-orqa-nq-reader/resolve/main/config.json",
"qqaatw/realm-orqa-wq-openqa": "https://huggingface.co/qqaatw/realm-orqa-wq-openqa/resolve/main/config.json",
"qqaatw/realm-orqa-wq-reader": "https://huggingface.co/qqaatw/realm-orqa-wq-reader/resolve/main/config.json",
Comment on lines +24 to +31

Contributor:
Unrelated to this PR, but these checkpoints should be under the google org, since the model is released by Google.

Contributor:
I think @qqaatw actually trained the model himself, no? So I guess in this case it's fine to leave it under your name :-)

Contributor Author (@qqaatw, Jan 20, 2022):
These checkpoints were trained by Google in TF, and I converted them to PyTorch and validated their effectiveness, which corresponds to the benchmarking results shown previously.

Contributor Author (@qqaatw):
I can, however, fine-tune on my side, but we should first upload block_records.npy to qqaatw/realm-cc-news-pretrained-openqa so that I'm able to do it with the latest RealmRetriever.from_pretrained API.

    # See all REALM models at https://huggingface.co/models?filter=realm
}
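With the archive map now keyed by the full hub id, downstream code refers to these checkpoints as `qqaatw/<name>`. A short sketch (assumes network access to the Hugging Face Hub):

```python
from transformers import RealmConfig

config = RealmConfig.from_pretrained("qqaatw/realm-orqa-nq-openqa")
print(config.model_type)  # "realm"
```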
