Add Qwen3 model support #423
base: main
Changes from all commits: 8adaf2e, 0499d71, 1d92e9e, 47c337d, 6f68d8f, 5641a4f, fa592c3, 8208efd, 1f24cc6, cb181f3, 1651488, c02c295, bd19c79, 8d787ee, a1923e1
Review comment (on this file): Let's remove this file as well :)
@@ -0,0 +1,58 @@
# Bumblebee Examples

This directory contains example scripts demonstrating how to use Bumblebee models.

## Qwen3 Examples

See the `qwen3/` subdirectory for comprehensive Qwen3 model examples:

### Text Generation
```bash
elixir examples/qwen3/qwen3.exs
```

### Text Embeddings
```bash
elixir examples/qwen3/qwen3_embedding.exs
elixir examples/qwen3/qwen3_embedding_prompts.exs
```

### Document Reranking
```bash
elixir examples/qwen3/qwen3_reranker.exs
```

### Features Demonstrated

**Text Generation** (`qwen3.exs`):
- Text completion
- Question answering
- Chat format
- Code generation

**Embeddings** (`qwen3_embedding.exs`, `qwen3_embedding_prompts.exs`):
- 1024-dimensional text embeddings
- Semantic similarity computation
- Instruction-aware prompts (recommended by the Qwen team)
- Multilingual support
- Code search

**Reranking** (`qwen3_reranker.exs`):
- Query-document relevance scoring
- Custom task instructions
- Top-k result selection

### Requirements

- **Text Generation**: ~8GB disk space, ~10GB RAM
- **Embeddings**: ~1.5GB disk space, ~4GB RAM (0.6B model)
- **Reranking**: ~1.5GB disk space, ~4GB RAM (0.6B model)
- **Backend**: EXLA (CPU or GPU)

### Documentation

See `examples/qwen3/QWEN3_IEX_GUIDE.md` for interactive IEx usage examples.

## Phoenix Examples

See the `phoenix/` subdirectory for LiveView-based examples.
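
For a concrete picture of what the embedding scripts above do, here is a minimal sketch of the same flow. It is an illustration only: the `Qwen/Qwen3-Embedding-0.6B` repository id, the package versions, and the pooling options are assumptions, not taken from this PR.

```elixir
# Sketch only: repo id, versions, and pooling options are assumptions.
Mix.install([{:bumblebee, "~> 0.5"}, {:exla, "~> 0.7"}])
Nx.global_default_backend(EXLA.Backend)

repo = {:hf, "Qwen/Qwen3-Embedding-0.6B"}
{:ok, model_info} = Bumblebee.load_model(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)

serving =
  Bumblebee.Text.text_embedding(model_info, tokenizer,
    output_pool: :mean_pooling,
    embedding_processor: :l2_norm
  )

%{embedding: a} = Nx.Serving.run(serving, "How do I sort a list in Elixir?")
%{embedding: b} = Nx.Serving.run(serving, "Sorting collections with Enum.sort/1")

# Embeddings are L2-normalized, so the dot product is the cosine similarity.
IO.inspect(Nx.dot(a, b), label: "cosine similarity")
```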
@@ -50,7 +50,9 @@ defmodule Bumblebee.Layers.Transformer do
       :block_type,
       :attention_window_size,
       :scale_attention_weights,
-      :rotary_embedding
+      :rotary_embedding,
+      :query_norm,
+      :key_norm
     ]

     opts =

@@ -317,7 +319,9 @@ defmodule Bumblebee.Layers.Transformer do
         layer_norm: [],
         attention_window_size: nil,
         scale_attention_weights: true,
-        rotary_embedding: nil
+        rotary_embedding: nil,
+        query_norm: nil,
+        key_norm: nil
       ])

     name = opts[:name]

@@ -347,6 +351,8 @@ defmodule Bumblebee.Layers.Transformer do
     attention_window_size = opts[:attention_window_size]
     scale_attention_weights = opts[:scale_attention_weights]
     rotary_embedding = opts[:rotary_embedding]
+    query_norm = opts[:query_norm]
+    key_norm = opts[:key_norm]

     ffn_fun =
       case ffn do

@@ -405,6 +411,8 @@ defmodule Bumblebee.Layers.Transformer do
         attention_window_size: attention_window_size,
         scale_attention_weights: scale_attention_weights,
         rotary_embedding: rotary_embedding,
+        query_norm: query_norm,
+        key_norm: key_norm,
         name: join(name, "self_attention")
       )

@@ -690,6 +698,14 @@ defmodule Bumblebee.Layers.Transformer do

     * `:max_positions` - the maximum number of distinct positions

+    * `:query_norm` - configuration for query normalization. If set, normalizes
+      the query projection before rotary embedding. Configured with the same
+      options as `:layer_norm` in the block function. Defaults to `nil`
+
+    * `:key_norm` - configuration for key normalization. If set, normalizes
+      the key projection before rotary embedding. Configured with the same
+      options as `:layer_norm` in the block function. Defaults to `nil`

     * `:name` - the prefix for layer names

   ## References

Review comment (on the `:query_norm` docs): Let's make it always a function. For …
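
To make the reviewer's suggestion above concrete, here is a sketch of the function form of `:query_norm`/`:key_norm`. The arity-2 shape (input, then layer name) mirrors how the attention code below invokes it; the epsilon value is an assumption, not taken from this PR.

```elixir
# Sketch of the function form suggested by the reviewer; epsilon is assumed.
# The attention code below calls it as fun.(query, join(name, "query_norm")).
qk_norm = fn hidden, name ->
  Bumblebee.Layers.rms_norm(hidden, epsilon: 1.0e-6, channel_index: -1, name: name)
end

# A caller would then pass, e.g.: query_norm: qk_norm, key_norm: qk_norm
```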
@@ -721,7 +737,9 @@ defmodule Bumblebee.Layers.Transformer do
         key_use_bias: true,
         value_use_bias: true,
         output_use_bias: true,
-        rotary_embedding: nil
+        rotary_embedding: nil,
+        query_norm: nil,
+        key_norm: nil
       ])

     attention_mask = opts[:attention_mask]

@@ -739,6 +757,8 @@ defmodule Bumblebee.Layers.Transformer do
     scale_attention_weights = opts[:scale_attention_weights]
     dropout_rate = opts[:dropout_rate]
     rotary_embedding = opts[:rotary_embedding]
+    query_norm = opts[:query_norm]
+    key_norm = opts[:key_norm]

     query_use_bias = opts[:query_use_bias]
     key_use_bias = opts[:key_use_bias]

@@ -778,6 +798,35 @@ defmodule Bumblebee.Layers.Transformer do
       )
       |> Layers.split_heads(num_key_value_heads)

+    # Apply query and key normalization if configured (before rotary embedding)
+    query =
+      case query_norm do
+        opts when is_list(opts) ->
+          opts = Keyword.validate!(opts, epsilon: 1.0e-5)
+          # Normalize over the head dimension (channel_index: -1)
+          Layers.rms_norm(query, epsilon: opts[:epsilon], channel_index: -1, name: join(name, "query_norm"))
+
+        fun when is_function(fun) ->
+          fun.(query, join(name, "query_norm"))
+
+        nil ->
+          query
+      end
+
+    key =
+      case key_norm do
+        opts when is_list(opts) ->
+          opts = Keyword.validate!(opts, epsilon: 1.0e-5)
+          # Normalize over the head dimension (channel_index: -1)
+          Layers.rms_norm(key, epsilon: opts[:epsilon], channel_index: -1, name: join(name, "key_norm"))
+
+        fun when is_function(fun) ->
+          fun.(key, join(name, "key_norm"))
+
+        nil ->
+          key
+      end
+
     {query, key} =
       case rotary_embedding do
         opts when is_list(opts) ->
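
The attention code above accepts either a keyword list or a function for `:query_norm`/`:key_norm`. For orientation, here is a hypothetical sketch of how a Qwen3 model implementation might wire these options into its decoder; the spec fields and surrounding shape are assumptions modeled on other Bumblebee decoders, since the Qwen3 module itself is not part of this excerpt.

```elixir
# Hypothetical wiring inside an assumed Qwen3 model module; spec fields are
# assumptions modeled on other Bumblebee decoders, not code from this PR.
defp decoder(embeddings, position_ids, spec) do
  Bumblebee.Layers.Transformer.blocks(embeddings,
    num_blocks: spec.num_blocks,
    num_attention_heads: spec.num_attention_heads,
    num_key_value_heads: spec.num_key_value_heads,
    hidden_size: spec.hidden_size,
    # Qwen3-style QK normalization, applied before rotary embedding
    query_norm: [epsilon: spec.layer_norm_epsilon],
    key_norm: [epsilon: spec.layer_norm_epsilon],
    rotary_embedding: [
      position_ids: position_ids,
      max_positions: spec.max_positions,
      base: spec.rotary_embedding_base
    ],
    # ...plus ffn, layer_norm, and the remaining block options...
    name: "decoder.blocks"
  )
end
```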
@@ -200,6 +200,12 @@ defmodule Bumblebee.Text.PreTrainedTokenizer do
       },
       default_template_options: [language_token: "eng_Latn"]
     },
+    qwen2: %{
+      special_tokens: %{
+        eos: "<|im_end|>",
+        pad: "<|endoftext|>"
+      }
+    },
     roberta: %{
       special_tokens: %{
         bos: "<s>",

Review comment (on lines +205 to +206): We want the same defaults as hf/transformers. If a particular uploaded model uses different ones, it is in the configuration files and we load those.
Suggested change: …