diff --git a/.buildkite/pyproject.toml b/.buildkite/pyproject.toml
index 083bb795caf5..d5cad1c73c6f 100644
--- a/.buildkite/pyproject.toml
+++ b/.buildkite/pyproject.toml
@@ -6,11 +6,6 @@
 
 [tool.ruff]
 line-length = 88
-exclude = [
-    # External file, leaving license intact
-    "examples/other/fp8/quantizer/quantize.py",
-    "vllm/vllm_flash_attn/flash_attn_interface.pyi"
-]
 
 [tool.ruff.lint.per-file-ignores]
 "vllm/third_party/**" = ["ALL"]
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 250140a7eeda..66e2e3312337 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -246,7 +246,7 @@ steps:
     - python3 offline_inference/vision_language.py --seed 0
     - python3 offline_inference/vision_language_embedding.py --seed 0
     - python3 offline_inference/vision_language_multi_image.py --seed 0
-    - VLLM_USE_V1=0 python3 other/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 other/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+    - VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
     - python3 offline_inference/encoder_decoder.py
     - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
     - python3 offline_inference/basic/classify.py
diff --git a/.gitignore b/.gitignore
index 8d5af1bed92d..e49d1d6ba619 100644
--- a/.gitignore
+++ b/.gitignore
@@ -146,7 +146,7 @@ venv.bak/
 
 # mkdocs documentation
 /site
-docs/getting_started/examples
+docs/examples
 
 # mypy
 .mypy_cache/
diff --git a/benchmarks/pyproject.toml b/benchmarks/pyproject.toml
index f825cb203269..65b1e09a247e 100644
--- a/benchmarks/pyproject.toml
+++ b/benchmarks/pyproject.toml
@@ -6,11 +6,6 @@
 
 [tool.ruff]
 line-length = 88
-exclude = [
-    # External file, leaving license intact
-    "examples/other/fp8/quantizer/quantize.py",
-    "vllm/vllm_flash_attn/flash_attn_interface.pyi"
-]
 
 [tool.ruff.lint.per-file-ignores]
 "vllm/third_party/**" = ["ALL"]
diff --git a/docs/.nav.yml b/docs/.nav.yml
index 4a870b40ed23..42aba9775360 100644
--- a/docs/.nav.yml
+++ b/docs/.nav.yml
@@ -5,11 +5,9 @@ nav:
     - getting_started/quickstart.md
     - getting_started/installation
     - Examples:
-      - Offline Inference: getting_started/examples/offline_inference
-      - Online Serving: getting_started/examples/online_serving
-      - Others:
-        - LMCache: getting_started/examples/lmcache
-        - getting_started/examples/other/*
+      - Offline Inference: examples/offline_inference
+      - Online Serving: examples/online_serving
+      - Others: examples/others
   - Quick Links:
     - User Guide: usage/README.md
     - Developer Guide: contributing/README.md
@@ -19,6 +17,7 @@ nav:
   - Releases: https://github.com/vllm-project/vllm/releases
   - User Guide:
     - Summary: usage/README.md
+    - usage/v1_guide.md
   - General:
     - usage/*
   - Inference and Serving:
diff --git a/docs/configuration/README.md b/docs/configuration/README.md
index 442a8d441430..6a8fbc79f4af 100644
--- a/docs/configuration/README.md
+++ b/docs/configuration/README.md
@@ -1,4 +1,9 @@
 # Configuration Options
 
-This section lists the most common options for running the vLLM engine.
-For a full list, refer to the [configuration][configuration] page.
+This section lists the most common options for running vLLM.
+
+There are three main levels of configuration, from highest priority to lowest priority:
+
+- [Request parameters][completions-api] and [input arguments][sampling-params]
+- [Engine arguments](./engine_args.md)
+- [Environment variables](./env_vars.md)
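The new `docs/configuration/README.md` text above describes three configuration levels and their precedence. As a minimal sketch of how a setting enters at each level, assuming the standard `LLM`/`SamplingParams` API (the model name, env var, and values are purely illustrative and not part of this change):

```python
import os

from vllm import LLM, SamplingParams

# Lowest priority: environment variables, read when vLLM starts up.
os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"

# Middle priority: engine arguments, fixed for the lifetime of the engine.
llm = LLM(model="facebook/opt-125m", max_model_len=2048)

# Highest priority: request parameters, applied per request and able to
# override engine-level defaults for that request only.
params = SamplingParams(temperature=0.8, max_tokens=64)
outputs = llm.generate(["Hello, my name is"], params)
print(outputs[0].outputs[0].text)
```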
diff --git a/docs/usage/env_vars.md b/docs/configuration/env_vars.md
similarity index 100%
rename from docs/usage/env_vars.md
rename to docs/configuration/env_vars.md
diff --git a/docs/design/v1/metrics.md b/docs/design/v1/metrics.md
index 6080390ba0ed..7156ee9dd3ec 100644
--- a/docs/design/v1/metrics.md
+++ b/docs/design/v1/metrics.md
@@ -61,7 +61,7 @@ These are documented under [Inferencing and Serving -> Production Metrics](../..
 
 ### Grafana Dashboard
 
-vLLM also provides [a reference example](https://docs.vllm.ai/en/latest/getting_started/examples/prometheus_grafana.html) for how to collect and store these metrics using Prometheus and visualize them using a Grafana dashboard.
+vLLM also provides [a reference example](https://docs.vllm.ai/en/latest/examples/prometheus_grafana.html) for how to collect and store these metrics using Prometheus and visualize them using a Grafana dashboard.
 
 The subset of metrics exposed in the Grafana dashboard gives us an indication of which metrics are especially important:
 
@@ -673,7 +673,7 @@ v0 has support for OpenTelemetry tracing:
 
 - [OpenTelemetry blog post](https://opentelemetry.io/blog/2024/llm-observability/)
 - [User-facing
-  docs](https://docs.vllm.ai/en/latest/getting_started/examples/opentelemetry.html)
+  docs](https://docs.vllm.ai/en/latest/examples/opentelemetry.html)
 - [Blog
   post](https://medium.com/@ronen.schaffer/follow-the-trail-supercharging-vllm-with-opentelemetry-distributed-tracing-aa655229b46f)
 - [IBM product
diff --git a/docs/mkdocs/hooks/generate_examples.py b/docs/mkdocs/hooks/generate_examples.py
index a2131c342e8c..c2f1f2d96f00 100644
--- a/docs/mkdocs/hooks/generate_examples.py
+++ b/docs/mkdocs/hooks/generate_examples.py
@@ -9,7 +9,7 @@
 ROOT_DIR = Path(__file__).parent.parent.parent.parent
 ROOT_DIR_RELATIVE = '../../../../..'
 EXAMPLE_DIR = ROOT_DIR / "examples"
-EXAMPLE_DOC_DIR = ROOT_DIR / "docs/getting_started/examples"
+EXAMPLE_DOC_DIR = ROOT_DIR / "docs/examples"
 print(ROOT_DIR.resolve())
 print(EXAMPLE_DIR.resolve())
 print(EXAMPLE_DOC_DIR.resolve())
diff --git a/docs/models/extensions/tensorizer.md b/docs/models/extensions/tensorizer.md
index 36b49626d47d..b6feb405c6ca 100644
--- a/docs/models/extensions/tensorizer.md
+++ b/docs/models/extensions/tensorizer.md
@@ -10,7 +10,7 @@ shorter Pod startup times and CPU memory usage. Tensor encryption is also suppor
 
 For more information on CoreWeave's Tensorizer, please refer to
 [CoreWeave's Tensorizer documentation](https://github.com/coreweave/tensorizer). For more information on serializing a vLLM model, as well a general usage guide to using Tensorizer with vLLM, see
-the [vLLM example script](https://docs.vllm.ai/en/latest/getting_started/examples/tensorize_vllm_model.html).
+the [vLLM example script](https://docs.vllm.ai/en/latest/examples/tensorize_vllm_model.html).
 
 !!! note
     Note that to use this feature you will need to install `tensorizer` by running `pip install vllm[tensorizer]`.
diff --git a/docs/training/rlhf.md b/docs/training/rlhf.md
index 72e89c0c7478..4f75e4e01495 100644
--- a/docs/training/rlhf.md
+++ b/docs/training/rlhf.md
@@ -6,6 +6,6 @@ vLLM can be used to generate the completions for RLHF. The best way to do this i
 
 See the following basic examples to get started if you don't want to use an existing library:
 
-- [Training and inference processes are located on separate GPUs (inspired by OpenRLHF)](https://docs.vllm.ai/en/latest/getting_started/examples/rlhf.html)
-- [Training and inference processes are colocated on the same GPUs using Ray](https://docs.vllm.ai/en/latest/getting_started/examples/rlhf_colocate.html)
-- [Utilities for performing RLHF with vLLM](https://docs.vllm.ai/en/latest/getting_started/examples/rlhf_utils.html)
+- [Training and inference processes are located on separate GPUs (inspired by OpenRLHF)](../examples/offline_inference/rlhf.md)
+- [Training and inference processes are colocated on the same GPUs using Ray](../examples/offline_inference/rlhf_colocate.md)
+- [Utilities for performing RLHF with vLLM](../examples/offline_inference/rlhf_utils.md)
diff --git a/examples/lmcache/README.md b/examples/others/lmcache/README.md
similarity index 100%
rename from examples/lmcache/README.md
rename to examples/others/lmcache/README.md
diff --git a/examples/lmcache/cpu_offload_lmcache.py b/examples/others/lmcache/cpu_offload_lmcache.py
similarity index 100%
rename from examples/lmcache/cpu_offload_lmcache.py
rename to examples/others/lmcache/cpu_offload_lmcache.py
diff --git a/examples/lmcache/disagg_prefill_lmcache_v0.py b/examples/others/lmcache/disagg_prefill_lmcache_v0.py
similarity index 100%
rename from examples/lmcache/disagg_prefill_lmcache_v0.py
rename to examples/others/lmcache/disagg_prefill_lmcache_v0.py
diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml b/examples/others/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml
similarity index 100%
rename from examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml
rename to examples/others/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml
diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml b/examples/others/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml
similarity index 100%
rename from examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml
rename to examples/others/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml
diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh
similarity index 100%
rename from examples/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh
rename to examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh
diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py
similarity index 100%
rename from examples/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py
rename to examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py
diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
similarity index 100%
rename from examples/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
rename to examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
diff --git a/examples/lmcache/kv_cache_sharing_lmcache_v1.py b/examples/others/lmcache/kv_cache_sharing_lmcache_v1.py
similarity index 100%
rename from examples/lmcache/kv_cache_sharing_lmcache_v1.py
rename to examples/others/lmcache/kv_cache_sharing_lmcache_v1.py
diff --git a/examples/other/logging_configuration.md b/examples/others/logging_configuration.md
similarity index 100%
rename from examples/other/logging_configuration.md
rename to examples/others/logging_configuration.md
diff --git a/examples/other/tensorize_vllm_model.py b/examples/others/tensorize_vllm_model.py
similarity index 97%
rename from examples/other/tensorize_vllm_model.py
rename to examples/others/tensorize_vllm_model.py
index b1f2ce871bb4..38193b1c1002 100644
--- a/examples/other/tensorize_vllm_model.py
+++ b/examples/others/tensorize_vllm_model.py
@@ -28,7 +28,7 @@
 To serialize a model, install vLLM from source, then run something
 like this from the root level of this repository:
 
-python examples/other/tensorize_vllm_model.py \
+python examples/others/tensorize_vllm_model.py \
     --model facebook/opt-125m \
     serialize \
     --serialized-directory s3://my-bucket \
@@ -48,7 +48,7 @@
 To deserialize a model, you can run something like this from the root
 level of this repository:
 
-python examples/other/tensorize_vllm_model.py \
+python examples/others/tensorize_vllm_model.py \
     --model EleutherAI/gpt-j-6B \
     --dtype float16 \
     deserialize \
@@ -66,11 +66,11 @@
 model-rank-%03d.tensors
 
 For more information on the available arguments for serializing, run
-`python -m examples.other.tensorize_vllm_model serialize --help`.
+`python -m examples.others.tensorize_vllm_model serialize --help`.
 
 Or for deserializing:
 
-`python examples/other/tensorize_vllm_model.py deserialize --help`.
+`python examples/others/tensorize_vllm_model.py deserialize --help`.
 
 Once a model is serialized, tensorizer can be invoked with the `LLM` class
 directly to load models:
@@ -91,7 +91,7 @@
 In order to see all of the available arguments usable to configure
 loading with tensorizer that are given to `TensorizerConfig`, run:
 
-`python examples/other/tensorize_vllm_model.py deserialize --help`
+`python examples/others/tensorize_vllm_model.py deserialize --help`
 
 under the `tensorizer options` section. These can also be used for
 deserialization in this example script, although `--tensorizer-uri` and
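The docstring hunk above cuts off right at "directly to load models:". As a rough sketch of what that loading step looks like, assuming the `TensorizerConfig` class exposed by `vllm.model_executor.model_loader.tensorizer` (the model name, tensor path, and reader count below are only illustrative):

```python
from vllm import LLM
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig

# Illustrative path; point this at wherever the serialized tensors were written.
path_to_tensors = "s3://my-bucket/vllm/facebook/opt-125m/v1/model.tensors"

llm = LLM(
    model="facebook/opt-125m",
    load_format="tensorizer",
    model_loader_extra_config=TensorizerConfig(
        tensorizer_uri=path_to_tensors,
        num_readers=3,
    ),
)
```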
diff --git a/pyproject.toml b/pyproject.toml
index 2e4242f6d5c8..c642aa048586 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,11 +62,6 @@ ignore_patterns = [
 [tool.ruff]
 # Allow lines to be as long as 80.
 line-length = 80
-exclude = [
-    # External file, leaving license intact
-    "examples/other/fp8/quantizer/quantize.py",
-    "vllm/vllm_flash_attn/flash_attn_interface.pyi"
-]
 
 [tool.ruff.lint.per-file-ignores]
 "vllm/third_party/**" = ["ALL"]
diff --git a/requirements/common.txt b/requirements/common.txt
index dd0175dbbef2..625efc3366f4 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -41,7 +41,7 @@ compressed-tensors == 0.9.4 # required for compressed-tensors
 depyf==0.18.0 # required for profiling and debugging with compilation config
 cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
 watchfiles # required for http server to monitor the updates of TLS files
-python-json-logger # Used by logging as per examples/other/logging_configuration.md
+python-json-logger # Used by logging as per examples/others/logging_configuration.md
 scipy # Required for phi-4-multimodal-instruct
 ninja # Required for xgrammar, rocm, tpu, xpu
 opentelemetry-sdk>=1.26.0 # vllm.tracing
diff --git a/tests/lora/test_llama_tp.py b/tests/lora/test_llama_tp.py
index 37bbc3cfa7d0..580992dea53d 100644
--- a/tests/lora/test_llama_tp.py
+++ b/tests/lora/test_llama_tp.py
@@ -207,7 +207,7 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
     try:
         result = subprocess.run([
             sys.executable,
-            f"{VLLM_PATH}/examples/other/tensorize_vllm_model.py", "--model",
+            f"{VLLM_PATH}/examples/others/tensorize_vllm_model.py", "--model",
             MODEL_PATH, "--lora-path", lora_path, "--tensor-parallel-size",
             str(tp_size), "serialize", "--serialized-directory", str(tmp_path),
             "--suffix", suffix
diff --git a/vllm/model_executor/model_loader/tensorizer.py b/vllm/model_executor/model_loader/tensorizer.py
index 6f9408d892c3..4c4502284a6a 100644
--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@@ -251,7 +251,7 @@ class TensorizerArgs:
       encryption_keyfile: File path to a binary file containing a
           binary key to use for decryption. `None` (the default) means
           no decryption. See the example script in
-          examples/other/tensorize_vllm_model.py.
+          examples/others/tensorize_vllm_model.py.
       s3_access_key_id: The access key for the S3 bucket. Can also be set via
           the S3_ACCESS_KEY_ID environment variable.
       s3_secret_access_key: The secret access key for the S3 bucket. Can also
@@ -469,7 +469,7 @@ def tensorizer_weights_iterator(
             "loading on vLLM, as tensorizer is forced to load to CPU. "
             "Consider deserializing a vLLM model instead for faster "
             "load times. See the "
-            "examples/other/tensorize_vllm_model.py example script "
+            "examples/others/tensorize_vllm_model.py example script "
             "for serializing vLLM models.")
 
     deserializer_args = tensorizer_args.deserializer_params
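The `TensorizerArgs` docstring above documents `encryption_keyfile` and the S3 credential fields. A hypothetical configuration exercising them might look like the sketch below; the bucket, key file path, and credential sources are placeholders, and the exact field names should be checked against `TensorizerConfig` before use:

```python
import os

from vllm import LLM
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig

# Placeholder values; real deployments would pull these from a secret store.
config = TensorizerConfig(
    tensorizer_uri="s3://my-bucket/vllm/facebook/opt-125m/v1/model.tensors",
    encryption_keyfile="/etc/vllm/tensorizer.key",
    s3_access_key_id=os.environ.get("S3_ACCESS_KEY_ID"),
    s3_secret_access_key=os.environ.get("S3_SECRET_ACCESS_KEY"),
)

llm = LLM(
    model="facebook/opt-125m",
    load_format="tensorizer",
    model_loader_extra_config=config,
)
```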
diff --git a/vllm/model_executor/model_loader/tensorizer_loader.py b/vllm/model_executor/model_loader/tensorizer_loader.py
index 26f8c0946b0a..2afe2b59e2f9 100644
--- a/vllm/model_executor/model_loader/tensorizer_loader.py
+++ b/vllm/model_executor/model_loader/tensorizer_loader.py
@@ -48,7 +48,7 @@ def _load_model_serialized_cpu(
     """Load a serialized model with tensorizer to the CPU.
 
     This is only necessary when the model isn't vLLM-tensorized (see
-    examples/other/tensorize_vllm_model.py) This should still
+    examples/others/tensorize_vllm_model.py) This should still
     be faster than default HuggingFace loading, but will be slower
     than loading a vLLM-tensorized model.
     """
@@ -68,7 +68,7 @@ def _load_model_serialized(
     """Load a serialized model with tensorizer.
 
     Expects a vLLM-tensorized model. See the
-    examples/other/tensorize_vllm_model.py example script
+    examples/others/tensorize_vllm_model.py example script
     for serializing vLLM models."""
 
     device_config = vllm_config.device_config