Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions lib/crewai-tools/src/crewai_tools/adapters/lancedb_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,22 @@


def _default_embedding_function():
"""Create a default embedding function using OpenAI's text-embedding-ada-002 model.

This function creates and returns an embedding function that uses OpenAI's API
to generate embeddings for text inputs. The embedding function is used by the
LanceDBAdapter to convert text queries into vector representations for similarity search.

Returns:
Callable: A function that takes a list of strings and returns their embeddings
as a list of vectors.

Example:
>>> embed_fn = _default_embedding_function()
>>> embeddings = embed_fn(["Hello world"])
>>> len(embeddings[0]) # Vector dimension
1536
"""
client = OpenAIClient()

def _embedding_function(input):
Expand All @@ -24,6 +40,32 @@ def _embedding_function(input):


class LanceDBAdapter(Adapter):
"""Adapter for integrating LanceDB vector database with CrewAI RAG tools.

LanceDBAdapter provides a bridge between CrewAI's RAG (Retrieval-Augmented Generation)
system and LanceDB, enabling efficient vector similarity search for knowledge retrieval.
It handles embedding generation, vector search, and data ingestion, and lets callers
configure the number of results returned and the vector/text column names.

Attributes:
uri: Database connection URI or path to the LanceDB database.
table_name: Name of the table to query within the LanceDB database.
embedding_function: Function to convert text into embeddings. Defaults to OpenAI's
text-embedding-ada-002 model.
top_k: Number of top results to return from similarity search. Defaults to 3.
vector_column_name: Name of the column containing vector embeddings. Defaults to "vector".
text_column_name: Name of the column containing text content. Defaults to "text".

Example:
>>> from crewai_tools.adapters.lancedb_adapter import LanceDBAdapter
>>> adapter = LanceDBAdapter(
... uri="./my_lancedb",
... table_name="documents",
... top_k=5
... )
>>> results = adapter.query("What is machine learning?")
>>> print(results)
"""
uri: str | Path
table_name: str
embedding_function: Callable = Field(default_factory=_default_embedding_function)
Expand All @@ -35,12 +77,44 @@ class LanceDBAdapter(Adapter):
_table: LanceDBTable = PrivateAttr()

def model_post_init(self, __context: Any) -> None:
    """Connect to LanceDB and open the configured table.

    Connects with ``lancedb_connect(self.uri)``, stores the connection on
    ``self._db``, opens ``self.table_name`` on that connection and stores
    the table handle on ``self._table``. The parent class's
    ``model_post_init`` is invoked last.

    Args:
        __context: Forwarded unchanged to the parent ``model_post_init``.

    Note:
        No errors are caught here; presumably a failed connection or a
        missing table surfaces as whatever LanceDB raises -- confirm
        against the LanceDB documentation.
    """
    connection = lancedb_connect(self.uri)
    self._db = connection
    # Open the table only after the connection has been stored.
    self._table = connection.open_table(self.table_name)

    super().model_post_init(__context)

def query(self, question: str) -> str: # type: ignore[override]
"""Perform a vector similarity search for the given question.

This method converts the input question into an embedding vector and searches
the LanceDB table for the most similar entries, returning the top-k results
ranked by vector similarity.

Args:
question: The text query to search for in the vector database.

Returns:
A string containing the concatenated text results from the top-k most
similar entries, separated by newlines.

Example:
>>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
>>> results = adapter.query("What is CrewAI?")
>>> print(results)
CrewAI is a framework for orchestrating AI agents...
CrewAI provides precise control over agent workflows...
"""
query = self.embedding_function([question])[0]
results = (
self._table.search(query, vector_column_name=self.vector_column_name)
Expand All @@ -56,4 +130,23 @@ def add(
*args: Any,
**kwargs: Any,
) -> None:
"""Add data to the LanceDB table.

This method provides a direct interface to add new records to the underlying
LanceDB table. It accepts the same arguments as the LanceDB table's add method,
allowing flexible data ingestion for building knowledge bases.

Args:
*args: Positional arguments to pass to the LanceDB table's add method.
**kwargs: Keyword arguments to pass to the LanceDB table's add method.
Common kwargs include 'data' (list of records) and 'mode' (append/overwrite).

Example:
>>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
>>> data = [
... {"text": "CrewAI enables agent collaboration", "vector": [0.1, 0.2, ...]},
... {"text": "LanceDB provides vector storage", "vector": [0.3, 0.4, ...]}
... ]
>>> adapter.add(data=data)
"""
self._table.add(*args, **kwargs)
62 changes: 62 additions & 0 deletions lib/crewai-tools/tests/adapters/test_lancedb_adapter_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Test that LanceDB adapter has proper docstrings."""

import inspect

import pytest

lancedb = pytest.importorskip("lancedb")

from crewai_tools.adapters.lancedb_adapter import (
LanceDBAdapter,
_default_embedding_function,
)


def test_lancedb_adapter_class_has_docstring():
    """Check that the LanceDBAdapter class carries a non-blank docstring."""
    class_doc = LanceDBAdapter.__doc__
    assert class_doc is not None, "LanceDBAdapter class is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert class_doc.strip(), "LanceDBAdapter docstring is empty"


def test_lancedb_adapter_model_post_init_has_docstring():
    """Check that LanceDBAdapter.model_post_init carries a non-blank docstring."""
    hook_doc = LanceDBAdapter.model_post_init.__doc__
    assert hook_doc is not None, "model_post_init method is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert hook_doc.strip(), "model_post_init docstring is empty"


def test_lancedb_adapter_query_has_docstring():
    """Check that LanceDBAdapter.query carries a non-blank docstring."""
    query_doc = LanceDBAdapter.query.__doc__
    assert query_doc is not None, "query method is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert query_doc.strip(), "query docstring is empty"


def test_lancedb_adapter_add_has_docstring():
    """Check that LanceDBAdapter.add carries a non-blank docstring."""
    add_doc = LanceDBAdapter.add.__doc__
    assert add_doc is not None, "add method is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert add_doc.strip(), "add docstring is empty"


def test_default_embedding_function_has_docstring():
    """Check that _default_embedding_function carries a non-blank docstring."""
    factory_doc = _default_embedding_function.__doc__
    assert factory_doc is not None, "_default_embedding_function is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert factory_doc.strip(), "_default_embedding_function docstring is empty"


def test_docstrings_contain_required_sections():
    """Check section headers in the ``query`` and ``add`` docstrings.

    ``query`` must document its arguments (``Args:`` or ``Parameters:``) and
    its return value (``Returns:``); ``add`` must document its arguments.
    """
    # Either spelling of the argument section header is accepted.
    arg_headers = ("Args:", "Parameters:")

    query_doc = LanceDBAdapter.query.__doc__
    assert query_doc is not None
    assert any(
        header in query_doc for header in arg_headers
    ), "query docstring should have Args/Parameters section"
    assert "Returns:" in query_doc, "query docstring should have Returns section"

    add_doc = LanceDBAdapter.add.__doc__
    assert add_doc is not None
    assert any(
        header in add_doc for header in arg_headers
    ), "add docstring should have Args/Parameters section"
Loading