diff --git a/README.md b/README.md index 7af985b..27a0777 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ export LANGCHAIN_API_KEY=ls-... ## Repo Structure -The package is located within [langchain_benchmarks](./langchain_benchmarks/). Check out the [docs](https://langchain-ai.github.io/langchain-benchmarks/index.html) for information on how to get starte. +The package is located within [langchain_benchmarks](./langchain_benchmarks/). Check out the [docs](https://langchain-ai.github.io/langchain-benchmarks/index.html) for information on how to get started. The other directories are legacy and may be moved in the future. diff --git a/docs/source/notebooks/datasets.ipynb b/docs/source/notebooks/datasets.ipynb index 2c84349..978da80 100644 --- a/docs/source/notebooks/datasets.ipynb +++ b/docs/source/notebooks/datasets.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "58b94f6d-0c91-4361-9b22-f758ffaa150a", "metadata": { "tags": [] @@ -79,7 +79,7 @@ ], "source": [ "download_public_dataset(\n", - " \"https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/examples\"\n", + " \"https://smith.langchain.com/public/59577193-8938-4ccf-92a7-e8a96bcf4f86/examples\"\n", ")" ] }, @@ -93,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "664e90fc-af84-4c5f-a3dd-5d9ffe649650", "metadata": { "tags": [] @@ -103,56 +103,73 @@ "name": "stdout", "output_type": "stream", "text": [ - "[\n", - " {\n", - " \"created_at\": \"2023-11-15T15:26:53.511629\",\n", - " \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n", - " \"id\": \"0703a989-2693-4039-a1f6-7281fc1b4cb0\",\n", - " \"inputs\": {\n", - " \"question\": \"do bob and alice live in the same city?\"\n", - " },\n", - " \"modified_at\": \"2023-11-15T15:26:53.511629\",\n", - " \"outputs\": {\n", - " \"expected_steps\": [\n", - " \"find_users_by_name\",\n", - " \"get_user_location\",\n", - " 
\"get_city_for_location\",\n", - " \"get_user_location\",\n", - " \"get_city_for_location\"\n", - " ],\n", - " \"order_matters\": false,\n", - " \"reference\": \"no\"\n", - " },\n", - " \"runs\": []\n", - " },\n", - " {\n", - " \"created_at\": \"2023-11-15T15:26:53.491359\",\n", - " \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n", - " \"id\": \"b258b95a-9524-4da7-b758-c5481109322d\",\n", - " \"inputs\": {\n", - " \"question\": \"Is it likely that Donna is outside with an umbrella at this time?\"\n", - " },\n", - " \"modified_at\": \"2023-11-15T15:26:53.491359\",\n", - " \"outputs\": {\n", - " \"expected_steps\": [\n", - " \"find_users_by_name\",\n", - " \"get_user_location\",\n", - " \"get_current_time_for_location\",\n", - " \"get_current_weather_for_location\"\n", - " ],\n", - " \"order_matters\": false,\n", - " \"reference\": \"yes\"\n", - " },\n", - " \"runs\": []\n", - " }\n", - "]\n" - ] + "[\n", + " {\n", + " \"created_at\": \"2023-11-21T19:34:17.103178+00:00\",\n", + " \"dataset_id\": \"82ca6840-cf23-4bb0-a9be-55237ebbe9d3\",\n", + " \"id\": \"c17e9d5a-b9f8-43dc-b5a9-6e45d21c9a2a\",\n", + " \"inputs\": {\n", + " \"question\": \"communication\"\n", + " },\n", + " \"metadata\": null,\n", + " \"modified_at\": \"2023-11-21T19:34:17.103178+00:00\",\n", + " \"outputs\": {\n", + " \"expected_steps\": [\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\"\n", + " ],\n", + " \"order_matters\": false,\n", + " \"reference\": \"communication\"\n", + " },\n", + " \"runs\": []\n", + " },\n", + " {\n", + " \"created_at\": \"2023-11-21T19:34:17.007329+00:00\",\n", + " \"dataset_id\": \"82ca6840-cf23-4bb0-a9be-55237ebbe9d3\",\n", + " \"id\": \"57e29316-e258-4ed9-bbeb-b23c8bcb4bd2\",\n", + " 
\"inputs\": {\n", + " \"question\": \"information\"\n", + " },\n", + " \"metadata\": null,\n", + " \"modified_at\": \"2023-11-21T19:34:17.007329+00:00\",\n", + " \"outputs\": {\n", + " \"expected_steps\": [\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\",\n", + " \"type_letter\"\n", + " ],\n", + " \"order_matters\": false,\n", + " \"reference\": \"information\"\n", + " },\n", + " \"runs\": []\n", + " }\n", + "]\n" + ] } ], "source": [ "import json\n", "\n", - "with open(\"./e95d45da-aaa3-44b3-ba2b-7c15ff6e46f5.json\", \"r\", encoding=\"utf-8\") as f:\n", + "with open(\"./59577193-8938-4ccf-92a7-e8a96bcf4f86.json\", \"r\", encoding=\"utf-8\") as f:\n", " print(json.dumps(json.load(f)[:2], indent=2, sort_keys=True))" ] }, diff --git a/docs/source/notebooks/tool_usage/intro.ipynb b/docs/source/notebooks/tool_usage/intro.ipynb index b228b8b..5b0202f 100644 --- a/docs/source/notebooks/tool_usage/intro.ipynb +++ b/docs/source/notebooks/tool_usage/intro.ipynb @@ -74,7 +74,7 @@ "{\n", " \"output\": \"It's super sunny. Like 75F\", // the output from the agent\n", " \"intermediate_steps\": [... 
\"find_locations_by_name\" ...], // list of the intermediate steps taken by the agent (see format in LangChain)\n", - " \"state\": .., // Can be anything, this is the state fo the environment after the agent has taken all of its actions (optional key)\n", + " \"state\": .., // Can be anything, this is the state of the environment after the agent has taken all of its actions (optional key)\n", "}\n", "```" ] @@ -222,6 +222,10 @@ "\n", "---------\n", "```python\n", + "import dataclasses\n", + "from typing import Any, Callable, List, Optional\n", + "\n", + "from langchain.tools import BaseTool\n", "\n", "@dataclasses.dataclass(frozen=True)\n", "class ToolUsageEnvironment:\n", diff --git a/langchain_benchmarks/tool_usage/tasks/type_writer.py b/langchain_benchmarks/tool_usage/tasks/type_writer.py index 352fc9f..0438242 100644 --- a/langchain_benchmarks/tool_usage/tasks/type_writer.py +++ b/langchain_benchmarks/tool_usage/tasks/type_writer.py @@ -75,7 +75,7 @@ def _read_state() -> Any: The objective of this task is to evaluate the ability of the model to use the provided \ tools to repeat a given input string. -For example, if the string is 'abc', the tools 'a', 'b', and 'c' must be invoked \ +For example, if the string is 'abc', the tool must be invoked with arguments 'a', 'b', and 'c' \ in that order. The dataset includes examples of varying difficulty. The difficulty is measured \