Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 8 additions & 15 deletions examples/rag/rag.pdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,14 @@ description: Retrieval-augmented generation for NL-to-Code generation task.
text:
- lang: python
code: | # initialize PDL_SESSION.vec_db and PDL_SESSION.embed() function
import datasets, numpy, os, requests
# API key and base URL are read from the environment; a missing variable raises KeyError here.
genai_key, genai_api = os.environ["WATSONX_KEY"], os.environ["WATSONX_API"]
def embed(text):
    """Return the embedding of ``text`` obtained from the text-embeddings endpoint.

    Posts ``text`` to ``{genai_api}/v1/text/embeddings`` using the
    ``sentence-transformers/all-minilm-l6-v2`` model and returns the first
    entry of the response's ``"results"`` list converted to a NumPy array.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status.
        requests.Timeout: if the service does not respond within the timeout.
    """
    endpoint = f"{genai_api}/v1/text/embeddings?version=2024-05-02"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {genai_key}",
    }
    json_data = {
        "model_id": "sentence-transformers/all-minilm-l6-v2",
        "input": text,
    }
    # Without a timeout a stalled connection would block this call forever.
    response = requests.post(
        endpoint, headers=headers, json=json_data, timeout=60
    )
    # Surface the HTTP status instead of an opaque KeyError on "results".
    response.raise_for_status()
    return numpy.asarray(response.json()["results"][0])
import datasets, sklearn.feature_extraction.text
# Load the "train" split of the "sanitized" configuration of the mbpp dataset.
train_in = datasets.load_dataset("mbpp", "sanitized", split="train")
# Collect the "prompt" field of every training row; this is the text the vectorizer is fitted on.
corpus = [row["prompt"] for row in train_in]
# Fit a TF-IDF vectorizer on those prompts; embed() below reuses this fitted object.
tfidf = sklearn.feature_extraction.text.TfidfVectorizer().fit(corpus)
def embed(text):
    """Return the TF-IDF vector of ``text`` as a flat dense array.

    The fitted ``tfidf`` vectorizer operates on batches of documents, so
    ``text`` is wrapped in a one-element list and the single resulting row
    is densified and flattened to one dimension.
    """
    row_matrix = tfidf.transform(raw_documents=[text])
    dense = row_matrix.toarray()
    return dense.flatten()
# Add an "embeddings" column holding embed(prompt) for every training row.
train_em = train_in.map(lambda row: {"embeddings": embed(row["prompt"])})
# Index the "embeddings" column with FAISS and keep the result on the session as vec_db.
PDL_SESSION.vec_db = train_em.add_faiss_index("embeddings")
# Expose the same embed() on the session so other code can embed text the same way.
PDL_SESSION.embed = embed
Expand Down Expand Up @@ -50,6 +43,6 @@ text:

Q: ${ TEST_PROMPT }
A: ```
- model: watsonx/ibm/granite-20b-code-instruct-v2
- model: watsonx/ibm/granite-34b-code-instruct
parameters:
stop: ["```"]