Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import copy
|
|
| 3 |
from llama_cpp import Llama
|
| 4 |
from huggingface_hub import hf_hub_download
|
| 5 |
import chromadb
|
|
|
|
| 6 |
from sentence_transformers import SentenceTransformer
|
| 7 |
|
| 8 |
# Initialize the Llama model
|
|
@@ -26,24 +27,26 @@ class VectorStore:
|
|
| 26 |
self.chroma_client = chromadb.Client()
|
| 27 |
self.collection = self.chroma_client.create_collection(name=collection_name)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# Method to populate the vector store with embeddings from a dataset
|
| 35 |
-
def populate_vectors(self, dataset):
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
|
| 48 |
def search_context(self, query, n_results=1):
|
| 49 |
query_embedding = self.embedding_model.encode([query]).tolist()
|
|
@@ -51,7 +54,9 @@ class VectorStore:
|
|
| 51 |
return results['documents']
|
| 52 |
|
| 53 |
# Example initialization (assuming you've already populated the vector store)
|
|
|
|
| 54 |
vector_store = VectorStore("embedding_vector")
|
|
|
|
| 55 |
|
| 56 |
def generate_text(
|
| 57 |
message,
|
|
|
|
| 3 |
from llama_cpp import Llama
|
| 4 |
from huggingface_hub import hf_hub_download
|
| 5 |
import chromadb
|
| 6 |
+
from datasets import load_dataset
|
| 7 |
from sentence_transformers import SentenceTransformer
|
| 8 |
|
| 9 |
# Initialize the Llama model
|
|
|
|
| 27 |
self.chroma_client = chromadb.Client()
|
| 28 |
self.collection = self.chroma_client.create_collection(name=collection_name)
|
| 29 |
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def populate_vectors(self, texts, ids=None):
    """Embed ``texts`` and store them in the collection.

    Args:
        texts: Iterable of document strings to embed and index.
        ids: Optional iterable of document ids, one per text. When omitted,
            positional ids ``"0"``, ``"1"``, ... are generated.

    Raises:
        ValueError: If ``ids`` is given and its length differs from ``texts``.
    """
    # Materialize once: the inputs are consumed by both encode() and zip(),
    # which would silently yield nothing for a one-shot iterator.
    texts = list(texts)
    if ids is None:
        ids = [str(i) for i in range(len(texts))]
    else:
        ids = list(ids)

    # zip() would otherwise truncate to the shorter input and drop documents
    # without any indication.
    if len(ids) != len(texts):
        raise ValueError(
            f"texts and ids must have the same length "
            f"(got {len(texts)} texts and {len(ids)} ids)"
        )

    if not texts:
        return

    embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
    for text, embedding, doc_id in zip(texts, embeddings, ids):
        self.collection.add(embeddings=[embedding], documents=[text], ids=[doc_id])
|
| 36 |
|
| 37 |
# Method to populate the vector store with embeddings from a dataset
|
| 38 |
+
# def populate_vectors(self, dataset):
|
| 39 |
+
# # Select the text columns to concatenate
|
| 40 |
+
# # title = dataset['train']['title_cleaned'][:1000] # Limiting to 1000 examples for the demo
|
| 41 |
+
# recipe = dataset['train']['recipe_new'][:1000]
|
| 42 |
+
# allergy = dataset['train']['allergy_type'][:1000]
|
| 43 |
+
# ingredients = dataset['train']['ingredients_alternatives'][:1000]
|
| 44 |
|
| 45 |
+
# # Concatenate the text from both columns
|
| 46 |
+
# texts = [f"{rep} {ingr} {alle}" for rep, ingr,alle in zip(recipe, ingredients,allergy)]
|
| 47 |
+
# for i, item in enumerate(texts):
|
| 48 |
+
# embeddings = self.embedding_model.encode(item).tolist()
|
| 49 |
+
# self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
|
| 50 |
|
| 51 |
def search_context(self, query, n_results=1):
|
| 52 |
query_embedding = self.embedding_model.encode([query]).tolist()
|
|
|
|
| 54 |
return results['documents']
|
| 55 |
|
| 56 |
# Initialization: load the recipe dataset, create the vector store, and populate it
|
| 57 |
+
dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full')
vector_store = VectorStore("embedding_vector")

# populate_vectors takes parallel sequences of texts and ids, not the raw
# dataset, so build them here. Only the first 1000 training rows are indexed
# to keep start-up time reasonable for the demo.
# NOTE(review): column names are taken from the earlier dataset-based
# implementation kept in the comments of VectorStore; confirm against the
# dataset schema.
_rows = dataset['train'][:1000]
_texts = [
    f"{rep} {ingr} {alle}"
    for rep, ingr, alle in zip(
        _rows['recipe_new'],
        _rows['ingredients_alternatives'],
        _rows['allergy_type'],
    )
]
_ids = [str(i) for i in range(len(_texts))]
vector_store.populate_vectors(_texts, _ids)
|
| 60 |
|
| 61 |
def generate_text(
|
| 62 |
message,
|