File size: 2,144 Bytes
66dbebd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# faiss_manager.py
import faiss
import numpy as np
class FAISSLiteManager:
    """Manage a flat inner-product FAISS index persisted beside a database path.

    Vectors are held in a FAISS index stored at ``<db_path>.faiss``.
    Per-vector metadata (session id and text) is intended to live in SQLite,
    keyed by the vector's row position in the index; that storage is still a
    TODO in ``_store_metadata`` / ``_retrieve_metadata``.
    """

    def __init__(self, db_path: str):
        """Load the index at ``<db_path>.faiss``, creating it if it cannot be read.

        Args:
            db_path: Base path; the index file is ``f"{db_path}.faiss"``.
        """
        self.db_path = db_path
        self.dimension = 384  # all-MiniLM-L6-v2 dimension
        self.index = self._initialize_index()

    def _initialize_index(self):
        """Return the on-disk FAISS index, or create and persist an empty one.

        Returns:
            The index read from ``<db_path>.faiss`` when it loads, otherwise a
            new empty ``faiss.IndexFlatIP`` of ``self.dimension`` that has just
            been written to that path.
        """
        index_path = f"{self.db_path}.faiss"
        try:
            return faiss.read_index(index_path)
        except Exception:
            # Any load failure (typically: the file does not exist yet) falls
            # back to a fresh index. ``Exception`` instead of a bare ``except``
            # so KeyboardInterrupt / SystemExit still propagate.
            # NOTE: an unreadable file at this path is overwritten here.
            index = faiss.IndexFlatIP(self.dimension)
            faiss.write_index(index, index_path)
            return index

    async def store_embedding(self, session_id: str, text: str, embedding: list):
        """Append one embedding to the index and record its metadata.

        The in-memory index is modified only; call ``save_index`` to persist.

        Args:
            session_id: Session identifier forwarded to ``_store_metadata``.
            text: Source text forwarded to ``_store_metadata``.
            embedding: One vector as a flat sequence of numbers; it is
                converted to a ``(1, len(embedding))`` float32 array.
        """
        vector = np.array([embedding], dtype=np.float32)
        self.index.add(vector)
        # ``ntotal`` is an int count of stored vectors, so the vector that was
        # just appended sits at row ``ntotal - 1``.
        position = self.index.ntotal - 1
        await self._store_metadata(session_id, text, position)

    async def search_similar(self, query_embedding: list, k: int = 5) -> list:
        """Return metadata for the vectors closest to ``query_embedding``.

        Args:
            query_embedding: Query vector as a flat sequence of numbers.
            k: Maximum number of neighbours to request from the index.

        Returns:
            Whatever ``_retrieve_metadata`` returns for the matched row
            positions, given in the order the index ranked them.
        """
        query = np.array([query_embedding], dtype=np.float32)
        _, indices = self.index.search(query, k)
        # FAISS pads the result with -1 when the index holds fewer than k
        # vectors; drop those placeholders. Plain ints (not numpy scalars)
        # match the ``indices: list`` contract of ``_retrieve_metadata``.
        positions = [int(i) for i in indices[0] if i >= 0]
        return await self._retrieve_metadata(positions)

    async def _store_metadata(self, session_id: str, text: str, index_position: int):
        """Store metadata in SQLite database (not implemented yet; no-op).

        Args:
            session_id: Session the embedding belongs to.
            text: Text the embedding was computed from.
            index_position: Row position of the vector in the FAISS index.
        """
        # TODO: Implement SQLite storage
        pass

    async def _retrieve_metadata(self, indices: list) -> list:
        """Retrieve metadata for given indices (not implemented yet).

        Args:
            indices: Row positions in the FAISS index.

        Returns:
            Currently always an empty list.
        """
        # TODO: Implement SQLite retrieval
        return []

    def save_index(self):
        """Save the FAISS index to ``<db_path>.faiss``."""
        faiss.write_index(self.index, f"{self.db_path}.faiss")

    def get_index_size(self) -> int:
        """Return the number of vectors currently in the index."""
        return self.index.ntotal
|