|
|
|
|
|
import faiss |
|
|
import numpy as np |
|
|
|
|
|
class FAISSLiteManager:
    """Manage a FAISS inner-product index persisted at ``f"{db_path}.faiss"``.

    Vectors are kept in a ``faiss.IndexFlatIP`` index. Per-vector metadata
    (session id and text) is delegated to ``_store_metadata`` and
    ``_retrieve_metadata``, which are placeholders in this class; the
    original docstrings describe them as SQLite-backed, but no database
    access is implemented here.
    """

    def __init__(self, db_path: str):
        """Remember ``db_path`` and load (or create) the on-disk index.

        Args:
            db_path: Base path; the index file lives at ``f"{db_path}.faiss"``.
        """
        self.db_path = db_path
        # Dimensionality used when a brand-new index has to be created.
        self.dimension = 384
        self.index = self._initialize_index()

    def _initialize_index(self):
        """Return the persisted index, or create and save an empty one.

        Returns:
            The index read from ``f"{self.db_path}.faiss"`` when that
            succeeds, otherwise a new ``faiss.IndexFlatIP(self.dimension)``
            that has just been written to the same path.
        """
        index_file = f"{self.db_path}.faiss"
        try:
            return faiss.read_index(index_file)
        except Exception:
            # Best-effort fallback: any read failure yields a fresh index.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            # NOTE: an existing but unreadable file is overwritten below.
            index = faiss.IndexFlatIP(self.dimension)
            faiss.write_index(index, index_file)
            return index

    async def store_embedding(self, session_id: str, text: str, embedding: list):
        """Add one embedding to the index and record its metadata.

        Args:
            session_id: Session identifier forwarded to ``_store_metadata``.
            text: Source text forwarded to ``_store_metadata``.
            embedding: Sequence of numbers; converted to a 1 x d float32 row.
        """
        vector = np.array([embedding], dtype=np.float32)
        self.index.add(vector)

        # ntotal is already an integer count, so the vector that was just
        # appended sits at position ntotal - 1.
        position = self.index.ntotal - 1
        await self._store_metadata(session_id, text, position)

    async def search_similar(self, query_embedding: list, k: int = 5) -> list:
        """Return metadata for the vectors most similar to the query.

        Args:
            query_embedding: Sequence of numbers; converted to a 1 x d
                float32 row.
            k: Maximum number of neighbours to request from the index.

        Returns:
            Whatever ``_retrieve_metadata`` returns for the matched positions.
        """
        vector = np.array([query_embedding], dtype=np.float32)
        _scores, indices = self.index.search(vector, k)

        # FAISS pads the label row with -1 when fewer than k vectors are
        # available; those are not real positions, so drop them.
        positions = [int(i) for i in indices[0] if i >= 0]
        return await self._retrieve_metadata(positions)

    async def _store_metadata(self, session_id: str, text: str, index_position: int):
        """Store metadata for one vector (placeholder: currently does nothing).

        Args:
            session_id: Session the vector belongs to.
            text: Text the vector was computed from.
            index_position: Position of the vector inside the FAISS index.
        """
        pass

    async def _retrieve_metadata(self, indices: list) -> list:
        """Retrieve metadata for index positions (placeholder: returns ``[]``).

        Args:
            indices: Positions inside the FAISS index.

        Returns:
            An empty list; no lookup is implemented here.
        """
        return []

    def save_index(self):
        """Write the in-memory index to ``f"{self.db_path}.faiss"``."""
        faiss.write_index(self.index, f"{self.db_path}.faiss")

    def get_index_size(self) -> int:
        """Return the number of vectors currently held by the index."""
        return self.index.ntotal
|
|
|
|
|
|