79 lines
3.6 KiB
Python
79 lines
3.6 KiB
Python
|
from llama_index.core import VectorStoreIndex, StorageContext
|
||
|
from llama_index.vector_stores.qdrant import QdrantVectorStore
|
||
|
from qdrant_client import QdrantClient
|
||
|
from qdrant_client.http.models import Distance, VectorParams
|
||
|
import logging
|
||
|
|
||
|
logger = logging.getLogger(__name__)
|
||
|
|
||
|
def ensure_collection_exists(
    qdrant_client: QdrantClient,
    collection_name: str,
    vector_size: int,
    *,
    distance: Distance = Distance.COSINE,
) -> None:
    """Verify that the Qdrant collection exists, and create it if it does not.

    Args:
        qdrant_client: Client used to look up and, if needed, create the
            collection.
        collection_name: Name of the collection to check for.
        vector_size: Vector dimensionality used when the collection has to
            be created. An already existing collection is left untouched
            and is not checked against this value.
        distance: Similarity metric used when the collection has to be
            created. Defaults to ``Distance.COSINE``.

    Raises:
        Exception: Any error raised by the client calls is logged and then
            re-raised unchanged.
    """
    try:
        if qdrant_client.collection_exists(collection_name):
            logger.info(f"Qdrant collection '{collection_name}' already exists")
            return

        qdrant_client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=vector_size, distance=distance),
        )
        logger.info(f"Created Qdrant collection '{collection_name}' with vector size {vector_size}")
    except Exception as e:
        # logger.exception keeps the same message but also records the
        # exception type and traceback, which str(e) alone can lose.
        logger.exception("Failed to ensure Qdrant collection exists: %s", e)
        raise
|
||
|
|
||
|
def retrieve_documents(qdrant_client: QdrantClient, collection_name: str, embed_model, current_message: str) -> str:
    """Fetch the documents in Qdrant that match the current message.

    Wraps the collection in a ``QdrantVectorStore``, builds a
    ``VectorStoreIndex`` on top of it, and queries the retriever returned
    by ``as_retriever()`` with ``current_message``.

    Args:
        qdrant_client: Client connected to the Qdrant instance.
        collection_name: Name of the collection to search.
        embed_model: Embedding model passed to both the vector store and
            the index.
        current_message: Query text handed to the retriever.

    Returns:
        The ``text`` of every retrieved node, joined with ``"\\n\\n"``.
    """
    logger.info("Initializing Qdrant vector store")
    store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name,
        embed_model=embed_model,
    )

    logger.info("Building vector store index")
    vector_index = VectorStoreIndex.from_vector_store(vector_store=store, embed_model=embed_model)

    logger.info("Retrieving documents")
    hits = vector_index.as_retriever().retrieve(current_message)
    texts = [hit.text for hit in hits]
    combined = "\n\n".join(texts)

    logger.debug(f"Retrieved documents (first 200 chars): {combined[:200]}...")
    return combined
|
||
|
|
||
|
def index_documents(qdrant_client: QdrantClient, collection_name: str, embed_model, documents) -> None:
    """Index the provided documents into the Qdrant collection.

    Builds a ``QdrantVectorStore`` for the collection, wraps it in a
    ``StorageContext``, and passes ``documents`` to
    ``VectorStoreIndex.from_documents``. The resulting index object is
    discarded.

    Args:
        qdrant_client: Client connected to the Qdrant instance.
        collection_name: Name of the target collection.
        embed_model: Embedding model passed to both the vector store and
            the index.
        documents: Documents forwarded to ``VectorStoreIndex.from_documents``.
    """
    store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name,
        embed_model=embed_model,
    )

    logger.info(f"Indexing documents into Qdrant collection '{collection_name}'")
    context = StorageContext.from_defaults(vector_store=store)
    VectorStoreIndex.from_documents(documents, storage_context=context, embed_model=embed_model)

    logger.info("Successfully indexed documents")
|
||
|
|
||
|
def delete_all_documents(
    qdrant_client: QdrantClient,
    collection_name: str,
    vector_size: int,
    *,
    distance: Distance = Distance.COSINE,
) -> None:
    """Delete all vectors from the Qdrant collection by recreating it.

    The collection is dropped and then created again empty. If it does
    not exist, a warning is logged and nothing is created.

    Args:
        qdrant_client: Client used to drop and recreate the collection.
        collection_name: Name of the collection to empty.
        vector_size: Vector dimensionality for the recreated collection.
        distance: Similarity metric for the recreated collection. Pass the
            metric the collection was originally created with so that
            recreating it does not change it. Defaults to
            ``Distance.COSINE``.

    Raises:
        Exception: Any error raised by the client calls is logged and then
            re-raised unchanged.
    """
    try:
        if not qdrant_client.collection_exists(collection_name):
            logger.warning(f"Qdrant collection '{collection_name}' does not exist, nothing to delete")
            return

        qdrant_client.delete_collection(collection_name=collection_name)
        logger.info(f"Deleted Qdrant collection '{collection_name}'")

        # Recreate the collection empty, using the caller-supplied size and metric.
        qdrant_client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=vector_size, distance=distance),
        )
        logger.info(f"Recreated empty Qdrant collection '{collection_name}'")
    except Exception as e:
        # logger.exception keeps the same message but also records the
        # exception type and traceback, which str(e) alone can lose.
        logger.exception("Failed to delete Qdrant collection: %s", e)
        raise
|