"""Helpers for storing and retrieving documents in a Qdrant collection via LlamaIndex."""

import logging

from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

logger = logging.getLogger(__name__)


def _create_collection(
    qdrant_client: QdrantClient, collection_name: str, vector_size: int
) -> None:
    """Create ``collection_name`` with cosine-distance vectors of ``vector_size``."""
    # Single place for the vector parameters so that the "ensure" and the
    # "delete and recreate" paths can never drift apart.
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
    )


def _build_vector_store(
    qdrant_client: QdrantClient, collection_name: str, embed_model
) -> QdrantVectorStore:
    """Build the QdrantVectorStore wrapper shared by retrieval and indexing."""
    # NOTE(review): embed_model is forwarded to QdrantVectorStore exactly as the
    # original code did; confirm the store actually consumes this keyword.
    return QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name,
        embed_model=embed_model,
    )


def ensure_collection_exists(
    qdrant_client: QdrantClient, collection_name: str, vector_size: int
) -> None:
    """Verify that the Qdrant collection exists, and create it if it does not.

    Args:
        qdrant_client: Client used to query and create the collection.
        collection_name: Name of the collection to check or create.
        vector_size: Vector dimension used when the collection is created.

    Raises:
        Exception: Any error raised by the client is logged and re-raised
            unchanged.
    """
    try:
        if qdrant_client.collection_exists(collection_name):
            logger.info("Qdrant collection '%s' already exists", collection_name)
            return
        _create_collection(qdrant_client, collection_name, vector_size)
        logger.info(
            "Created Qdrant collection '%s' with vector size %s",
            collection_name,
            vector_size,
        )
    except Exception as e:
        # Log for local context, then let the caller decide how to recover.
        logger.error("Failed to ensure Qdrant collection exists: %s", e)
        raise


def retrieve_documents(
    qdrant_client: QdrantClient,
    collection_name: str,
    embed_model,
    current_message: str,
) -> str:
    """Get the relevant documents from Qdrant based on the current message.

    Args:
        qdrant_client: Client connected to the Qdrant instance.
        collection_name: Collection to search.
        embed_model: Embedding model handed to the vector store and the index.
        current_message: Query text passed to the retriever.

    Returns:
        The text of every retrieved node, joined by a blank line. Empty string
        when the retriever returns no nodes.
    """
    logger.info("Initializing Qdrant vector store")
    vector_store = _build_vector_store(qdrant_client, collection_name, embed_model)

    logger.info("Building vector store index")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        embed_model=embed_model,
    )

    logger.info("Retrieving documents")
    retriever = index.as_retriever()
    retrieved_nodes = retriever.retrieve(current_message)
    retrieved_docs = "\n\n".join(node.text for node in retrieved_nodes)

    # Only a short prefix is logged to keep debug output readable.
    logger.debug(
        "Retrieved documents (first 200 chars): %s...", retrieved_docs[:200]
    )
    return retrieved_docs


def index_documents(
    qdrant_client: QdrantClient, collection_name: str, embed_model, documents
) -> None:
    """Index the provided documents into the Qdrant collection.

    Args:
        qdrant_client: Client connected to the Qdrant instance.
        collection_name: Collection that receives the documents.
        embed_model: Embedding model handed to the vector store and the index.
        documents: Documents passed to ``VectorStoreIndex.from_documents``.
    """
    vector_store = _build_vector_store(qdrant_client, collection_name, embed_model)

    logger.info("Indexing documents into Qdrant collection '%s'", collection_name)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    # The returned index object is not needed here; building it is what writes
    # the documents through the storage context.
    VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        embed_model=embed_model,
    )
    logger.info("Successfully indexed documents")


def delete_all_documents(
    qdrant_client: QdrantClient, collection_name: str, vector_size: int
) -> None:
    """Delete all vectors from the Qdrant collection by recreating it.

    If the collection does not exist, a warning is logged and nothing is
    created.

    Args:
        qdrant_client: Client used to delete and recreate the collection.
        collection_name: Collection to empty.
        vector_size: Vector dimension used for the recreated collection.

    Raises:
        Exception: Any error raised by the client is logged and re-raised
            unchanged. If recreation fails after the delete succeeded, the
            collection is left absent.
    """
    try:
        if not qdrant_client.collection_exists(collection_name):
            logger.warning(
                "Qdrant collection '%s' does not exist, nothing to delete",
                collection_name,
            )
            return

        qdrant_client.delete_collection(collection_name=collection_name)
        logger.info("Deleted Qdrant collection '%s'", collection_name)

        # Recreate the empty collection with the same parameters.
        _create_collection(qdrant_client, collection_name, vector_size)
        logger.info("Recreated empty Qdrant collection '%s'", collection_name)
    except Exception as e:
        logger.error("Failed to delete Qdrant collection: %s", e)
        raise