medichaid/docker_svc/agent/app/libs/qdrant_helper.py

79 lines
3.6 KiB
Python
Raw Normal View History

2025-07-10 01:43:01 +02:00
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
import logging
# Module-level logger, named after this module, shared by every helper below.
logger = logging.getLogger(__name__)
def ensure_collection_exists(qdrant_client: QdrantClient, collection_name: str, vector_size: int) -> None:
    """Create the Qdrant collection if it is not already present.

    Args:
        qdrant_client: Client used to query and create the collection.
        collection_name: Name of the collection to look up or create.
        vector_size: Dimensionality used for the vectors of a newly created
            collection; ignored when the collection already exists.

    Raises:
        Exception: Any error raised while checking for or creating the
            collection is logged and then re-raised unchanged.
    """
    try:
        # Guard clause: nothing to create when the collection is already there.
        if qdrant_client.collection_exists(collection_name):
            logger.info(f"Qdrant collection '{collection_name}' already exists")
            return

        # New collections always use cosine distance.
        cosine_config = VectorParams(size=vector_size, distance=Distance.COSINE)
        qdrant_client.create_collection(
            collection_name=collection_name,
            vectors_config=cosine_config,
        )
        logger.info(f"Created Qdrant collection '{collection_name}' with vector size {vector_size}")
    except Exception as e:
        logger.error(f"Failed to ensure Qdrant collection exists: {str(e)}")
        raise
def retrieve_documents(qdrant_client: QdrantClient, collection_name: str, embed_model, current_message: str) -> str:
    """Query the Qdrant collection with ``current_message`` and return the matching text.

    The collection is wrapped in a ``QdrantVectorStore``, an index is built on
    top of that store, and the index's default retriever is run against the
    message.

    Args:
        qdrant_client: Client connected to the Qdrant instance.
        collection_name: Name of the collection to search.
        embed_model: Embedding model handed to both the vector store and the
            index.
        current_message: Query text passed to the retriever.

    Returns:
        The ``text`` of every retrieved node, separated by a blank line; an
        empty string when the retriever returns no nodes.
    """
    logger.info("Initializing Qdrant vector store")
    store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name,
        embed_model=embed_model,
    )

    logger.info("Building vector store index")
    store_index = VectorStoreIndex.from_vector_store(
        vector_store=store,
        embed_model=embed_model,
    )

    logger.info("Retrieving documents")
    matches = store_index.as_retriever().retrieve(current_message)

    # Separate the node texts with a blank line so each one stays distinguishable.
    retrieved_docs = "\n\n".join(match.text for match in matches)
    logger.debug(f"Retrieved documents (first 200 chars): {retrieved_docs[:200]}...")
    return retrieved_docs
def index_documents(qdrant_client: QdrantClient, collection_name: str, embed_model, documents) -> None:
    """Build a vector index over ``documents`` backed by the Qdrant collection.

    Args:
        qdrant_client: Client connected to the Qdrant instance.
        collection_name: Name of the collection backing the vector store.
        embed_model: Embedding model handed to both the vector store and the
            index builder.
        documents: Documents passed to ``VectorStoreIndex.from_documents``.
    """
    target_store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name,
        embed_model=embed_model,
    )

    logger.info(f"Indexing documents into Qdrant collection '{collection_name}'")

    # The storage context routes the index's vectors to the Qdrant-backed store.
    context = StorageContext.from_defaults(vector_store=target_store)
    # The index object itself is not needed afterwards, so it is discarded.
    VectorStoreIndex.from_documents(
        documents,
        storage_context=context,
        embed_model=embed_model,
    )

    logger.info("Successfully indexed documents")
def delete_all_documents(qdrant_client: QdrantClient, collection_name: str, vector_size: int) -> None:
    """Empty the Qdrant collection by dropping it and creating it again.

    When the collection does not exist, a warning is logged and nothing is
    created.

    Args:
        qdrant_client: Client connected to the Qdrant instance.
        collection_name: Name of the collection to empty.
        vector_size: Dimensionality used for the vectors of the recreated
            collection.

    Raises:
        Exception: Any error raised while checking, deleting or recreating
            the collection is logged and then re-raised unchanged.
    """
    try:
        # Guard clause: a missing collection means there is nothing to clear.
        if not qdrant_client.collection_exists(collection_name):
            logger.warning(f"Qdrant collection '{collection_name}' does not exist, nothing to delete")
            return

        # Drop the whole collection.
        qdrant_client.delete_collection(collection_name=collection_name)
        logger.info(f"Deleted Qdrant collection '{collection_name}'")

        # Create it again, empty, with cosine distance and the given size.
        fresh_config = VectorParams(size=vector_size, distance=Distance.COSINE)
        qdrant_client.create_collection(
            collection_name=collection_name,
            vectors_config=fresh_config,
        )
        logger.info(f"Recreated empty Qdrant collection '{collection_name}'")
    except Exception as e:
        logger.error(f"Failed to delete Qdrant collection: {str(e)}")
        raise