first commit

Stefano Rossi 2025-07-10 01:43:01 +02:00
parent 5c5d88c92f
commit eb4f62c56d
Signed by: chadmin
GPG key ID: 9EFA2130646BC893
41 changed files with 3851 additions and 19 deletions

View file

@@ -0,0 +1,15 @@
FROM python:3.11-alpine
WORKDIR /app
COPY requirements.txt .
RUN apk add --no-cache build-base \
&& pip install --no-cache-dir -r requirements.txt \
&& apk del build-base
COPY app .
EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

View file

@@ -0,0 +1,36 @@
def is_medical_query(message: str) -> bool:
"""
Check if the user message contains medical keywords. This function is case-insensitive.
:param message: The user message or any string to check.
:return: True if the message contains medical keywords, False otherwise.
"""
medical_keywords = [
"health",
"doctor",
"medicine",
"disease",
"symptom",
"treatment",
"salute",
"medico",
"malattia",
"sintomo",
"cura",
"sanità",
"santé",
"médecin",
"médicament",
"maladie",
"symptôme",
"traitement",
"gesundheit",
"arzt",
"medizin",
"krankheit",
"symptom",
"behandlung",
]
message_lower = message.lower()
return any(keyword in message_lower for keyword in medical_keywords)
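A quick way to sanity-check the keyword matcher, as a minimal sketch (the import path follows main.py below; the sample strings are illustrative):

from libs.check_medical import is_medical_query

# Multilingual keyword list: English and French terms both match
assert is_medical_query("What treatment options exist for Crohn's disease?")
assert is_medical_query("J'ai rendez-vous chez le médecin demain")
# A non-medical message does not match
assert not is_medical_query("Quel temps fait-il aujourd'hui ?")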

View file

@@ -0,0 +1,43 @@
import os
from mysql.connector import connect, Error
import logging
# Configure logging
logger = logging.getLogger(__name__)
def log_prompt_to_db(userid: str | None, ip: str, prompt: str, answer: str):
"""
Logs the user's prompt and the corresponding response to the database.
Args:
userid (str | None): User ID (optional, can be None).
ip (str): Client's IP address.
prompt (str): Full conversation history provided by the user.
answer (str): Response generated by the AI.
"""
try:
# Connect to the database using environment variables
connection = connect(
host=os.getenv("DB_HOST"),
port=int(os.getenv("DB_PORT", "3306")),
user=os.getenv("DB_USER"),
password=os.getenv("DB_PASSWORD"),
database=os.getenv("DB_NAME")
)
cursor = connection.cursor()
# SQL query to insert data
query = """
INSERT INTO user_prompts (userid, ip, prompt, answer)
VALUES (%s, %s, %s, %s)
"""
values = (userid, ip, prompt, answer)
cursor.execute(query, values)
# Commit the transaction and close resources
connection.commit()
cursor.close()
connection.close()
except Error as e:
logger.error(f"Error logging prompt to database: {e}")

View file

@@ -0,0 +1,36 @@
# libs/manage_languages.py
from langdetect import detect
from fastapi import HTTPException
import logging
logger = logging.getLogger(__name__)
def validate_language(language: str) -> None:
"""Validate the language parameter. Throws an HTTPException if the language is invalid."""
valid_languages = {"french", "italian", "english", "german", "auto"}
if language not in valid_languages:
raise HTTPException(
status_code=400,
detail="Invalid language. Must be one of: french, italian, english, german, or auto"
)
def detect_language(current_message: str) -> str:
"""Detect the language of the current message. Defaults to French if detection fails."""
try:
detected_lang = detect(current_message)
if detected_lang == "fr":
language = "french"
elif detected_lang == "it":
language = "italian"
elif detected_lang == "en":
language = "english"
elif detected_lang == "de":
language = "german"
else:
language = "french"
logger.info(f"Detected language: {language}")
return language
except Exception as e:
logger.error(f"Language detection failed: {str(e)}")
return "french"

View file

@@ -0,0 +1,14 @@
from typing import List, Optional, Literal
from pydantic import BaseModel, Field
class ChatMessage(BaseModel):
role: Literal["user", "coach"]
content: str
class ChatRequest(BaseModel):
messages: List[ChatMessage]
language: str = "auto"
temperature: float = 0.7
reasoning: bool = False
stream: bool = True
personality: str = "supportive"

View file

@@ -0,0 +1,192 @@
from llama_index.core.base.llms.types import ChatMessage as LlamaChatMessage
import logging
from libs.models import ChatMessage
from typing import List, Dict, Any, Optional, AsyncGenerator
import httpx
import json
import os
import asyncio
logger = logging.getLogger(__name__)
def format_system_prompt(system_prompt_template: str, language_prompts: dict, language: str,
retrieved_docs: str, is_medical: bool, personality: str = "supportive",
personality_prompts: dict = {}) -> str:
"""Formatta il prompt di sistema con il contenuto specifico della lingua, personalità e i documenti recuperati."""
language_prompt = language_prompts[language]["prompt"]
language_disclaimer = language_prompts[language]["disclaimer"]
language_constraint = "" if language == "auto" else language_prompts[language]["constraint"]
# Improve logging and personality handling
if personality not in personality_prompts:
logger.warning(f"Personality '{personality}' not found in prompts, using default empty prompt")
personality_prompt = ""
else:
personality_prompt = personality_prompts[personality]["prompt"]
logger.info(f"Using '{personality}' personality: {personality_prompts[personality]['description'][:50]}...")
logger.info(f"Formatting system prompt with language {language}, personality {personality}")
system_message_content = system_prompt_template.format(
language_prompt=language_prompt,
context=retrieved_docs,
language_disclaimer=language_disclaimer if is_medical else "",
personality_prompt=personality_prompt,
language_constraint=language_constraint
)
logger.debug(f"System message content: {system_message_content[:200]}...")
return system_message_content
async def perform_inference_streaming(
llm,
system_message: str,
history: List[ChatMessage],
current_message: str
) -> AsyncGenerator[str, None]:
"""Stream inference results from Ollama API"""
base_url = os.getenv("OLLAMA_BASE_URL", "http://ollama:11434")
# Prepare messages for Ollama API
messages = []
# Add system message
messages.append({
"role": "system",
"content": system_message
})
# Add history
for msg in history:
messages.append({
"role": "user" if msg.role == "user" else "assistant",
"content": msg.content
})
# Add current user message
messages.append({
"role": "user",
"content": current_message
})
# Prepare request payload
payload = {
"model": llm.model,
"messages": messages,
"stream": True,
"options": {
"temperature": llm.temperature
}
}
logger.debug(f"Sending streaming request to Ollama API: {base_url}/api/chat")
try:
async with httpx.AsyncClient() as client:
async with client.stream("POST", f"{base_url}/api/chat", json=payload, timeout=60.0) as response:
if response.status_code != 200:
error_detail = await response.aread()
logger.error(f"Error from Ollama API: {response.status_code}, {error_detail}")
yield f"Error: Failed to get response from language model (Status {response.status_code})"
return
# Variable to accumulate the full response
full_response = ""
# Process the streaming response
async for line in response.aiter_lines():
if not line.strip():
continue
# Each line should contain one JSON object (Ollama streams newline-delimited JSON)
try:
data = json.loads(line)
# Process message content if available
if 'message' in data and 'content' in data['message']:
content = data['message']['content']
full_response += content
yield content
# Check if this is the final message with done flag
if data.get('done', False):
logger.debug("Streaming response completed")
except json.JSONDecodeError as e:
logger.error(f"Failed to parse streaming response: {e}, chunk: {chunk}")
except Exception as e:
logger.error(f"Error during streaming inference: {str(e)}")
yield f"Error: {str(e)}"
# Return empty string at the end to signal completion
yield ""
def perform_inference(
llm,
system_message: str,
history: List[ChatMessage],
current_message: str,
stream: bool = False
) -> str:
"""Perform inference with the given LLM."""
if stream:
# This will be handled by the streaming endpoint
raise ValueError("Streaming not supported in synchronous inference")
# Prepare messages for the API
messages = []
# Add system message
messages.append({
"role": "system",
"content": system_message
})
# Add history
for msg in history:
messages.append({
"role": "user" if msg.role == "user" else "assistant",
"content": msg.content
})
# Add current user message
messages.append({
"role": "user",
"content": current_message
})
# For non-streaming, we'll use the httpx client directly to call Ollama API
base_url = os.getenv("OLLAMA_BASE_URL", "http://ollama:11434")
# Prepare request payload
payload = {
"model": llm.model,
"messages": messages,
"stream": False,
"options": {
"temperature": llm.temperature
}
}
logger.debug(f"Sending non-streaming request to Ollama API: {base_url}/api/chat")
try:
with httpx.Client(timeout=60.0) as client:
response = client.post(f"{base_url}/api/chat", json=payload)
if response.status_code != 200:
logger.error(f"Error from Ollama API: {response.status_code}, {response.text}")
return f"Error: Failed to get response from language model (Status {response.status_code})"
data = response.json()
if 'message' in data and 'content' in data['message']:
return data['message']['content']
else:
logger.error(f"Unexpected response format: {data}")
return "Error: Unexpected response format from language model"
except Exception as e:
logger.error(f"Error during non-streaming inference: {str(e)}")
return f"Error: {str(e)}"
def select_llm(llm, llm_reasoning, reasoning: bool):
"""Select the LLM model based on the reasoning flag."""
selected_llm = llm_reasoning if reasoning else llm
return selected_llm
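Driving perform_inference_streaming outside FastAPI, e.g. for a manual test; a minimal sketch assuming OLLAMA_BASE_URL points at a reachable Ollama instance. The stand-in object below is not the llama_index Ollama wrapper; the helper only reads .model and .temperature:

import asyncio
from types import SimpleNamespace
import libs.prompt_helper as prompt_helper

# Stand-in LLM: only .model and .temperature are read by the streaming helper
fake_llm = SimpleNamespace(model="llama3", temperature=0.7)

async def main():
    async for chunk in prompt_helper.perform_inference_streaming(
        fake_llm,
        system_message="You are a helpful coach.",
        history=[],  # prior ChatMessage objects would go here
        current_message="Explique-moi simplement la maladie de Crohn.",
    ):
        print(chunk, end="", flush=True)

asyncio.run(main())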

View file

@@ -0,0 +1,79 @@
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
import logging
logger = logging.getLogger(__name__)
def ensure_collection_exists(qdrant_client: QdrantClient, collection_name: str, vector_size: int) -> None:
"""Verify that the Qdrant collection exists, and create it if it does not."""
try:
if not qdrant_client.collection_exists(collection_name):
qdrant_client.create_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
)
logger.info(f"Created Qdrant collection '{collection_name}' with vector size {vector_size}")
else:
logger.info(f"Qdrant collection '{collection_name}' already exists")
except Exception as e:
logger.error(f"Failed to ensure Qdrant collection exists: {str(e)}")
raise
def retrieve_documents(qdrant_client: QdrantClient, collection_name: str, embed_model, current_message: str) -> str:
"""Get the relevant documents from Qdrant based on the current message."""
logger.info("Initializing Qdrant vector store")
vector_store = QdrantVectorStore(
client=qdrant_client,
collection_name=collection_name,
embed_model=embed_model
)
logger.info("Building vector store index")
index = VectorStoreIndex.from_vector_store(
vector_store=vector_store,
embed_model=embed_model
)
logger.info("Retrieving documents")
retriever = index.as_retriever()
retrieved_nodes = retriever.retrieve(current_message)
retrieved_docs = "\n\n".join([node.text for node in retrieved_nodes])
logger.debug(f"Retrieved documents (first 200 chars): {retrieved_docs[:200]}...")
return retrieved_docs
def index_documents(qdrant_client: QdrantClient, collection_name: str, embed_model, documents) -> None:
"""Index the provided documents into the Qdrant collection."""
vector_store = QdrantVectorStore(
client=qdrant_client,
collection_name=collection_name,
embed_model=embed_model
)
logger.info(f"Indexing documents into Qdrant collection '{collection_name}'")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
embed_model=embed_model
)
logger.info("Successfully indexed documents")
def delete_all_documents(qdrant_client: QdrantClient, collection_name: str, vector_size: int) -> None:
"""Delete all vectors from the Qdrant collection by recreating it."""
try:
# Check if collection exists
if qdrant_client.collection_exists(collection_name):
# Delete the collection
qdrant_client.delete_collection(collection_name=collection_name)
logger.info(f"Deleted Qdrant collection '{collection_name}'")
# Recreate the empty collection with the same parameters
qdrant_client.create_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
)
logger.info(f"Recreated empty Qdrant collection '{collection_name}'")
else:
logger.warning(f"Qdrant collection '{collection_name}' does not exist, nothing to delete")
except Exception as e:
logger.error(f"Failed to delete Qdrant collection: {str(e)}")
raise
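A wiring sketch mirroring what main.py does at startup: ensure the collection exists, then pull context for a query. Collection name, vector size and embedding model follow the defaults used below; the host and base_url are assumptions for a local setup:

from qdrant_client import QdrantClient
from llama_index.embeddings.ollama import OllamaEmbedding
import libs.qdrant_helper as qdrant_helper

client = QdrantClient(host="localhost", port=6333)
embed_model = OllamaEmbedding(
    model_name="mxbai-embed-large",  # produces 1024-dimensional vectors
    base_url="http://localhost:11434",  # assumed local Ollama instance
)

qdrant_helper.ensure_collection_exists(client, "default_collection", vector_size=1024)
context = qdrant_helper.retrieve_documents(
    client, "default_collection", embed_model, "Quels aliments éviter pendant une poussée ?"
)
print(context[:200])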

View file

@@ -0,0 +1,304 @@
from fastapi import FastAPI, File, UploadFile, HTTPException, Request, BackgroundTasks
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.openapi.utils import get_openapi
from llama_index.core import SimpleDirectoryReader
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from typing import Literal, List
from pydantic import BaseModel
from langdetect import DetectorFactory
from qdrant_client import QdrantClient
import os
import uuid
import yaml
from dotenv import load_dotenv
import logging
import asyncio
import json
from libs.check_medical import is_medical_query
import libs.manage_languages as manage_languages
import libs.qdrant_helper as qdrant_helper
from libs.models import ChatMessage, ChatRequest
import libs.prompt_helper as prompt_helper
from libs.log_prompts import log_prompt_to_db
# Set seed for reproducibility of language detection
DetectorFactory.seed = 0
# Configure logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
# Load environment variables from .env file
load_dotenv()
# Initialize FastAPI app
app = FastAPI(
docs_url="/docs",
redoc_url="/redoc",
max_request_body_size=100 * 1024 * 1024 # 100MB
)
# Get CORS origins from environment or use default
cors_origins = os.getenv("CORS_ORIGINS", "http://localhost:3000,http://127.0.0.1:3000").split(",")
# Add CORS middleware with proper configuration
app.add_middleware(
CORSMiddleware,
allow_origins=cors_origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
expose_headers=["Content-Type", "X-Content-Type-Options"],
max_age=600, # 10 minutes for preflight cache
)
# Load custom OpenAPI schema
def load_custom_openapi():
with open("openapi.json", "r") as f:
custom_openapi = yaml.safe_load(f)
default_openapi = get_openapi(
title=app.title,
version=app.version,
openapi_version=app.openapi_version,
description=app.description,
routes=app.routes,
)
default_openapi["info"] = custom_openapi.get("info", default_openapi["info"])
default_openapi["paths"].update(custom_openapi.get("paths", {}))
return default_openapi
app.openapi = load_custom_openapi
with open("prompts.yaml", "r") as f:
prompts = yaml.safe_load(f)
SYSTEM_PROMPT_TEMPLATE = prompts["system_prompt"]
LANGUAGE_PROMPTS = prompts["languages"]
PERSONALITY_PROMPTS = prompts["personalities"]
# Configuration of models and services using .env variables
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "https://ollama.kube-ext.isc.heia-fr.ch")
logger.info(f"Starting application with OLLAMA_BASE_URL: {OLLAMA_BASE_URL}")
# Embedding model using Ollama
embed_model = OllamaEmbedding(
model_name=os.getenv("EMBED_MODEL_NAME", "mxbai-embed-large"),
base_url=OLLAMA_BASE_URL,
request_timeout=os.getenv("TIMEOUT_REQUEST_EMBED", 20.0)
)
logger.info("OllamaEmbedding initialized with model: " + os.getenv("EMBED_MODEL_NAME", "mxbai-embed-large"))
# Direct inference model
llm = Ollama(
model=os.getenv("LLM_MODEL_NAME", "llama3"),
base_url=OLLAMA_BASE_URL,
temperature=float(os.getenv("TEMPERATURE", "0.7")),
request_timeout=os.getenv("TIMEOUT_REQUEST_CHAT_DIRECT", 30.0)
)
logger.info(f"Ollama LLM initialized with model: {llm.model} "
f"with temperature: {llm.temperature}")
# Reasoning model
llm_reasoning = Ollama(
model=os.getenv("LLM_MODEL_NAME_THINKING", "deepseek-r1:14b"),
base_url=OLLAMA_BASE_URL,
temperature=float(os.getenv("TEMPERATURE", "0.7")),
request_timeout=os.getenv("TIMEOUT_REQUEST_CHAT_REASON", 60.0)
)
logger.info(f"Ollama reasoning LLM initialized with model: {llm_reasoning.model} "
f"with temperature: {llm_reasoning.temperature}")
# Qdrant configuration
qdrant_client = QdrantClient(
host=os.getenv("QDRANT_HOST", "localhost"),
port=int(os.getenv("QDRANT_PORT", "6333"))
)
collection_name = os.getenv("COLLECTION_NAME", "default_collection")
vector_size = int(os.getenv("VECTOR_SIZE", "1024"))
logger.info(f"Qdrant client initialized with host: {os.getenv('QDRANT_HOST')} and collection: {collection_name}")
# Ensure Qdrant collection exists
qdrant_helper.ensure_collection_exists(qdrant_client, collection_name, vector_size)
# Endpoint to upload PDFs
@app.post("/upload")
async def upload_pdfs(files: List[UploadFile] = File(...)):
logger.info("Received upload request")
try:
uploaded_files_count = len(files)
logger.debug(f"Number of files to upload: {uploaded_files_count}")
for file in files:
file_id = str(uuid.uuid4())
file_path = f"./pdfs/{file_id}.pdf"
logger.debug(f"Processing file: {file.filename}, saving as {file_path}")
with open(file_path, "wb") as f:
f.write(await file.read())
logger.debug(f"File {file.filename} saved successfully")
documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
logger.debug(f"Loaded {len(documents)} documents from {file.filename}")
qdrant_helper.index_documents(qdrant_client, collection_name, embed_model, documents)
return {"message": f"{uploaded_files_count} files processed and indexed successfully"}
except Exception as e:
logger.error(f"Error in upload endpoint: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Error processing files: {str(e)}")
# Chat endpoint with language, temperature, and reasoning support
@app.post("/chat")
async def chat_inference(chat_request: ChatRequest, http_request: Request, background_tasks: BackgroundTasks):
logger.info("Received chat request")
try:
if not chat_request.messages:
logger.warning("No messages provided in the request")
raise HTTPException(status_code=400, detail="No messages provided")
# Log the complete request object to inspect its contents
logger.debug(f"Complete chat request object: {chat_request.dict()}")
logger.debug(f"Request messages: {chat_request.messages}")
logger.debug(f"Requested language: {chat_request.language}")
logger.debug(f"Requested temperature: {chat_request.temperature}")
logger.debug(f"Requested reasoning: {chat_request.reasoning}")
logger.debug(f"Requested streaming: {chat_request.stream}")
logger.debug(f"Requested personality: {chat_request.personality}")
# Validate language
manage_languages.validate_language(chat_request.language)
# More detailed logging of the personality
logger.info(f"Processing request with personality: {chat_request.personality}")
# Validate personality
if chat_request.personality not in ["cool", "cynical", "supportive"]:
logger.warning(f"Invalid personality: {chat_request.personality}, using 'supportive' as default")
chat_request.personality = "supportive"
# Validate temperature
if not (0 < chat_request.temperature < 1):
raise HTTPException(status_code=400, detail="Temperature must be between 0 and 1 (exclusive)")
# Prepare message data
current_message = chat_request.messages[-1].content.lower()
history = chat_request.messages[:-1]
logger.debug(f"Current user message: {current_message}")
logger.debug(f"Message history: {history}")
# Prepare full conversation history as a concatenated string
conversation_history = "\n".join([f"{msg.role}: {msg.content}" for msg in chat_request.messages])
logger.debug(f"Full conversation history: {conversation_history}")
# Detect language if "auto"
if chat_request.language == "auto":
chat_request.language = manage_languages.detect_language(current_message)
logger.info(f"Detected language using inference: {chat_request.language}")
# Check if the query is medical-related
is_medical = is_medical_query(current_message)
logger.debug(f"Is medical-related query? {is_medical}")
# Select LLM and set temperature
selected_llm = prompt_helper.select_llm(llm, llm_reasoning, chat_request.reasoning)
selected_llm.temperature = chat_request.temperature
logger.info(f"Using LLM model: {selected_llm.model} with temperature: {selected_llm.temperature}")
# Retrieve documents from Qdrant
retrieved_docs = qdrant_helper.retrieve_documents(qdrant_client, collection_name, embed_model, current_message)
# Format the system prompt with the selected personality
system_message_content = prompt_helper.format_system_prompt(
SYSTEM_PROMPT_TEMPLATE,
LANGUAGE_PROMPTS,
chat_request.language,
retrieved_docs,
is_medical,
chat_request.personality,  # Selected personality
PERSONALITY_PROMPTS  # Personality prompt dictionary
)
# Decide whether to use streaming or a synchronous response
if chat_request.stream:
# Streaming response
logger.info("Using streaming response")
async def generate():
full_response = ""
async for content in prompt_helper.perform_inference_streaming(
selected_llm,
system_message_content,
history,
chat_request.messages[-1].content
):
if content:
full_response += content
# Standard SSE format, with a trailing \n\n to delimit events
yield f"data: {json.dumps({'content': content, 'full': full_response})}\n\n"
# Log the full conversation and response
background_tasks.add_task(
log_prompt_to_db,
None, # TODO: User ID not available yet
http_request.client.host, # Client's IP address
conversation_history, # Full conversation history
full_response # AI-generated response
)
# Signal the end of the stream with a consistent SSE event
yield f"data: {json.dumps({'done': True})}\n\n"
return StreamingResponse(
generate(),
media_type="text/event-stream; charset=utf-8",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
"Content-Type": "text/event-stream; charset=utf-8",
}
)
else:
# Non-streaming response
logger.info("Using non-streaming response")
response_content = prompt_helper.perform_inference(
selected_llm,
system_message_content,
history,
chat_request.messages[-1].content,
stream=False
)
# Log the full conversation and response in the background
background_tasks.add_task(
log_prompt_to_db,
None, # TODO: User ID not available yet
http_request.client.host, # Client's IP address
conversation_history, # Full conversation history
response_content # AI-generated response
)
return {"response": response_content}
except Exception as e:
logger.error(f"Error in chat inference: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
# Add new DELETE endpoint to clear all documents
@app.delete("/docs")
async def delete_all_docs():
logger.info("Received request to delete all documents")
try:
qdrant_helper.delete_all_documents(qdrant_client, collection_name, vector_size)
return {"message": "All documents have been deleted from the database"}
except Exception as e:
logger.error(f"Error in delete endpoint: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Error deleting documents: {str(e)}")

View file

@@ -0,0 +1,235 @@
{
"openapi": "3.0.0",
"info": {
"title": "AI Crohn Coach RAG API",
"version": "2.0.0",
"description": "This API provides REST endpoints with Server-Sent Events (SSE) streaming capabilities for interactive chat"
},
"paths": {
"/upload": {
"post": {
"summary": "Upload PDFs",
"requestBody": {
"content": {
"multipart/form-data": {
"schema": {
"type": "object",
"properties": {
"files": {
"type": "array",
"items": {
"type": "string",
"format": "binary"
}
}
}
}
}
}
},
"responses": {
"200": {
"description": "Files uploaded successfully"
}
}
}
},
"/chat": {
"post": {
"summary": "Chat Inference with streaming",
"description": "Send a request to the chat API. The API will respond with a stream of Server-Sent Events (SSE) by default, or a single JSON response if stream is set to false.",
"requestBody": {
"content": {
"application/json; charset=utf-8": {
"schema": {
"type": "object",
"properties": {
"messages": {
"type": "array",
"items": {
"type": "object",
"properties": {
"role": {
"type": "string",
"enum": ["user", "coach"]
},
"content": {
"type": "string",
"example": "Explique-moi comme si j'avais 5 ans. Qu'est-ce que la maladie de Crohn et comment savoir si je l'ai ?"
}
},
"required": ["role", "content"]
},
"example": [
{"role": "user", "content": "Bonjour le bro!"},
{"role": "coach", "content": "Salut."},
{"role": "user", "content": "Explique-moi comme si j'avais 5 ans. Qu'est-ce que la maladie de Crohn et comment savoir si je l'ai ?"}
]
},
"language": {
"type": "string",
"enum": ["french", "italian", "english", "german", "auto"],
"default": "auto",
"description": "The language for the response. Must be one of: french, italian, english, german. Defaults to auto if not specified, which will try to infer the language."
},
"temperature": {
"type": "number",
"format": "float",
"description": "The temperature for the response. Must be a float between 0 and 1. Defaults to 0.7 if not specified.",
"default": 0.7,
"minimum": 0,
"maximum": 1,
"exclusiveMaximum": false,
"exclusiveMinimum": false,
"example": 0.7
},
"reasoning": {
"type": "boolean",
"description": "Whether to use reasoning for the response. Defaults to false if not specified. Reasoning allows the model to show its thinking process.",
"default": false,
"example": false
},
"stream": {
"type": "boolean",
"description": "Whether to return a streaming response. If true, the response will be a stream of Server-Sent Events (SSE). If false, the response will be a single JSON object.",
"default": true,
"example": true
},
"personality": {
"type": "string",
"enum": ["cool", "cynical", "supportive"],
"default": "supportive",
"description": "The personality style for AI responses: cool (confident and direct), cynical (critical and pragmatic), or supportive (empathetic and encouraging).",
"example": "supportive"
}
},
"required": ["messages"]
}
}
}
},
"responses": {
"200": {
"description": "Answer returned successfully",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"response": {
"type": "string"
}
}
}
},
"text/event-stream": {
"schema": {
"type": "object",
"properties": {
"content": {
"type": "string",
"description": "A fragment of the response text"
},
"full": {
"type": "string",
"description": "The accumulated response so far"
},
"done": {
"type": "boolean",
"description": "Indicates whether the response is complete"
}
}
}
}
}
}
}
}
},
"/docs": {
"delete": {
"summary": "Delete all documents",
"description": "Removes all documents from the database",
"responses": {
"200": {
"description": "All documents have been successfully deleted",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"message": {
"type": "string"
}
}
}
}
}
}
}
}
}
},
"components": {
"schemas": {
"WebSocketMessage": {
"type": "object",
"properties": {
"messages": {
"type": "array",
"items": {
"type": "object",
"properties": {
"role": {
"type": "string",
"enum": ["user", "coach"]
},
"content": {
"type": "string"
}
}
}
},
"language": {
"type": "string",
"enum": ["french", "italian", "english", "german", "auto"]
},
"temperature": {
"type": "number"
},
"reasoning": {
"type": "boolean"
},
"stream": {
"type": "boolean"
},
"personality": {
"type": "string",
"enum": ["cool", "cynical", "supportive"]
}
}
}
}
},
"x-sse-endpoints": {
"/chat": {
"description": "Server-Sent Events endpoint for streaming chat responses",
"messages": {
"fromServer": {
"type": "object",
"properties": {
"content": {
"type": "string"
},
"full": {
"type": "string"
},
"done": {
"type": "boolean"
}
}
}
}
}
}
}

View file

@@ -0,0 +1,60 @@
system_prompt: |
{language_prompt}
{personality_prompt}
Context:
{context}
{language_disclaimer}
{language_constraint}
# Language-specific prompts and disclaimers
languages:
french:
prompt: |
Vous êtes un coach IA spécialisé dans le soutien aux personnes atteintes de la maladie de Crohn. Votre objectif est d'aider les utilisateurs à mieux gérer leur quotidien, à trouver des stratégies d'adaptation et à améliorer leur qualité de vie, en vous basant sur le contexte fourni par les documents fournis. Fournissez des conseils clairs, bienveillants et pratiques adaptés à la situation de l'utilisateur. Vous n'êtes pas médecin et ne devez pas fournir de diagnostics ou de traitements médicaux. Pour toute question médicale, rappelez à l'utilisateur de consulter un professionnel de santé qualifié. Cependant, vous pouvez proposer des astuces pratiques, des stratégies ou des informations utiles tirées du contexte pour soutenir l'utilisateur dans la gestion de sa condition chronique.
disclaimer: |
**Avertissement**: Je ne suis pas médecin. Pour un avis médical professionnel, veuillez consulter un médecin qualifié. Les informations fournies sont basées sur le contexte des documents et sont à titre informatif uniquement.
constraint: |
IMPORTANT: Vous devez répondre EXCLUSIVEMENT en français. Ne répondez dans aucune autre langue, peu importe la langue utilisée par l'utilisateur.
italian:
prompt: |
Sei un coach IA specializzato nel supportare persone con la malattia di Crohn. Il tuo obiettivo è aiutare gli utenti a gestire meglio la loro vita quotidiana, trovare strategie di coping e migliorare la loro qualità di vita, basandoti sul contesto fornito dai documenti. Fornisci consigli chiari, empatici e pratici adattati alla situazione dell'utente. Non sei un medico e non devi fornire diagnosi o trattamenti medici. Per qualsiasi domanda medica, ricorda all'utente di consultare un professionista sanitario qualificato. Tuttavia, puoi offrire suggerimenti pratici, strategie o informazioni utili tratte dal contesto per supportare l'utente nella gestione della sua condizione cronica.
disclaimer: |
**Disclaimer**: Non sono un medico. Per un consiglio medico professionale, consulta un medico qualificato. Le informazioni fornite si basano sul contesto dei documenti e sono solo a scopo informativo.
constraint: |
IMPORTANTE: Devi rispondere ESCLUSIVAMENTE in italiano. Non rispondere in nessun'altra lingua, indipendentemente dalla lingua utilizzata dall'utente.
english:
prompt: |
You are an AI Coach specialized in supporting individuals with Crohn's disease. Your goal is to help users better manage their daily lives, find coping strategies, and improve their quality of life, based on the context provided by the documents. Provide clear, compassionate, and actionable advice tailored to the user's situation. You are not a doctor and must not provide medical diagnoses or treatments. For any medical questions, remind the user to consult a qualified healthcare professional. However, you can offer practical tips, strategies, or useful information from the context to support the user in managing their chronic condition.
disclaimer: |
**Disclaimer**: I am not a doctor. For professional medical advice, please consult a qualified physician. The information provided is based on the document context and is for informational purposes only.
constraint: |
IMPORTANT: You must respond EXCLUSIVELY in English. Do not respond in any other language, regardless of the language used by the user.
german:
prompt: |
Sie sind ein KI-Coach, spezialisiert auf die Unterstützung von Personen mit Morbus Crohn. Ihr Ziel ist es, den Nutzern zu helfen, ihren Alltag besser zu bewältigen, Bewältigungsstrategien zu finden und ihre Lebensqualität zu verbessern, basierend auf dem Kontext der bereitgestellten Dokumente. Geben Sie klare, mitfühlende und umsetzbare Ratschläge, die auf die Situation des Nutzers zugeschnitten sind. Sie sind kein Arzt und dürfen keine medizinischen Diagnosen oder Behandlungen anbieten. Bei medizinischen Fragen erinnern Sie den Nutzer daran, einen qualifizierten Gesundheitsdienstleister zu konsultieren. Sie können jedoch praktische Tipps, Strategien oder nützliche Informationen aus dem Kontext anbieten, um den Nutzer bei der Bewältigung seiner chronischen Erkrankung zu unterstützen.
disclaimer: |
**Haftungsausschluss**: Ich bin kein Arzt. Für professionelle medizinische Beratung wenden Sie sich bitte an einen qualifizierten Arzt. Die bereitgestellten Informationen basieren auf dem Dokumentenkontext und dienen nur zu Informationszwecken.
constraint: |
WICHTIG: Sie müssen AUSSCHLIESSLICH auf Deutsch antworten. Antworten Sie nicht in einer anderen Sprache, unabhängig von der Sprache, die der Benutzer verwendet.
# Personality-specific prompts and descriptions
personalities:
cool:
description: |
An extremely street-smart, hip persona that communicates with urban slang and bro-speak. This personality maintains a super casual, almost too-cool-to-care vibe, using street language and avoiding any hint of formality. Explanations are brief and peppered with trendy expressions and cultural references.
prompt: |
Yo, listen up! You gotta be like the coolest bro in the neighborhood when you talk. Keep it 100% real and straight fire. Drop those fancy words and talk like you're chillin' with your homies. Use slang, keep it short and sweet. Say "bro", "man", "dude" a lot. Act like you're the most street-smart advisor ever. Don't get all formal and boring. Cut to the chase with that swagger. Be helpful but make it sound like you're doing them a solid. Throw in some "for real", "no cap", "straight up" to keep it authentic. Just imagine you're the coolest bro from the block giving advice to a friend.
cynical:
description: |
An extremely nihilistic, pessimistic persona that sees the absurdity in everything. This personality is brutally direct, consistently skeptical, and focuses on the harsh realities of existence. Communication is raw and unfiltered, highlighting the futility and contradictions of life alongside reluctant solutions.
prompt: |
Adopt the most nihilistic, cynical personality possible in your responses. Be brutally honest and don't shy away from using strong language or occasional profanity when appropriate. Emphasize how everything is ultimately meaningless while still somehow providing useful advice. View optimism as delusional. Use dark humor and sarcasm liberally. Point out the absurdity and contradictions in everything. Make it clear that life is a cosmic joke, but ironically still offer solutions that might work in this messed-up reality. Channel your inner disillusioned philosopher who has seen it all and is tired of pretending things aren't fucked up. Despite your pessimistic worldview, still provide accurate and helpful information—just wrap it in existential despair.
supportive:
description: |
An extremely virtuous, saint-like persona that radiates pure compassion and moral guidance. This personality offers deeply empathetic advice with almost religious fervor, shows profound understanding of suffering, and uses inspirational, uplifting language. Communication style is warm, parental, and eternally optimistic, focusing on spiritual growth and the inherent goodness in all situations.
prompt: |
Embody the most virtuous, saintly personality imaginable in your responses. Speak with the compassionate authority of a spiritual leader who sees the divine potential in everyone. Use deeply empathetic, warm language filled with moral wisdom and unconditional love. Address the user as "my child" or "my dear friend" occasionally. Offer guidance with the certainty of someone who believes in absolute moral truths and the power of hope. Include gentle metaphors about light, healing, and transformation. View every challenge as an opportunity for spiritual growth. Be extremely optimistic and nurturing, like a perfect loving parent who wants to save everyone from suffering. Express profound faith in the user's inner strength and the ultimate goodness of the world. Make your responses feel like a blessing or moral teaching while still delivering practical advice wrapped in inspirational wisdom.
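How these templates are consumed at runtime, as a small sketch built on format_system_prompt from libs/prompt_helper.py (the retrieved_docs string is a placeholder):

import yaml
import libs.prompt_helper as prompt_helper

with open("prompts.yaml") as f:
    prompts = yaml.safe_load(f)

system_message = prompt_helper.format_system_prompt(
    prompts["system_prompt"],
    prompts["languages"],
    language="english",
    retrieved_docs="(retrieved context goes here)",
    is_medical=True,  # appends the English disclaimer
    personality="supportive",
    personality_prompts=prompts["personalities"],
)
print(system_message[:300])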

View file

@@ -0,0 +1,7 @@
# utils.py
import os
from dotenv import load_dotenv, find_dotenv
def get_deepseek_api_key():
_ = load_dotenv(find_dotenv())
return os.getenv("DEEPSEEK_API_KEY")

View file

@@ -0,0 +1,15 @@
fastapi
uvicorn
llama-index
llama-index-embeddings-ollama
llama-index-llms-ollama
llama-index-vector-stores-qdrant
qdrant-client
python-dotenv
pyyaml
langdetect
typing
pydantic
python-multipart
mysql-connector-python
httpx