diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..a93d34a --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +CHROMA_PERSIST_DIR=./data/chroma +EMBEDDING_MODEL=intfloat/multilingual-e5-large +DEEPSEEK_API_KEY=sk-your-key-here +DEEPSEEK_MODEL=deepseek-chat +DEEPSEEK_BASE_URL=https://api.deepseek.com +LOG_LEVEL=info diff --git a/.gitignore b/.gitignore index af23dc6..3431547 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ data/chroma/ *.egg-info/ .venv/ .DS_Store +server.log diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..69467fa --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.11-slim + +RUN apt-get update && apt-get install -y \ + curl \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8003 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8003"] diff --git a/config.py b/config.py new file mode 100644 index 0000000..662e9e7 --- /dev/null +++ b/config.py @@ -0,0 +1,19 @@ +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + chroma_persist_dir: str = "./data/chroma" + embedding_model: str = "intfloat/multilingual-e5-large" + deepseek_api_key: str = "" + deepseek_model: str = "deepseek-chat" + deepseek_base_url: str = "https://api.deepseek.com" + log_level: str = "info" + + max_chunk_size: int = 1200 + min_chunk_size: int = 200 + overlap_sentences: int = 2 + + model_config = {"env_file": ".env", "env_file_encoding": "utf-8"} + + +settings = Settings() diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..9e617ab --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,24 @@ +services: + rag-service: + build: . + ports: + - "8003:8003" + volumes: + - chroma_data:/app/data + - embedding_cache:/root/.cache + environment: + CHROMA_PERSIST_DIR: /app/data/chroma + EMBEDDING_MODEL: intfloat/multilingual-e5-large + DEEPSEEK_API_KEY: ${DEEPSEEK_API_KEY} + DEEPSEEK_MODEL: ${DEEPSEEK_MODEL:-deepseek-chat} + LOG_LEVEL: ${LOG_LEVEL:-info} + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:8003/health || exit 1"] + interval: 15s + timeout: 10s + retries: 10 + start_period: 180s + +volumes: + chroma_data: + embedding_cache: diff --git a/main.py b/main.py new file mode 100644 index 0000000..11ada98 --- /dev/null +++ b/main.py @@ -0,0 +1,55 @@ +import logging +from contextlib import asynccontextmanager + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles + +from config import settings +from services.embeddings import EmbeddingService +from services.vectorstore import VectorStoreService + +logger = logging.getLogger(__name__) + +embedding_service: EmbeddingService | None = None +vectorstore_service: VectorStoreService | None = None + + +@asynccontextmanager +async def lifespan(app: FastAPI): + global embedding_service, vectorstore_service + logging.basicConfig(level=getattr(logging, settings.log_level.upper(), logging.INFO)) + logger.info("Loading embedding model: %s", settings.embedding_model) + embedding_service = EmbeddingService(settings.embedding_model) + logger.info("Embedding model loaded") + vectorstore_service = VectorStoreService( + persist_dir=settings.chroma_persist_dir, + embedding_service=embedding_service, + ) + logger.info("ChromaDB initialized at %s", settings.chroma_persist_dir) + yield + logger.info("Shutting down") + + +app = FastAPI( + title="Chat Agent for Patients — Tuning Tool", + description="RAG-ядро и инструмент настройки пациентского чат-агента", + version="0.1.0", + lifespan=lifespan, +) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +from routers import documents, health, query # noqa: E402 + +app.include_router(health.router) +app.include_router(documents.router) +app.include_router(query.router) + +app.mount("/", StaticFiles(directory="static", html=True), name="static") diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/requests.py b/models/requests.py new file mode 100644 index 0000000..afcc5d4 --- /dev/null +++ b/models/requests.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel, Field + + +class QueryRequest(BaseModel): + text: str = Field(..., description="Вопрос от лица пациента") + top_k: int = Field(5, ge=1, le=20, description="Количество чанков для retrieval") + document_ids: list[str] | None = Field(None, description="Ограничить поиск конкретными документами") + temperature: float | None = Field(None, ge=0.0, le=2.0) + max_tokens: int | None = Field(None, ge=100, le=8000) diff --git a/models/responses.py b/models/responses.py new file mode 100644 index 0000000..619f668 --- /dev/null +++ b/models/responses.py @@ -0,0 +1,77 @@ +from pydantic import BaseModel, Field + + +class DocumentInfo(BaseModel): + document_id: str + name: str + chunks_count: int + file_type: str + created_at: str + metadata: dict = Field(default_factory=dict) + + +class ChunkPreview(BaseModel): + index: int + section: str = "" + page_number: int = 0 + text_preview: str = "" + char_length: int = 0 + + +class DocumentUploadResponse(BaseModel): + document_id: str + name: str + chunks_count: int + status: str = "indexed" + created_at: str + chunks_preview: list[ChunkPreview] = Field(default_factory=list) + + +class DocumentListResponse(BaseModel): + documents: list[DocumentInfo] + total: int + + +class ChunkDetail(BaseModel): + index: int + section: str = "" + page_number: int = 0 + text: str = "" + char_length: int = 0 + + +class DocumentChunksResponse(BaseModel): + document_id: str + name: str + file_type: str + chunks_count: int + chunks: list[ChunkDetail] = Field(default_factory=list) + + +class DocumentDeleteResponse(BaseModel): + ok: bool = True + deleted_chunks: int + + +class SourceInfo(BaseModel): + document_id: str + document_name: str + chunk_text: str + section: str = "" + page: int = 0 + relevance_score: float = 0.0 + + +class QueryResponse(BaseModel): + answer: str + sources: list[SourceInfo] + model_used: str + assembled_prompt: str = "" + + +class HealthResponse(BaseModel): + status: str = "ok" + chromadb: str + embedding_model: str + documents_count: int + chunks_count: int diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b2320e1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +fastapi==0.115.5 +uvicorn[standard]==0.32.1 +python-multipart==0.0.12 +chromadb==0.5.23 +sentence-transformers==3.3.1 +pymupdf==1.25.1 +python-docx==1.1.2 +httpx==0.28.1 +pydantic-settings==2.7.1 diff --git a/routers/__init__.py b/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/routers/documents.py b/routers/documents.py new file mode 100644 index 0000000..ab3c229 --- /dev/null +++ b/routers/documents.py @@ -0,0 +1,155 @@ +import logging +from datetime import datetime, timezone + +from fastapi import APIRouter, File, Form, HTTPException, UploadFile + +from models.responses import ( + ChunkDetail, + ChunkPreview, + DocumentChunksResponse, + DocumentDeleteResponse, + DocumentInfo, + DocumentListResponse, + DocumentUploadResponse, +) +from services.document_processor import process_document + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/documents", tags=["documents"]) + +ALLOWED_EXTENSIONS = {".pdf", ".docx", ".doc", ".txt", ".md"} +MAX_FILE_SIZE = 50 * 1024 * 1024 # 50 MB + + +@router.post("/upload", response_model=DocumentUploadResponse) +async def upload_document( + file: UploadFile = File(...), + document_name: str | None = Form(None), +): + from main import vectorstore_service + + if vectorstore_service is None: + raise HTTPException(status_code=503, detail="Service not ready") + + filename = file.filename or "unknown" + ext = "." + filename.rsplit(".", 1)[-1].lower() if "." in filename else "" + if ext not in ALLOWED_EXTENSIONS: + raise HTTPException( + status_code=400, + detail=f"Unsupported file format: {ext}. Allowed: {', '.join(ALLOWED_EXTENSIONS)}", + ) + + file_bytes = await file.read() + if len(file_bytes) > MAX_FILE_SIZE: + raise HTTPException(status_code=400, detail="File too large (max 50 MB)") + if len(file_bytes) == 0: + raise HTTPException(status_code=400, detail="Empty file") + + display_name = document_name or filename + try: + document_id, sections, chunks = process_document(file_bytes, filename) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + logger.exception("Failed to process document: %s", filename) + raise HTTPException(status_code=500, detail=f"Error processing document: {e}") + + if not chunks: + raise HTTPException(status_code=400, detail="No content could be extracted from the document") + + file_type = ext.lstrip(".") + chunks_count = vectorstore_service.add_document( + document_id=document_id, + document_name=display_name, + file_type=file_type, + chunks=[ + { + "text": c.text, + "section": c.section, + "page_number": c.page_number, + "chunk_index": c.chunk_index, + } + for c in chunks + ], + ) + + chunks_prev = [ + ChunkPreview( + index=c.chunk_index, + section=c.section, + page_number=c.page_number, + text_preview=c.text[:300], + char_length=len(c.text), + ) + for c in chunks[:3] + ] + + return DocumentUploadResponse( + document_id=document_id, + name=display_name, + chunks_count=chunks_count, + status="indexed", + created_at=datetime.now(timezone.utc).isoformat(), + chunks_preview=chunks_prev, + ) + + +@router.get("", response_model=DocumentListResponse) +async def list_documents(): + from main import vectorstore_service + + if vectorstore_service is None: + raise HTTPException(status_code=503, detail="Service not ready") + + docs = vectorstore_service.list_documents() + return DocumentListResponse( + documents=[DocumentInfo(**d) for d in docs], + total=len(docs), + ) + + +@router.get("/{document_id}/chunks", response_model=DocumentChunksResponse) +async def get_document_chunks(document_id: str): + from main import vectorstore_service + + if vectorstore_service is None: + raise HTTPException(status_code=503, detail="Service not ready") + + raw_chunks = vectorstore_service.get_document_chunks(document_id) + if not raw_chunks: + raise HTTPException(status_code=404, detail="Document not found") + + meta0 = raw_chunks[0]["metadata"] + chunks = [ + ChunkDetail( + index=c["metadata"].get("chunk_index", 0), + section=c["metadata"].get("section", ""), + page_number=c["metadata"].get("page_number", 0), + text=c["text"], + char_length=len(c["text"]), + ) + for c in raw_chunks + ] + + return DocumentChunksResponse( + document_id=document_id, + name=meta0.get("document_name", ""), + file_type=meta0.get("file_type", ""), + chunks_count=len(chunks), + chunks=chunks, + ) + + +@router.delete("/{document_id}", response_model=DocumentDeleteResponse) +async def delete_document(document_id: str): + from main import vectorstore_service + + if vectorstore_service is None: + raise HTTPException(status_code=503, detail="Service not ready") + + deleted = vectorstore_service.delete_document(document_id) + if deleted == 0: + raise HTTPException(status_code=404, detail="Document not found") + + return DocumentDeleteResponse(ok=True, deleted_chunks=deleted) diff --git a/routers/health.py b/routers/health.py new file mode 100644 index 0000000..85015f4 --- /dev/null +++ b/routers/health.py @@ -0,0 +1,29 @@ +from fastapi import APIRouter + +from config import settings +from models.responses import HealthResponse + +router = APIRouter() + + +@router.get("/health", response_model=HealthResponse) +async def health(): + from main import vectorstore_service + + if vectorstore_service is None: + return HealthResponse( + status="loading", + chromadb="not_connected", + embedding_model=settings.embedding_model, + documents_count=0, + chunks_count=0, + ) + + stats = vectorstore_service.get_stats() + return HealthResponse( + status="ok", + chromadb="connected", + embedding_model=settings.embedding_model, + documents_count=stats["documents_count"], + chunks_count=stats["chunks_count"], + ) diff --git a/routers/query.py b/routers/query.py new file mode 100644 index 0000000..cee837c --- /dev/null +++ b/routers/query.py @@ -0,0 +1,47 @@ +import logging + +from fastapi import APIRouter, HTTPException + +from config import settings +from models.requests import QueryRequest +from models.responses import QueryResponse, SourceInfo +from services.llm_client import LLMClient +from services.rag_pipeline import rag_query + +logger = logging.getLogger(__name__) + +router = APIRouter(tags=["query"]) + + +@router.post("/query", response_model=QueryResponse) +async def query_rag(request: QueryRequest): + from main import vectorstore_service + + if vectorstore_service is None: + raise HTTPException(status_code=503, detail="Service not ready") + + if not settings.deepseek_api_key: + raise HTTPException(status_code=500, detail="DEEPSEEK_API_KEY not configured") + + llm_client = LLMClient() + + try: + result = await rag_query( + vectorstore=vectorstore_service, + llm_client=llm_client, + question=request.text, + top_k=request.top_k, + document_ids=request.document_ids, + temperature=request.temperature, + max_tokens=request.max_tokens, + ) + except Exception as e: + logger.exception("RAG query failed") + raise HTTPException(status_code=500, detail=f"RAG query error: {e}") + + return QueryResponse( + answer=result["answer"], + sources=[SourceInfo(**s) for s in result["sources"]], + model_used=result["model_used"], + assembled_prompt=result.get("assembled_prompt", ""), + ) diff --git a/services/__init__.py b/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/document_processor.py b/services/document_processor.py new file mode 100644 index 0000000..2659a05 --- /dev/null +++ b/services/document_processor.py @@ -0,0 +1,300 @@ +import io +import logging +import re +import uuid +from dataclasses import dataclass +from pathlib import Path + +import fitz # pymupdf +from docx import Document as DocxDocument + +from config import settings + +logger = logging.getLogger(__name__) + + +@dataclass +class ParsedSection: + heading: str + heading_level: int + body: str + page_number: int = 0 + + +@dataclass +class Chunk: + text: str + section: str = "" + page_number: int = 0 + chunk_index: int = 0 + + +# --- Parsers --- + + +def parse_pdf(file_bytes: bytes) -> list[ParsedSection]: + doc = fitz.open(stream=file_bytes, filetype="pdf") + sections: list[ParsedSection] = [] + current_heading = "" + current_body_lines: list[str] = [] + current_page = 0 + + for page_num in range(len(doc)): + page = doc[page_num] + blocks = page.get_text("dict")["blocks"] + + for block in blocks: + if "lines" not in block: + continue + for line in block["lines"]: + text = "".join(span["text"] for span in line["spans"]).strip() + if not text: + continue + + max_size = max(span["size"] for span in line["spans"]) + is_bold = any("bold" in span["font"].lower() for span in line["spans"]) + + if (max_size >= 14 or (is_bold and max_size >= 12)) and len(text) < 200: + if current_heading or current_body_lines: + sections.append(ParsedSection( + heading=current_heading, + heading_level=1 if max_size >= 16 else 2, + body="\n".join(current_body_lines).strip(), + page_number=current_page, + )) + current_heading = text + current_body_lines = [] + current_page = page_num + 1 + else: + current_body_lines.append(text) + if not current_heading: + current_page = page_num + 1 + + if current_heading or current_body_lines: + sections.append(ParsedSection( + heading=current_heading, + heading_level=2, + body="\n".join(current_body_lines).strip(), + page_number=current_page, + )) + + doc.close() + return sections + + +def parse_docx(file_bytes: bytes) -> list[ParsedSection]: + doc = DocxDocument(io.BytesIO(file_bytes)) + sections: list[ParsedSection] = [] + current_heading = "" + current_level = 0 + current_body_lines: list[str] = [] + + for para in doc.paragraphs: + text = para.text.strip() + if not text: + continue + + style_name = (para.style.name or "").lower() + + if "heading" in style_name or "title" in style_name: + if current_heading or current_body_lines: + sections.append(ParsedSection( + heading=current_heading, + heading_level=current_level or 1, + body="\n".join(current_body_lines).strip(), + )) + level_match = re.search(r"\d+", style_name) + current_level = int(level_match.group()) if level_match else 1 + current_heading = text + current_body_lines = [] + else: + current_body_lines.append(text) + + if current_heading or current_body_lines: + sections.append(ParsedSection( + heading=current_heading, + heading_level=current_level or 1, + body="\n".join(current_body_lines).strip(), + )) + + return sections + + +def parse_text(file_bytes: bytes, is_markdown: bool = False) -> list[ParsedSection]: + """Parse wiki-style TXT/MD. + + Эвристики под wiki операторов: + - markdown-заголовки (#, ##, ...) + - нумерованные пункты «1.», «1.1.», «1.1.1.» + - FAQ-паттерн «В:» / «Вопрос:» — воспринимаем как начало новой секции + - ALL-CAPS строки (короткие) — заголовок + """ + text = file_bytes.decode("utf-8", errors="replace") + lines = text.split("\n") + sections: list[ParsedSection] = [] + current_heading = "" + current_level = 0 + current_body_lines: list[str] = [] + + md_heading_re = re.compile(r"^(#{1,6})\s+(.+)") + numbered_heading_re = re.compile(r"^(\d+(?:\.\d+)*\.?)\s+([А-ЯЁA-Z].*)") + faq_question_re = re.compile(r"^(В|Вопрос|Q|Question)\s*[:\.]\s*(.+)", re.IGNORECASE) + + for line in lines: + stripped = line.strip() + + heading_text = None + heading_level = 0 + + md_match = md_heading_re.match(stripped) + if md_match: + heading_level = len(md_match.group(1)) + heading_text = md_match.group(2).strip() + + if not heading_text: + num_match = numbered_heading_re.match(stripped) + if num_match and len(stripped) < 200: + dots = num_match.group(1).count(".") + heading_level = max(1, dots + 1) + heading_text = stripped + + if not heading_text: + faq_match = faq_question_re.match(stripped) + if faq_match and len(stripped) < 300: + heading_text = faq_match.group(2).strip() + heading_level = 3 + + if not heading_text and stripped.isupper() and 3 < len(stripped) < 200: + heading_text = stripped + heading_level = 1 + + if heading_text: + if current_heading or current_body_lines: + sections.append(ParsedSection( + heading=current_heading, + heading_level=current_level or 1, + body="\n".join(current_body_lines).strip(), + )) + current_heading = heading_text + current_level = heading_level + current_body_lines = [] + else: + current_body_lines.append(line) + + if current_heading or current_body_lines: + sections.append(ParsedSection( + heading=current_heading, + heading_level=current_level or 1, + body="\n".join(current_body_lines).strip(), + )) + + return sections + + +# --- Chunker --- + + +def _split_sentences(text: str) -> list[str]: + sentences = re.split(r"(?<=[.!?])\s+", text) + return [s.strip() for s in sentences if s.strip()] + + +def chunk_sections( + sections: list[ParsedSection], + max_chunk_size: int | None = None, + min_chunk_size: int | None = None, + overlap_sentences: int | None = None, +) -> list[Chunk]: + """Чанкинг wiki-секций. + + - Малые секции (FAQ-ответы) держим целиком — один чанк = одна тема. + - Большие секции (регламенты) режем по абзацам, с overlap последних N предложений. + - Мелкие соседние секции склеиваем, чтобы не плодить огрызки. + """ + max_size = max_chunk_size or settings.max_chunk_size + min_size = min_chunk_size or settings.min_chunk_size + overlap = overlap_sentences or settings.overlap_sentences + + raw_chunks: list[Chunk] = [] + + for section in sections: + heading_prefix = f"{section.heading}\n\n" if section.heading else "" + full_text = heading_prefix + section.body + + if len(full_text) <= max_size: + raw_chunks.append(Chunk( + text=full_text.strip(), + section=section.heading, + page_number=section.page_number, + )) + else: + paragraphs = section.body.split("\n") + current_text = heading_prefix + for para in paragraphs: + if len(current_text) + len(para) + 1 > max_size and len(current_text) > len(heading_prefix): + raw_chunks.append(Chunk( + text=current_text.strip(), + section=section.heading, + page_number=section.page_number, + )) + current_text = heading_prefix + para + "\n" + else: + current_text += para + "\n" + if current_text.strip() and current_text.strip() != heading_prefix.strip(): + raw_chunks.append(Chunk( + text=current_text.strip(), + section=section.heading, + page_number=section.page_number, + )) + + merged: list[Chunk] = [] + for chunk in raw_chunks: + if merged and len(merged[-1].text) < min_size: + merged[-1].text += "\n\n" + chunk.text + if not merged[-1].section: + merged[-1].section = chunk.section + else: + merged.append(Chunk( + text=chunk.text, + section=chunk.section, + page_number=chunk.page_number, + )) + + final: list[Chunk] = [] + for i, chunk in enumerate(merged): + if i > 0 and overlap > 0: + prev_sentences = _split_sentences(merged[i - 1].text) + overlap_text = " ".join(prev_sentences[-overlap:]) + if overlap_text and overlap_text not in chunk.text: + chunk.text = overlap_text + "\n\n" + chunk.text + chunk.chunk_index = i + final.append(chunk) + + return final + + +# --- Main processor --- + + +def process_document(file_bytes: bytes, filename: str) -> tuple[str, list[ParsedSection], list[Chunk]]: + document_id = str(uuid.uuid4()) + ext = Path(filename).suffix.lower() + + if ext == ".pdf": + sections = parse_pdf(file_bytes) + elif ext in (".docx", ".doc"): + sections = parse_docx(file_bytes) + elif ext == ".md": + sections = parse_text(file_bytes, is_markdown=True) + elif ext == ".txt": + sections = parse_text(file_bytes, is_markdown=False) + else: + raise ValueError(f"Unsupported file format: {ext}") + + if not sections: + logger.warning("No sections found in %s", filename) + return document_id, [], [] + + chunks = chunk_sections(sections) + logger.info("Processed '%s': %d sections → %d chunks", filename, len(sections), len(chunks)) + return document_id, sections, chunks diff --git a/services/embeddings.py b/services/embeddings.py new file mode 100644 index 0000000..3f9427a --- /dev/null +++ b/services/embeddings.py @@ -0,0 +1,22 @@ +import logging + +from sentence_transformers import SentenceTransformer + +logger = logging.getLogger(__name__) + + +class EmbeddingService: + def __init__(self, model_name: str = "intfloat/multilingual-e5-large"): + logger.info("Loading embedding model: %s", model_name) + self.model = SentenceTransformer(model_name) + self.model_name = model_name + logger.info("Embedding model loaded (dim=%d)", self.model.get_sentence_embedding_dimension()) + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + prefixed = [f"passage: {t}" for t in texts] + embeddings = self.model.encode(prefixed, normalize_embeddings=True, show_progress_bar=False) + return embeddings.tolist() + + def embed_query(self, query: str) -> list[float]: + embedding = self.model.encode(f"query: {query}", normalize_embeddings=True) + return embedding.tolist() diff --git a/services/llm_client.py b/services/llm_client.py new file mode 100644 index 0000000..fc7745c --- /dev/null +++ b/services/llm_client.py @@ -0,0 +1,104 @@ +import logging + +import httpx + +from config import settings + +logger = logging.getLogger(__name__) + +DEFAULT_SYSTEM_PROMPT = """Ты — виртуальный ассистент клиники, который первым отвечает пациентам в чате. + +Твоя задача — помочь пациенту по бытовым и организационным вопросам: запись, расписание врачей, подготовка к приёму, как проехать, документы, оплата, ДМС, детский приём и т. п. + +Правила: +- Отвечай коротко, дружелюбно, на «вы», простым русским языком без медицинской латыни. +- Опирайся ТОЛЬКО на предоставленные выдержки из базы знаний. Если ответа в них нет — честно скажи, что уточнишь у оператора, и предложи подключить оператора. +- Не ставь диагнозы и не назначай лечение. Если вопрос про симптомы, лекарства, дозировки или «что со мной» — мягко предложи записаться к врачу и подключить оператора, если нужно. +- Не выдумывай телефоны, адреса, цены, имена врачей, расписание. Только из источников. +- Если пациент просит оператора — коротко подтверди, что сейчас его подключишь. +- Источники указывать не нужно: пациент их не видит. Ответ — обычный текст, как в чате.""" + +DEFAULT_USER_TEMPLATE = """Вопрос пациента: +{question} + +Выдержки из базы знаний операторов: +{sources} + +Ответь пациенту в чате по правилам из системного сообщения.""" + + +class LLMClient: + def __init__( + self, + api_key: str | None = None, + model: str | None = None, + base_url: str | None = None, + ): + self.api_key = api_key or settings.deepseek_api_key + self.model = model or settings.deepseek_model + self.base_url = (base_url or settings.deepseek_base_url).rstrip("/") + + def _format_sources(self, sources: list[dict]) -> str: + if not sources: + return "(источники не найдены)" + lines = [] + for i, src in enumerate(sources, 1): + meta = src.get("metadata", {}) + doc_name = meta.get("document_name", "Документ") + section = meta.get("section", "") + lines.append( + f"[{i}] {src['text']}\n" + f" (Источник: {doc_name}, раздел: {section})" + ) + return "\n".join(lines) + + async def answer( + self, + question: str, + sources: list[dict], + system_prompt: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + ) -> dict: + """Generate a patient-facing answer using RAG context. + + Returns dict with 'text' and 'assembled_prompt'. + """ + effective_system = system_prompt or DEFAULT_SYSTEM_PROMPT + effective_temp = temperature if temperature is not None else 0.2 + effective_max_tokens = max_tokens or 1200 + + formatted_sources = self._format_sources(sources) + user_message = DEFAULT_USER_TEMPLATE.format( + question=question, + sources=formatted_sources, + ) + + assembled_prompt = f"[SYSTEM]\n{effective_system}\n\n[USER]\n{user_message}" + + url = f"{self.base_url}/chat/completions" + payload = { + "model": self.model, + "messages": [ + {"role": "system", "content": effective_system}, + {"role": "user", "content": user_message}, + ], + "temperature": effective_temp, + "max_tokens": effective_max_tokens, + } + + async with httpx.AsyncClient(timeout=60.0) as client: + response = await client.post( + url, + json=payload, + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + ) + response.raise_for_status() + data = response.json() + + content = data["choices"][0]["message"]["content"] + logger.info("LLM response: %d chars, model=%s, temp=%.2f", len(content), self.model, effective_temp) + return {"text": content.strip(), "assembled_prompt": assembled_prompt} diff --git a/services/rag_pipeline.py b/services/rag_pipeline.py new file mode 100644 index 0000000..8efce7e --- /dev/null +++ b/services/rag_pipeline.py @@ -0,0 +1,52 @@ +import logging + +from services.llm_client import LLMClient +from services.vectorstore import VectorStoreService + +logger = logging.getLogger(__name__) + + +async def rag_query( + vectorstore: VectorStoreService, + llm_client: LLMClient, + question: str, + top_k: int = 5, + document_ids: list[str] | None = None, + temperature: float | None = None, + max_tokens: int | None = None, +) -> dict: + """Pipeline: retrieve → augment → generate для одиночного вопроса пациента.""" + logger.info("RAG query: %s", question[:200]) + + retrieved = vectorstore.query( + query_text=question, + top_k=top_k, + document_ids=document_ids, + ) + logger.info("Retrieved %d chunks", len(retrieved)) + + llm_result = await llm_client.answer( + question=question, + sources=retrieved, + temperature=temperature, + max_tokens=max_tokens, + ) + + sources = [] + for item in retrieved: + meta = item.get("metadata", {}) + sources.append({ + "document_id": meta.get("document_id", ""), + "document_name": meta.get("document_name", ""), + "chunk_text": item["text"][:500], + "section": meta.get("section", ""), + "page": meta.get("page_number", 0), + "relevance_score": round(item.get("relevance_score", 0), 3), + }) + + return { + "answer": llm_result["text"], + "sources": sources, + "model_used": llm_client.model, + "assembled_prompt": llm_result["assembled_prompt"], + } diff --git a/services/vectorstore.py b/services/vectorstore.py new file mode 100644 index 0000000..33d7ea8 --- /dev/null +++ b/services/vectorstore.py @@ -0,0 +1,145 @@ +import logging +from datetime import datetime, timezone + +import chromadb + +from services.embeddings import EmbeddingService + +logger = logging.getLogger(__name__) + +COLLECTION_NAME = "operators_wiki" + + +class VectorStoreService: + def __init__(self, persist_dir: str, embedding_service: EmbeddingService): + self.client = chromadb.PersistentClient(path=persist_dir) + self.embedding_service = embedding_service + self.collection = self.client.get_or_create_collection( + name=COLLECTION_NAME, + metadata={"hnsw:space": "cosine"}, + ) + logger.info("ChromaDB collection '%s': %d items", COLLECTION_NAME, self.collection.count()) + + def add_document( + self, + document_id: str, + document_name: str, + file_type: str, + chunks: list[dict], + ) -> int: + if not chunks: + return 0 + + texts = [c["text"] for c in chunks] + embeddings = self.embedding_service.embed_documents(texts) + + ids = [] + metadatas = [] + now = datetime.now(timezone.utc).isoformat() + + for i, chunk in enumerate(chunks): + ids.append(f"{document_id}_chunk_{i}") + metadatas.append({ + "document_id": document_id, + "document_name": document_name, + "file_type": file_type, + "section": chunk.get("section", ""), + "page_number": chunk.get("page_number", 0), + "chunk_index": i, + "created_at": now, + }) + + self.collection.add( + ids=ids, + embeddings=embeddings, + documents=texts, + metadatas=metadatas, + ) + logger.info("Added %d chunks for document '%s'", len(chunks), document_name) + return len(chunks) + + def query( + self, + query_text: str, + top_k: int = 5, + document_ids: list[str] | None = None, + ) -> list[dict]: + query_embedding = self.embedding_service.embed_query(query_text) + + where_filter = None + if document_ids: + if len(document_ids) == 1: + where_filter = {"document_id": document_ids[0]} + else: + where_filter = {"document_id": {"$in": document_ids}} + + results = self.collection.query( + query_embeddings=[query_embedding], + n_results=top_k, + where=where_filter, + include=["documents", "metadatas", "distances"], + ) + + items = [] + if results["ids"] and results["ids"][0]: + for i, chunk_id in enumerate(results["ids"][0]): + items.append({ + "chunk_id": chunk_id, + "text": results["documents"][0][i], + "metadata": results["metadatas"][0][i], + "distance": results["distances"][0][i], + "relevance_score": 1 - results["distances"][0][i], + }) + return items + + def delete_document(self, document_id: str) -> int: + existing = self.collection.get(where={"document_id": document_id}, include=[]) + count = len(existing["ids"]) + if count > 0: + self.collection.delete(ids=existing["ids"]) + logger.info("Deleted %d chunks for document_id=%s", count, document_id) + return count + + def list_documents(self) -> list[dict]: + all_items = self.collection.get(include=["metadatas"]) + docs: dict[str, dict] = {} + for meta in all_items["metadatas"]: + doc_id = meta["document_id"] + if doc_id not in docs: + docs[doc_id] = { + "document_id": doc_id, + "name": meta.get("document_name", ""), + "file_type": meta.get("file_type", ""), + "created_at": meta.get("created_at", ""), + "chunks_count": 0, + "metadata": {}, + } + docs[doc_id]["chunks_count"] += 1 + return list(docs.values()) + + def get_document_chunks(self, document_id: str) -> list[dict]: + """Return all chunks for a document, sorted by chunk_index.""" + results = self.collection.get( + where={"document_id": document_id}, + include=["documents", "metadatas"], + ) + items = [] + if results["ids"]: + for i, chunk_id in enumerate(results["ids"]): + items.append({ + "chunk_id": chunk_id, + "text": results["documents"][i], + "metadata": results["metadatas"][i], + }) + items.sort(key=lambda x: x["metadata"].get("chunk_index", 0)) + return items + + def get_stats(self) -> dict: + all_items = self.collection.get(include=["metadatas"]) + doc_ids = set() + for meta in all_items["metadatas"]: + doc_ids.add(meta.get("document_id", "")) + return { + "documents_count": len(doc_ids), + "chunks_count": self.collection.count(), + } diff --git a/static/index.html b/static/index.html new file mode 100644 index 0000000..efbb491 --- /dev/null +++ b/static/index.html @@ -0,0 +1,571 @@ + + + + + +Chat Agent for Patients — Debug UI + + + + +
+

Chat Agent for Patients — Debug

+ проверяю… + +
+ +
+ +
+

База знаний

+
+ Перетащи файл (.pdf, .docx, .txt, .md) или кликни для выбора + +
+
+ + + + + + + + + + + + + +
ИмяТипЧанковЗагружен
Документы ещё не загружены
+
+ +
+

Тест-вопрос от пациента

+ +
+ + + + +
+ +
+
+

Что нашёл RAG

+
— пока пусто —
+
+
+

Собранный промпт

+
— пока пусто —
+
+
+

Ответ агента

+
— пока пусто —
+
+
+
+ +
+ +
+ + + +