feat: Спринт 1 — RAG-ядро, загрузка wiki и Debug UI

FastAPI + ChromaDB + E5-large + DeepSeek по паттерну work-pcs-dr-cdss, адаптированному под пациентский контекст: - services: embeddings (E5-large с префиксами), vectorstore (коллекция operators_wiki), document_processor (PDF/DOCX/TXT/MD + чанкер с FAQ-паттерном под wiki), llm_client (системный промпт ассистента клиники), rag_pipeline (одиночный вопрос → retrieval → ответ). - routers: /health, /documents (upload, list, chunks, delete), /query. - static/index.html: шапка со статусом, блок базы знаний с раскрытием чанков по клику, блок тест-вопроса с 3-колоночным ответом (чанки со score / собранный промпт / ответ LLM). - Порт 8003 (8001 занят CDSS, 8002 — voicenote). E2E проверен: загрузка wiki_test.md → 2 чанка, вопрос «как записать ребёнка к лору?» → top score 84.8%, корректный ответ DeepSeek. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 14:57:34 +05:00
parent d1e7749605
commit a7f78d71b2
21 changed files with 1641 additions and 0 deletions
@@ -0,0 +1,6 @@
 CHROMA_PERSIST_DIR=./data/chroma
 EMBEDDING_MODEL=intfloat/multilingual-e5-large
 DEEPSEEK_API_KEY=sk-your-key-here
 DEEPSEEK_MODEL=deepseek-chat
 DEEPSEEK_BASE_URL=https://api.deepseek.com
 LOG_LEVEL=info
@@ -5,3 +5,4 @@ data/chroma/
 *.egg-info/
 .venv/
 .DS_Store
 server.log
@@ -0,0 +1,16 @@
 FROM python:3.11-slim
 RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
 EXPOSE 8003
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8003"]
@@ -0,0 +1,19 @@
 from pydantic_settings import BaseSettings
 class Settings(BaseSettings):
    # Service configuration. Each field can be overridden through the
    # environment or the .env file named in model_config; otherwise the
    # default written here applies.
    chroma_persist_dir: str = "./data/chroma"
    embedding_model: str = "intfloat/multilingual-e5-large"
    # Empty by default; the /query router answers HTTP 500 while it is unset.
    deepseek_api_key: str = ""
    deepseek_model: str = "deepseek-chat"
    deepseek_base_url: str = "https://api.deepseek.com"
    # Looked up (upper-cased) as a `logging` level name at startup.
    log_level: str = "info"
    # Chunker limits, measured in characters of chunk text.
    max_chunk_size: int = 1200
    min_chunk_size: int = 200
    # Number of trailing sentences of the previous chunk prepended to the next.
    overlap_sentences: int = 2
    model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
 settings = Settings()
@@ -0,0 +1,24 @@
 services:
  rag-service:
    build: .
    ports:
      - "8003:8003"
    volumes:
      - chroma_data:/app/data
      - embedding_cache:/root/.cache
    environment:
      CHROMA_PERSIST_DIR: /app/data/chroma
      EMBEDDING_MODEL: intfloat/multilingual-e5-large
      DEEPSEEK_API_KEY: ${DEEPSEEK_API_KEY}
      DEEPSEEK_MODEL: ${DEEPSEEK_MODEL:-deepseek-chat}
      LOG_LEVEL: ${LOG_LEVEL:-info}
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:8003/health || exit 1"]
      interval: 15s
      timeout: 10s
      retries: 10
      start_period: 180s
 volumes:
  chroma_data:
  embedding_cache:
@@ -0,0 +1,55 @@
 import logging
 from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from config import settings
 from services.embeddings import EmbeddingService
 from services.vectorstore import VectorStoreService
 logger = logging.getLogger(__name__)
 embedding_service: EmbeddingService | None = None
 vectorstore_service: VectorStoreService | None = None
@asynccontextmanager
 async def lifespan(app: FastAPI):
    global embedding_service, vectorstore_service
    logging.basicConfig(level=getattr(logging, settings.log_level.upper(), logging.INFO))
    logger.info("Loading embedding model: %s", settings.embedding_model)
    embedding_service = EmbeddingService(settings.embedding_model)
    logger.info("Embedding model loaded")
    vectorstore_service = VectorStoreService(
        persist_dir=settings.chroma_persist_dir,
        embedding_service=embedding_service,
    )
    logger.info("ChromaDB initialized at %s", settings.chroma_persist_dir)
    yield
    logger.info("Shutting down")
 app = FastAPI(
    title="Chat Agent for Patients — Tuning Tool",
    description="RAG-ядро и инструмент настройки пациентского чат-агента",
    version="0.1.0",
    lifespan=lifespan,
 )
 # Wildcard origins must not be combined with credentialed requests: with
 # allow_credentials=True any website could call this API with the visitor's
 # cookies. The service itself issues no cookies or sessions, so credentials
 # are switched off while every origin stays allowed.
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
 )
 # Routers are imported only after `app` exists (hence the E402 suppression);
 # their handlers import the service globals from this module at call time.
 from routers import documents, health, query  # noqa: E402
 app.include_router(health.router)
 app.include_router(documents.router)
 app.include_router(query.router)
 # Mounted last so the API routes registered above take precedence over the
 # static Debug UI served from "/".
 app.mount("/", StaticFiles(directory="static", html=True), name="static")
@@ -0,0 +1,9 @@
 from pydantic import BaseModel, Field
 class QueryRequest(BaseModel):
    # Request body of POST /query: one patient question plus optional
    # retrieval and generation overrides.
    text: str = Field(..., description="Вопрос от лица пациента")
    top_k: int = Field(5, ge=1, le=20, description="Количество чанков для retrieval")
    document_ids: list[str] | None = Field(None, description="Ограничить поиск конкретными документами")
    # Left as None, these two are replaced by the LLM client's own defaults.
    temperature: float | None = Field(None, ge=0.0, le=2.0)
    max_tokens: int | None = Field(None, ge=100, le=8000)
@@ -0,0 +1,77 @@
 from pydantic import BaseModel, Field
 class DocumentInfo(BaseModel):
    # One entry of the document listing; built by unpacking a dict returned
    # by the vector store's list_documents().
    document_id: str
    name: str
    chunks_count: int
    file_type: str
    created_at: str
    metadata: dict = Field(default_factory=dict)
 class ChunkPreview(BaseModel):
    # Shortened view of a chunk shown right after upload; the upload handler
    # fills text_preview with the first 300 characters of the chunk text.
    index: int
    section: str = ""
    page_number: int = 0
    text_preview: str = ""
    char_length: int = 0  # length of the full chunk text, not of the preview
 class DocumentUploadResponse(BaseModel):
    # Response of POST /documents/upload.
    document_id: str
    name: str
    chunks_count: int
    status: str = "indexed"
    created_at: str
    # The upload handler includes previews for at most the first three chunks.
    chunks_preview: list[ChunkPreview] = Field(default_factory=list)
 class DocumentListResponse(BaseModel):
    # Response of GET /documents; total is the number of listed documents.
    documents: list[DocumentInfo]
    total: int
 class ChunkDetail(BaseModel):
    # Full chunk (untruncated text) as returned by the chunks endpoint.
    index: int
    section: str = ""
    page_number: int = 0
    text: str = ""
    char_length: int = 0
 class DocumentChunksResponse(BaseModel):
    # Response of GET /documents/{document_id}/chunks; name and file_type are
    # taken from the metadata of the first stored chunk.
    document_id: str
    name: str
    file_type: str
    chunks_count: int
    chunks: list[ChunkDetail] = Field(default_factory=list)
 class DocumentDeleteResponse(BaseModel):
    # Response of DELETE /documents/{document_id}.
    ok: bool = True
    deleted_chunks: int  # number of chunks removed from the vector store
 class SourceInfo(BaseModel):
    # One retrieved chunk reported alongside an answer.
    document_id: str
    document_name: str
    chunk_text: str  # the RAG pipeline truncates this to 500 characters
    section: str = ""
    page: int = 0
    relevance_score: float = 0.0  # rounded to three decimals by the pipeline
 class QueryResponse(BaseModel):
    # Response of POST /query.
    answer: str
    sources: list[SourceInfo]
    model_used: str
    # Debug view of the prompt sent to the LLM ("[SYSTEM] ... [USER] ...").
    assembled_prompt: str = ""
 class HealthResponse(BaseModel):
    # Response of GET /health. The handler reports status "loading" with
    # chromadb "not_connected" until the vector store exists, then "ok" with
    # chromadb "connected".
    status: str = "ok"
    chromadb: str
    embedding_model: str
    documents_count: int
    chunks_count: int
@@ -0,0 +1,9 @@
 fastapi==0.115.5
 uvicorn[standard]==0.32.1
 python-multipart==0.0.12
 chromadb==0.5.23
 sentence-transformers==3.3.1
 pymupdf==1.25.1
 python-docx==1.1.2
 httpx==0.28.1
 pydantic-settings==2.7.1
@@ -0,0 +1,155 @@
 import logging
 from datetime import datetime, timezone
 from fastapi import APIRouter, File, Form, HTTPException, UploadFile
 from models.responses import (
    ChunkDetail,
    ChunkPreview,
    DocumentChunksResponse,
    DocumentDeleteResponse,
    DocumentInfo,
    DocumentListResponse,
    DocumentUploadResponse,
 )
 from services.document_processor import process_document
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/documents", tags=["documents"])
 ALLOWED_EXTENSIONS = {".pdf", ".docx", ".doc", ".txt", ".md"}
 MAX_FILE_SIZE = 50 * 1024 * 1024  # 50 MB
@router.post("/upload", response_model=DocumentUploadResponse)
 async def upload_document(
    file: UploadFile = File(...),
    document_name: str | None = Form(None),
 ):
    from main import vectorstore_service
    if vectorstore_service is None:
        raise HTTPException(status_code=503, detail="Service not ready")
    filename = file.filename or "unknown"
    ext = "." + filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file format: {ext}. Allowed: {', '.join(ALLOWED_EXTENSIONS)}",
        )
    file_bytes = await file.read()
    if len(file_bytes) > MAX_FILE_SIZE:
        raise HTTPException(status_code=400, detail="File too large (max 50 MB)")
    if len(file_bytes) == 0:
        raise HTTPException(status_code=400, detail="Empty file")
    display_name = document_name or filename
    try:
        document_id, sections, chunks = process_document(file_bytes, filename)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.exception("Failed to process document: %s", filename)
        raise HTTPException(status_code=500, detail=f"Error processing document: {e}")
    if not chunks:
        raise HTTPException(status_code=400, detail="No content could be extracted from the document")
    file_type = ext.lstrip(".")
    chunks_count = vectorstore_service.add_document(
        document_id=document_id,
        document_name=display_name,
        file_type=file_type,
        chunks=[
            {
                "text": c.text,
                "section": c.section,
                "page_number": c.page_number,
                "chunk_index": c.chunk_index,
            }
            for c in chunks
        ],
    )
    chunks_prev = [
        ChunkPreview(
            index=c.chunk_index,
            section=c.section,
            page_number=c.page_number,
            text_preview=c.text[:300],
            char_length=len(c.text),
        )
        for c in chunks[:3]
    ]
    return DocumentUploadResponse(
        document_id=document_id,
        name=display_name,
        chunks_count=chunks_count,
        status="indexed",
        created_at=datetime.now(timezone.utc).isoformat(),
        chunks_preview=chunks_prev,
    )
@router.get("", response_model=DocumentListResponse)
 async def list_documents():
    from main import vectorstore_service
    if vectorstore_service is None:
        raise HTTPException(status_code=503, detail="Service not ready")
    docs = vectorstore_service.list_documents()
    return DocumentListResponse(
        documents=[DocumentInfo(**d) for d in docs],
        total=len(docs),
    )
@router.get("/{document_id}/chunks", response_model=DocumentChunksResponse)
 async def get_document_chunks(document_id: str):
    from main import vectorstore_service
    if vectorstore_service is None:
        raise HTTPException(status_code=503, detail="Service not ready")
    raw_chunks = vectorstore_service.get_document_chunks(document_id)
    if not raw_chunks:
        raise HTTPException(status_code=404, detail="Document not found")
    meta0 = raw_chunks[0]["metadata"]
    chunks = [
        ChunkDetail(
            index=c["metadata"].get("chunk_index", 0),
            section=c["metadata"].get("section", ""),
            page_number=c["metadata"].get("page_number", 0),
            text=c["text"],
            char_length=len(c["text"]),
        )
        for c in raw_chunks
    ]
    return DocumentChunksResponse(
        document_id=document_id,
        name=meta0.get("document_name", ""),
        file_type=meta0.get("file_type", ""),
        chunks_count=len(chunks),
        chunks=chunks,
    )
@router.delete("/{document_id}", response_model=DocumentDeleteResponse)
 async def delete_document(document_id: str):
    from main import vectorstore_service
    if vectorstore_service is None:
        raise HTTPException(status_code=503, detail="Service not ready")
    deleted = vectorstore_service.delete_document(document_id)
    if deleted == 0:
        raise HTTPException(status_code=404, detail="Document not found")
    return DocumentDeleteResponse(ok=True, deleted_chunks=deleted)
@@ -0,0 +1,29 @@
 from fastapi import APIRouter
 from config import settings
 from models.responses import HealthResponse
 router = APIRouter()
@router.get("/health", response_model=HealthResponse)
 async def health():
    from main import vectorstore_service
    if vectorstore_service is None:
        return HealthResponse(
            status="loading",
            chromadb="not_connected",
            embedding_model=settings.embedding_model,
            documents_count=0,
            chunks_count=0,
        )
    stats = vectorstore_service.get_stats()
    return HealthResponse(
        status="ok",
        chromadb="connected",
        embedding_model=settings.embedding_model,
        documents_count=stats["documents_count"],
        chunks_count=stats["chunks_count"],
    )
@@ -0,0 +1,47 @@
 import logging
 from fastapi import APIRouter, HTTPException
 from config import settings
 from models.requests import QueryRequest
 from models.responses import QueryResponse, SourceInfo
 from services.llm_client import LLMClient
 from services.rag_pipeline import rag_query
 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["query"])
@router.post("/query", response_model=QueryResponse)
 async def query_rag(request: QueryRequest):
    from main import vectorstore_service
    if vectorstore_service is None:
        raise HTTPException(status_code=503, detail="Service not ready")
    if not settings.deepseek_api_key:
        raise HTTPException(status_code=500, detail="DEEPSEEK_API_KEY not configured")
    llm_client = LLMClient()
    try:
        result = await rag_query(
            vectorstore=vectorstore_service,
            llm_client=llm_client,
            question=request.text,
            top_k=request.top_k,
            document_ids=request.document_ids,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
        )
    except Exception as e:
        logger.exception("RAG query failed")
        raise HTTPException(status_code=500, detail=f"RAG query error: {e}")
    return QueryResponse(
        answer=result["answer"],
        sources=[SourceInfo(**s) for s in result["sources"]],
        model_used=result["model_used"],
        assembled_prompt=result.get("assembled_prompt", ""),
    )
@@ -0,0 +1,300 @@
 import io
 import logging
 import re
 import uuid
 from dataclasses import dataclass
 from pathlib import Path
 import fitz  # pymupdf
 from docx import Document as DocxDocument
 from config import settings
 logger = logging.getLogger(__name__)
@dataclass
 class ParsedSection:
    # One heading-delimited section produced by the format parsers.
    heading: str  # empty for text that precedes the first detected heading
    heading_level: int
    body: str
    page_number: int = 0  # set (1-based) only by the PDF parser; 0 otherwise
@dataclass
 class Chunk:
    # A piece of text ready for embedding, produced by chunk_sections().
    text: str
    section: str = ""  # heading of the section the chunk was cut from
    page_number: int = 0
    chunk_index: int = 0  # position in the final chunk list, assigned last
 # --- Parsers ---
 def parse_pdf(file_bytes: bytes) -> list[ParsedSection]:
    """Split a PDF into sections using font-size/boldness heading heuristics.

    A line shorter than 200 characters counts as a heading when its largest
    span is at least 14 pt, or at least 12 pt and set in a bold font. Every
    other non-empty line is appended to the body of the current section.

    Args:
        file_bytes: Raw PDF content.

    Returns:
        Sections in reading order. ``page_number`` is the 1-based page of the
        section heading (or of the latest body line for heading-less text).
        ``heading_level`` is 1 for headings of 16 pt and above, else 2.
    """
    sections: list[ParsedSection] = []
    current_heading = ""
    # Level of the heading that is currently open. The level must be recorded
    # when the heading is seen: computing it while flushing would stamp the
    # finished section with the size of the *next* heading.
    current_level = 2
    current_body_lines: list[str] = []
    current_page = 0

    doc = fitz.open(stream=file_bytes, filetype="pdf")
    try:
        for page_num in range(len(doc)):
            page = doc[page_num]
            blocks = page.get_text("dict")["blocks"]
            for block in blocks:
                if "lines" not in block:  # blocks without text lines are skipped
                    continue
                for line in block["lines"]:
                    spans = line["spans"]
                    text = "".join(span["text"] for span in spans).strip()
                    if not text:
                        continue
                    max_size = max(span["size"] for span in spans)
                    is_bold = any("bold" in span["font"].lower() for span in spans)
                    is_heading = (
                        max_size >= 14 or (is_bold and max_size >= 12)
                    ) and len(text) < 200
                    if is_heading:
                        if current_heading or current_body_lines:
                            sections.append(ParsedSection(
                                heading=current_heading,
                                heading_level=current_level,
                                body="\n".join(current_body_lines).strip(),
                                page_number=current_page,
                            ))
                        current_heading = text
                        current_level = 1 if max_size >= 16 else 2
                        current_body_lines = []
                        current_page = page_num + 1
                    else:
                        current_body_lines.append(text)
                        if not current_heading:
                            current_page = page_num + 1
    finally:
        # Release the document even when a page fails to parse.
        doc.close()

    if current_heading or current_body_lines:
        sections.append(ParsedSection(
            heading=current_heading,
            heading_level=current_level,
            body="\n".join(current_body_lines).strip(),
            page_number=current_page,
        ))
    return sections
 def parse_docx(file_bytes: bytes) -> list[ParsedSection]:
    """Split a DOCX document into sections at heading/title paragraphs.

    A paragraph whose style name contains "heading" or "title" opens a new
    section; its level is the first number found in the style name (1 when
    there is none). Empty paragraphs are ignored.
    """
    document = DocxDocument(io.BytesIO(file_bytes))
    result: list[ParsedSection] = []
    heading = ""
    level = 0
    body: list[str] = []

    def flush() -> None:
        # Emit the section collected so far, if it has any content.
        if heading or body:
            result.append(ParsedSection(
                heading=heading,
                heading_level=level or 1,
                body="\n".join(body).strip(),
            ))

    for paragraph in document.paragraphs:
        content = paragraph.text.strip()
        if not content:
            continue
        style = (paragraph.style.name or "").lower()
        if "heading" not in style and "title" not in style:
            body.append(content)
            continue
        flush()
        digits = re.search(r"\d+", style)
        level = int(digits.group()) if digits else 1
        heading = content
        body = []

    flush()
    return result
 def parse_text(file_bytes: bytes, is_markdown: bool = False) -> list[ParsedSection]:
    """Parse wiki-style TXT/MD content into sections.

    Heuristics tuned for operator wikis. A line opens a new section when it is
    - a markdown heading (#, ##, ...);
    - a numbered item ("1.", "1.1.", "1.1.1.") followed by a capitalised word;
    - an FAQ question introduced by "В:", "Вопрос:", "Q:" or "Question:";
    - a short ALL-CAPS line.

    Args:
        file_bytes: Raw file content, decoded as UTF-8 (invalid bytes are
            replaced; a leading byte-order mark is dropped).
        is_markdown: Accepted for interface compatibility; the same
            heuristics are applied regardless of its value.

    Returns:
        Sections in document order; text before the first heading becomes a
        section with an empty heading.
    """
    # "utf-8-sig" strips a BOM, which would otherwise stay glued to the first
    # line and stop it from matching any heading pattern.
    text = file_bytes.decode("utf-8-sig", errors="replace")
    # Normalise CRLF / CR line endings so no stray "\r" ends up in the bodies.
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")

    sections: list[ParsedSection] = []
    current_heading = ""
    current_level = 0
    current_body_lines: list[str] = []
    md_heading_re = re.compile(r"^(#{1,6})\s+(.+)")
    numbered_heading_re = re.compile(r"^(\d+(?:\.\d+)*\.?)\s+([А-ЯЁA-Z].*)")
    faq_question_re = re.compile(r"^(В|Вопрос|Q|Question)\s*[:\.]\s*(.+)", re.IGNORECASE)

    for line in lines:
        stripped = line.strip()
        heading_text = None
        heading_level = 0

        md_match = md_heading_re.match(stripped)
        if md_match:
            heading_level = len(md_match.group(1))
            heading_text = md_match.group(2).strip()

        if not heading_text:
            num_match = numbered_heading_re.match(stripped)
            if num_match and len(stripped) < 200:
                # Nesting depth follows the number of dots in "1.2.3.".
                dots = num_match.group(1).count(".")
                heading_level = max(1, dots + 1)
                heading_text = stripped

        if not heading_text:
            faq_match = faq_question_re.match(stripped)
            if faq_match and len(stripped) < 300:
                # Only the question itself (without the "Q:" marker) is kept.
                heading_text = faq_match.group(2).strip()
                heading_level = 3

        if not heading_text and stripped.isupper() and 3 < len(stripped) < 200:
            heading_text = stripped
            heading_level = 1

        if heading_text:
            if current_heading or current_body_lines:
                sections.append(ParsedSection(
                    heading=current_heading,
                    heading_level=current_level or 1,
                    body="\n".join(current_body_lines).strip(),
                ))
            current_heading = heading_text
            current_level = heading_level
            current_body_lines = []
        else:
            # The unstripped line is kept so indentation inside bodies survives.
            current_body_lines.append(line)

    if current_heading or current_body_lines:
        sections.append(ParsedSection(
            heading=current_heading,
            heading_level=current_level or 1,
            body="\n".join(current_body_lines).strip(),
        ))
    return sections
 # --- Chunker ---
 def _split_sentences(text: str) -> list[str]:
    sentences = re.split(r"(?<=[.!?])\s+", text)
    return [s.strip() for s in sentences if s.strip()]
 def chunk_sections(
    sections: list[ParsedSection],
    max_chunk_size: int | None = None,
    min_chunk_size: int | None = None,
    overlap_sentences: int | None = None,
 ) -> list[Chunk]:
    """Turn parsed wiki sections into chunks ready for embedding.

    - Small sections (FAQ answers) are kept whole: one chunk = one topic.
    - Large sections (regulations) are cut at line breaks, each piece
      repeating the section heading.
    - A chunk shorter than ``min_chunk_size`` absorbs the chunk that follows
      it, so tiny fragments are not indexed on their own.
    - Every chunk except the first is prefixed with the last
      ``overlap_sentences`` sentences of the previous chunk.

    Args:
        sections: Parsed sections in document order.
        max_chunk_size: Maximum characters per chunk before a section is
            split; ``None`` (or 0) uses ``settings.max_chunk_size``.
        min_chunk_size: Merge threshold in characters; ``None`` uses
            ``settings.min_chunk_size``; 0 disables merging.
        overlap_sentences: Sentences carried over between neighbouring
            chunks; ``None`` uses ``settings.overlap_sentences``; 0 disables
            the overlap.

    Returns:
        Chunks with ``chunk_index`` numbered from 0 in output order.
    """
    # A zero maximum is meaningless, so any falsy value falls back to settings.
    max_size = max_chunk_size or settings.max_chunk_size
    # For these two an explicit 0 is a valid request ("off"); only None means
    # "use the configured default".
    min_size = settings.min_chunk_size if min_chunk_size is None else min_chunk_size
    overlap = settings.overlap_sentences if overlap_sentences is None else overlap_sentences

    raw_chunks: list[Chunk] = []
    for section in sections:
        heading_prefix = f"{section.heading}\n\n" if section.heading else ""
        full_text = heading_prefix + section.body
        if len(full_text) <= max_size:
            raw_chunks.append(Chunk(
                text=full_text.strip(),
                section=section.heading,
                page_number=section.page_number,
            ))
            continue

        current_text = heading_prefix
        for para in section.body.split("\n"):
            # Start a new piece only if the current one already holds content
            # beyond the heading; a single over-long line is kept intact.
            if len(current_text) + len(para) + 1 > max_size and len(current_text) > len(heading_prefix):
                raw_chunks.append(Chunk(
                    text=current_text.strip(),
                    section=section.heading,
                    page_number=section.page_number,
                ))
                current_text = heading_prefix + para + "\n"
            else:
                current_text += para + "\n"
        # Skip a trailing piece that consists of the heading alone.
        if current_text.strip() and current_text.strip() != heading_prefix.strip():
            raw_chunks.append(Chunk(
                text=current_text.strip(),
                section=section.heading,
                page_number=section.page_number,
            ))

    merged: list[Chunk] = []
    for chunk in raw_chunks:
        if merged and len(merged[-1].text) < min_size:
            merged[-1].text += "\n\n" + chunk.text
            if not merged[-1].section:
                merged[-1].section = chunk.section
        else:
            merged.append(Chunk(
                text=chunk.text,
                section=chunk.section,
                page_number=chunk.page_number,
            ))

    # Snapshot the texts first: overlap must come from the previous chunk's
    # own content, not from the overlap that was just prepended to it.
    base_texts = [chunk.text for chunk in merged]
    for i, chunk in enumerate(merged):
        if i > 0 and overlap > 0:
            prev_sentences = _split_sentences(base_texts[i - 1])
            overlap_text = " ".join(prev_sentences[-overlap:])
            if overlap_text and overlap_text not in chunk.text:
                chunk.text = overlap_text + "\n\n" + chunk.text
        chunk.chunk_index = i
    return merged
 # --- Main processor ---
 def process_document(file_bytes: bytes, filename: str) -> tuple[str, list[ParsedSection], list[Chunk]]:
    """Parse and chunk one uploaded document.

    The parser is selected by the file extension of ``filename``.

    Args:
        file_bytes: Raw file content.
        filename: Original file name; only its suffix is inspected.

    Returns:
        ``(document_id, sections, chunks)`` where ``document_id`` is a fresh
        UUID4 string. Both lists are empty when nothing could be parsed.

    Raises:
        ValueError: For an unsupported extension, or for a ``.doc`` file that
            the DOCX parser cannot read (legacy binary Word format).
    """
    document_id = str(uuid.uuid4())
    ext = Path(filename).suffix.lower()

    if ext == ".pdf":
        sections = parse_pdf(file_bytes)
    elif ext in (".docx", ".doc"):
        try:
            sections = parse_docx(file_bytes)
        except Exception as exc:
            # The DOCX parser only understands the zip-based .docx format. A
            # real legacy .doc fails here; report it as a client error rather
            # than an opaque internal failure. A .docx merely named .doc still
            # parses, and genuine .docx failures propagate unchanged.
            if ext == ".doc":
                raise ValueError(
                    "Legacy .doc files are not supported; please convert the document to .docx"
                ) from exc
            raise
    elif ext == ".md":
        sections = parse_text(file_bytes, is_markdown=True)
    elif ext == ".txt":
        sections = parse_text(file_bytes, is_markdown=False)
    else:
        raise ValueError(f"Unsupported file format: {ext}")

    if not sections:
        logger.warning("No sections found in %s", filename)
        return document_id, [], []

    chunks = chunk_sections(sections)
    logger.info("Processed '%s': %d sections → %d chunks", filename, len(sections), len(chunks))
    return document_id, sections, chunks
@@ -0,0 +1,22 @@
 import logging
 from sentence_transformers import SentenceTransformer
 logger = logging.getLogger(__name__)
 class EmbeddingService:
    """Wrapper around a SentenceTransformer model that adds the "passage: " /
    "query: " input prefixes and returns plain Python lists."""

    def __init__(self, model_name: str = "intfloat/multilingual-e5-large"):
        """Load ``model_name`` and log its embedding dimension."""
        logger.info("Loading embedding model: %s", model_name)
        self.model = SentenceTransformer(model_name)
        self.model_name = model_name
        dimension = self.model.get_sentence_embedding_dimension()
        logger.info("Embedding model loaded (dim=%d)", dimension)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed passages; returns one normalised vector per input text."""
        passages = ["passage: " + text for text in texts]
        vectors = self.model.encode(
            passages,
            normalize_embeddings=True,
            show_progress_bar=False,
        )
        return vectors.tolist()

    def embed_query(self, query: str) -> list[float]:
        """Embed a single search query as one normalised vector."""
        prefixed_query = "query: " + query
        return self.model.encode(prefixed_query, normalize_embeddings=True).tolist()
@@ -0,0 +1,104 @@
 import logging
 import httpx
 from config import settings
 logger = logging.getLogger(__name__)
 DEFAULT_SYSTEM_PROMPT = """Ты — виртуальный ассистент клиники, который первым отвечает пациентам в чате.
 Твоя задача — помочь пациенту по бытовым и организационным вопросам: запись, расписание врачей, подготовка к приёму, как проехать, документы, оплата, ДМС, детский приём и т. п.
 Правила:
 - Отвечай коротко, дружелюбно, на «вы», простым русским языком без медицинской латыни.
 - Опирайся ТОЛЬКО на предоставленные выдержки из базы знаний. Если ответа в них нет — честно скажи, что уточнишь у оператора, и предложи подключить оператора.
 - Не ставь диагнозы и не назначай лечение. Если вопрос про симптомы, лекарства, дозировки или «что со мной» — мягко предложи записаться к врачу и подключить оператора, если нужно.
 - Не выдумывай телефоны, адреса, цены, имена врачей, расписание. Только из источников.
 - Если пациент просит оператора — коротко подтверди, что сейчас его подключишь.
 - Источники указывать не нужно: пациент их не видит. Ответ — обычный текст, как в чате."""
 DEFAULT_USER_TEMPLATE = """Вопрос пациента:
 {question}
 Выдержки из базы знаний операторов:
 {sources}
 Ответь пациенту в чате по правилам из системного сообщения."""
 class LLMClient:
    def __init__(
        self,
        api_key: str | None = None,
        model: str | None = None,
        base_url: str | None = None,
    ):
        self.api_key = api_key or settings.deepseek_api_key
        self.model = model or settings.deepseek_model
        self.base_url = (base_url or settings.deepseek_base_url).rstrip("/")
    def _format_sources(self, sources: list[dict]) -> str:
        if not sources:
            return "(источники не найдены)"
        lines = []
        for i, src in enumerate(sources, 1):
            meta = src.get("metadata", {})
            doc_name = meta.get("document_name", "Документ")
            section = meta.get("section", "")
            lines.append(
                f"[{i}] {src['text']}\n"
                f"    (Источник: {doc_name}, раздел: {section})"
            )
        return "\n".join(lines)
    async def answer(
        self,
        question: str,
        sources: list[dict],
        system_prompt: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
    ) -> dict:
        """Generate a patient-facing answer using RAG context.
        Returns dict with 'text' and 'assembled_prompt'.
        """
        effective_system = system_prompt or DEFAULT_SYSTEM_PROMPT
        effective_temp = temperature if temperature is not None else 0.2
        effective_max_tokens = max_tokens or 1200
        formatted_sources = self._format_sources(sources)
        user_message = DEFAULT_USER_TEMPLATE.format(
            question=question,
            sources=formatted_sources,
        )
        assembled_prompt = f"[SYSTEM]\n{effective_system}\n\n[USER]\n{user_message}"
        url = f"{self.base_url}/chat/completions"
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": effective_system},
                {"role": "user", "content": user_message},
            ],
            "temperature": effective_temp,
            "max_tokens": effective_max_tokens,
        }
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                url,
                json=payload,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
            )
            response.raise_for_status()
            data = response.json()
        content = data["choices"][0]["message"]["content"]
        logger.info("LLM response: %d chars, model=%s, temp=%.2f", len(content), self.model, effective_temp)
        return {"text": content.strip(), "assembled_prompt": assembled_prompt}
@@ -0,0 +1,52 @@
 import logging
 from services.llm_client import LLMClient
 from services.vectorstore import VectorStoreService
 logger = logging.getLogger(__name__)
 async def rag_query(
    vectorstore: VectorStoreService,
    llm_client: LLMClient,
    question: str,
    top_k: int = 5,
    document_ids: list[str] | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
 ) -> dict:
    """Pipeline for a single patient question: retrieve, augment, generate.

    Returns a dict with the answer text, the retrieved sources (chunk text
    cut to 500 characters, score rounded to 3 decimals), the model name and
    the assembled prompt.
    """
    logger.info("RAG query: %s", question[:200])

    hits = vectorstore.query(
        query_text=question,
        top_k=top_k,
        document_ids=document_ids,
    )
    logger.info("Retrieved %d chunks", len(hits))

    generation = await llm_client.answer(
        question=question,
        sources=hits,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    def describe(hit: dict) -> dict:
        # Flatten one retrieved chunk into the shape expected by SourceInfo.
        meta = hit.get("metadata", {})
        return {
            "document_id": meta.get("document_id", ""),
            "document_name": meta.get("document_name", ""),
            "chunk_text": hit["text"][:500],
            "section": meta.get("section", ""),
            "page": meta.get("page_number", 0),
            "relevance_score": round(hit.get("relevance_score", 0), 3),
        }

    return {
        "answer": generation["text"],
        "sources": [describe(hit) for hit in hits],
        "model_used": llm_client.model,
        "assembled_prompt": generation["assembled_prompt"],
    }
@@ -0,0 +1,145 @@
 import logging
 from datetime import datetime, timezone
 import chromadb
 from services.embeddings import EmbeddingService
 logger = logging.getLogger(__name__)
 COLLECTION_NAME = "operators_wiki"
 class VectorStoreService:
    """Chunk store backed by a persistent ChromaDB collection.

    Texts are embedded by the injected embedding service. The collection is
    created with ``hnsw:space = cosine``, so query distances are cosine
    distances and ``relevance_score`` is reported as ``1 - distance``.
    """

    def __init__(self, persist_dir: str, embedding_service: EmbeddingService):
        """Open (or create) the collection stored under ``persist_dir``."""
        self.client = chromadb.PersistentClient(path=persist_dir)
        self.embedding_service = embedding_service
        self.collection = self.client.get_or_create_collection(
            name=COLLECTION_NAME,
            metadata={"hnsw:space": "cosine"},
        )
        logger.info("ChromaDB collection '%s': %d items", COLLECTION_NAME, self.collection.count())

    def add_document(
        self,
        document_id: str,
        document_name: str,
        file_type: str,
        chunks: list[dict],
    ) -> int:
        """Embed ``chunks`` and store them under ``document_id``.

        Each chunk dict must have a ``"text"`` key and may have ``"section"``
        and ``"page_number"``. Chunk ids are ``"<document_id>_chunk_<index>"``.

        Returns:
            The number of chunks stored (0 when ``chunks`` is empty).
        """
        if not chunks:
            return 0

        texts = [c["text"] for c in chunks]
        embeddings = self.embedding_service.embed_documents(texts)
        now = datetime.now(timezone.utc).isoformat()

        ids = [f"{document_id}_chunk_{i}" for i in range(len(chunks))]
        metadatas = [
            {
                "document_id": document_id,
                "document_name": document_name,
                "file_type": file_type,
                # A key that is present but None would be passed to Chroma as
                # None, which some Chroma versions reject; coalesce to the
                # same defaults used for a missing key.
                "section": chunk.get("section") or "",
                "page_number": chunk.get("page_number") or 0,
                "chunk_index": i,
                "created_at": now,
            }
            for i, chunk in enumerate(chunks)
        ]

        self.collection.add(
            ids=ids,
            embeddings=embeddings,
            documents=texts,
            metadatas=metadatas,
        )
        logger.info("Added %d chunks for document '%s'", len(chunks), document_name)
        return len(chunks)

    def query(
        self,
        query_text: str,
        top_k: int = 5,
        document_ids: list[str] | None = None,
    ) -> list[dict]:
        """Return up to ``top_k`` chunks nearest to ``query_text``.

        Args:
            query_text: Text to embed and search for.
            top_k: Maximum number of chunks; values below 1 yield ``[]``.
            document_ids: Optional list restricting the search to these
                documents.

        Returns:
            Dicts with ``chunk_id``, ``text``, ``metadata``, ``distance`` and
            ``relevance_score`` (``1 - distance``), in the order Chroma
            returned them.
        """
        if top_k < 1:
            return []
        available = self.collection.count()
        if available == 0:
            # Nothing to search; also avoids computing an embedding for nothing.
            return []

        query_embedding = self.embedding_service.embed_query(query_text)

        where_filter = None
        if document_ids:
            if len(document_ids) == 1:
                where_filter = {"document_id": document_ids[0]}
            else:
                where_filter = {"document_id": {"$in": document_ids}}

        results = self.collection.query(
            query_embeddings=[query_embedding],
            # Never ask for more neighbours than the collection holds.
            n_results=min(top_k, available),
            where=where_filter,
            include=["documents", "metadatas", "distances"],
        )

        if not results["ids"] or not results["ids"][0]:
            return []

        return [
            {
                "chunk_id": chunk_id,
                "text": text,
                "metadata": metadata,
                "distance": distance,
                "relevance_score": 1 - distance,
            }
            for chunk_id, text, metadata, distance in zip(
                results["ids"][0],
                results["documents"][0],
                results["metadatas"][0],
                results["distances"][0],
            )
        ]

    def delete_document(self, document_id: str) -> int:
        """Delete every chunk of ``document_id``; return how many were removed."""
        existing = self.collection.get(where={"document_id": document_id}, include=[])
        count = len(existing["ids"])
        if count > 0:
            self.collection.delete(ids=existing["ids"])
        logger.info("Deleted %d chunks for document_id=%s", count, document_id)
        return count

    def list_documents(self) -> list[dict]:
        """Return one summary dict per stored document, in first-seen order.

        Chunks without a ``document_id`` are grouped under the empty id, the
        same way ``get_stats`` counts them.
        """
        all_items = self.collection.get(include=["metadatas"])
        docs: dict[str, dict] = {}
        for meta in all_items["metadatas"]:
            doc_id = meta.get("document_id", "")
            if doc_id not in docs:
                docs[doc_id] = {
                    "document_id": doc_id,
                    "name": meta.get("document_name", ""),
                    "file_type": meta.get("file_type", ""),
                    "created_at": meta.get("created_at", ""),
                    "chunks_count": 0,
                    "metadata": {},
                }
            docs[doc_id]["chunks_count"] += 1
        return list(docs.values())

    def get_document_chunks(self, document_id: str) -> list[dict]:
        """Return all chunks for a document, sorted by chunk_index."""
        results = self.collection.get(
            where={"document_id": document_id},
            include=["documents", "metadatas"],
        )
        items = []
        if results["ids"]:
            items = [
                {"chunk_id": chunk_id, "text": text, "metadata": metadata}
                for chunk_id, text, metadata in zip(
                    results["ids"], results["documents"], results["metadatas"]
                )
            ]
        items.sort(key=lambda x: x["metadata"].get("chunk_index", 0))
        return items

    def get_stats(self) -> dict:
        """Return the number of distinct documents and the total chunk count."""
        all_items = self.collection.get(include=["metadatas"])
        doc_ids = {meta.get("document_id", "") for meta in all_items["metadatas"]}
        return {
            "documents_count": len(doc_ids),
            "chunks_count": self.collection.count(),
        }
@@ -0,0 +1,571 @@
 <!DOCTYPE html>
 <html lang="ru">
 <head>
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 <title>Chat Agent for Patients — Debug UI</title>
 <style>
  :root {
    --bg: #f5f6f8;
    --panel: #ffffff;
    --border: #e1e4ea;
    --muted: #6b7280;
    --fg: #111827;
    --accent: #2563eb;
    --accent-hover: #1d4ed8;
    --ok: #16a34a;
    --warn: #d97706;
    --err: #dc2626;
    --chip-bg: #eef2ff;
    --mono: ui-monospace, SFMono-Regular, Menlo, monospace;
  }
  * { box-sizing: border-box; }
  body {
    margin: 0;
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
    background: var(--bg);
    color: var(--fg);
    font-size: 14px;
    line-height: 1.5;
  }
  header {
    background: var(--panel);
    border-bottom: 1px solid var(--border);
    padding: 14px 24px;
    display: flex;
    align-items: center;
    gap: 16px;
    position: sticky;
    top: 0;
    z-index: 10;
  }
  header h1 {
    margin: 0;
    font-size: 16px;
    font-weight: 600;
  }
  .status {
    display: inline-flex;
    align-items: center;
    gap: 6px;
    padding: 4px 10px;
    border-radius: 999px;
    background: var(--chip-bg);
    font-size: 13px;
  }
  .dot {
    width: 8px;
    height: 8px;
    border-radius: 50%;
    background: var(--muted);
  }
  .dot.ok { background: var(--ok); }
  .dot.warn { background: var(--warn); }
  .dot.err { background: var(--err); }
  .stats {
    margin-left: auto;
    font-size: 13px;
    color: var(--muted);
  }
  .stats b { color: var(--fg); }
  main {
    padding: 24px;
    max-width: 1400px;
    margin: 0 auto;
    display: grid;
    gap: 24px;
  }
  .panel {
    background: var(--panel);
    border: 1px solid var(--border);
    border-radius: 12px;
    padding: 20px;
  }
  .panel h2 {
    margin: 0 0 16px 0;
    font-size: 15px;
    font-weight: 600;
  }
  .dropzone {
    border: 2px dashed var(--border);
    border-radius: 10px;
    padding: 28px;
    text-align: center;
    color: var(--muted);
    cursor: pointer;
    transition: all 0.15s;
  }
  .dropzone.drag { border-color: var(--accent); background: var(--chip-bg); color: var(--fg); }
  .dropzone input { display: none; }
  table {
    width: 100%;
    border-collapse: collapse;
    margin-top: 16px;
    font-size: 13px;
  }
  th, td {
    text-align: left;
    padding: 8px 10px;
    border-bottom: 1px solid var(--border);
    vertical-align: top;
  }
  th {
    font-weight: 600;
    color: var(--muted);
    font-size: 12px;
    text-transform: uppercase;
    letter-spacing: 0.03em;
  }
  tr:last-child td { border-bottom: none; }
  tr.doc-row { cursor: pointer; }
  tr.doc-row:hover td:not(:last-child) { background: #f9fafb; }
  tr.doc-row .arrow { display: inline-block; transition: transform 0.15s; color: var(--muted); margin-right: 6px; }
  tr.doc-row.open .arrow { transform: rotate(90deg); }
  tr.chunks-row td { padding: 0; background: #fafbfd; }
  tr.chunks-row .chunks-body { padding: 14px 16px; }
  .chunk-card {
    background: white;
    border: 1px solid var(--border);
    border-radius: 8px;
    padding: 10px 12px;
    margin-bottom: 8px;
  }
  .chunk-card-meta {
    font-size: 11px;
    color: var(--muted);
    margin-bottom: 6px;
    display: flex;
    gap: 10px;
    flex-wrap: wrap;
  }
  .chunk-card-text {
    white-space: pre-wrap;
    font-size: 13px;
    word-break: break-word;
  }
  .chunk-card-toggle {
    color: var(--accent);
    cursor: pointer;
    font-size: 12px;
    border: none;
    background: none;
    padding: 4px 0 0 0;
  }
  .empty {
    padding: 20px;
    color: var(--muted);
    text-align: center;
    font-style: italic;
  }
  button {
    font: inherit;
    padding: 8px 14px;
    border-radius: 8px;
    border: 1px solid var(--border);
    background: var(--panel);
    cursor: pointer;
  }
  button.primary {
    background: var(--accent);
    border-color: var(--accent);
    color: white;
  }
  button.primary:hover:not(:disabled) { background: var(--accent-hover); }
  button.danger {
    color: var(--err);
    border-color: var(--err);
    background: transparent;
    font-size: 12px;
    padding: 4px 10px;
  }
  button:disabled { opacity: 0.5; cursor: not-allowed; }
  .row { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }
  .row label { color: var(--muted); font-size: 13px; }
  textarea, input[type=number], input[type=text] {
    font: inherit;
    padding: 8px 10px;
    border: 1px solid var(--border);
    border-radius: 8px;
    width: 100%;
    background: white;
  }
  textarea { min-height: 90px; resize: vertical; }
  .num { width: 80px; }
  .columns {
    display: grid;
    grid-template-columns: 1fr 1fr 1fr;
    gap: 16px;
    margin-top: 16px;
  }
  .col {
    border: 1px solid var(--border);
    border-radius: 10px;
    padding: 14px;
    background: #fafbfd;
    min-height: 200px;
    overflow: hidden;
  }
  .col h3 {
    margin: 0 0 10px 0;
    font-size: 12px;
    font-weight: 600;
    color: var(--muted);
    text-transform: uppercase;
    letter-spacing: 0.03em;
  }
  .chunk {
    border: 1px solid var(--border);
    border-radius: 8px;
    padding: 10px;
    margin-bottom: 8px;
    background: white;
  }
  .chunk-head {
    display: flex;
    justify-content: space-between;
    font-size: 11px;
    color: var(--muted);
    margin-bottom: 6px;
  }
  .score {
    background: var(--chip-bg);
    padding: 1px 6px;
    border-radius: 999px;
    color: var(--accent);
    font-weight: 600;
  }
  pre {
    font-family: var(--mono);
    font-size: 12px;
    white-space: pre-wrap;
    word-break: break-word;
    margin: 0;
    background: white;
    padding: 10px;
    border: 1px solid var(--border);
    border-radius: 8px;
    max-height: 500px;
    overflow: auto;
  }
  .answer {
    background: white;
    border: 1px solid var(--border);
    border-radius: 8px;
    padding: 14px;
    white-space: pre-wrap;
    word-break: break-word;
    line-height: 1.6;
  }
  .answer-meta {
    margin-top: 8px;
    font-size: 11px;
    color: var(--muted);
  }
  .toast {
    position: fixed;
    bottom: 20px;
    right: 20px;
    background: #111827;
    color: white;
    padding: 10px 16px;
    border-radius: 8px;
    font-size: 13px;
    opacity: 0;
    transition: opacity 0.2s;
    pointer-events: none;
  }
  .toast.show { opacity: 1; }
  .toast.err { background: var(--err); }
  .mini { font-size: 12px; color: var(--muted); }
  .spinner {
    width: 14px; height: 14px;
    border: 2px solid var(--border);
    border-top-color: var(--accent);
    border-radius: 50%;
    display: inline-block;
    animation: spin 0.8s linear infinite;
    vertical-align: middle;
  }
  @keyframes spin { to { transform: rotate(360deg); } }
  @media (max-width: 900px) {
    .columns { grid-template-columns: 1fr; }
  }
 </style>
 </head>
 <body>
 <header>
  <h1>Chat Agent for Patients — Debug</h1>
  <span class="status"><span class="dot" id="dot"></span><span id="status-text">проверяю…</span></span>
  <span class="stats" id="stats"></span>
 </header>
 <main>
  <section class="panel">
    <h2>База знаний</h2>
    <div id="dropzone" class="dropzone">
      Перетащи файл (.pdf, .docx, .txt, .md) или кликни для выбора
      <input type="file" id="file-input" accept=".pdf,.docx,.doc,.txt,.md">
    </div>
    <div id="upload-status" class="mini" style="margin-top:10px;"></div>
    <table>
      <thead>
        <tr>
          <th>Имя</th>
          <th>Тип</th>
          <th>Чанков</th>
          <th>Загружен</th>
          <th></th>
        </tr>
      </thead>
      <tbody id="docs-tbody">
        <tr><td colspan="5" class="empty">Документы ещё не загружены</td></tr>
      </tbody>
    </table>
  </section>
  <section class="panel">
    <h2>Тест-вопрос от пациента</h2>
    <textarea id="question" placeholder="Например: как записать ребёнка к лору?"></textarea>
    <div class="row" style="margin-top:12px;">
      <label>top_k <input type="number" class="num" id="top_k" value="5" min="1" max="20"></label>
      <label>temperature <input type="number" class="num" id="temperature" value="0.2" min="0" max="2" step="0.1"></label>
      <button class="primary" id="ask-btn">Отправить</button>
      <span id="ask-status" class="mini"></span>
    </div>
    <div class="columns">
      <div class="col">
        <h3>Что нашёл RAG</h3>
        <div id="col-chunks"><div class="mini">— пока пусто —</div></div>
      </div>
      <div class="col">
        <h3>Собранный промпт</h3>
        <div id="col-prompt"><div class="mini">— пока пусто —</div></div>
      </div>
      <div class="col">
        <h3>Ответ агента</h3>
        <div id="col-answer"><div class="mini">— пока пусто —</div></div>
      </div>
    </div>
  </section>
 </main>
 <div class="toast" id="toast"></div>
 <script>
 // Prefix prepended to every request path in api(); empty means same origin.
 const API = "";
 // Shorthand for document.getElementById.
 const $ = (id) => document.getElementById(id);
 // HTML-escape a value for interpolation into markup; null/undefined become "".
 const esc = (s) => {
  const entities = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"};
  return String(s ?? "").replace(/[&<>"']/g, (ch) => entities[ch]);
 };
 /**
  * Show a transient notification in the #toast element for 2.5 seconds.
  * @param {string} msg  Text to display (set via textContent).
  * @param {string} kind "err" adds the error styling; anything else is neutral.
  */
 function toast(msg, kind = "ok") {
  const t = $("toast");
  t.textContent = msg;
  t.className = "toast show" + (kind === "err" ? " err" : "");
  // Cancel the pending hide of a previous toast; otherwise its timer would
  // hide this newer message before its own 2.5 s have elapsed.
  clearTimeout(toast.hideTimer);
  toast.hideTimer = setTimeout(() => { t.className = "toast"; }, 2500);
 }
 /**
  * fetch() wrapper: resolves with the parsed JSON body, rejects with an Error
  * carrying a readable message on a non-2xx response.
  * @param {string} path Request path appended to API.
  * @param {object} opts Options passed through to fetch().
  * @returns {Promise<any>} Parsed JSON, or null for a 204 response.
  */
 async function api(path, opts = {}) {
  const res = await fetch(API + path, opts);
  if (!res.ok) {
    let detail = res.statusText;
    try {
      const j = await res.json();
      if (j && j.detail) {
        // `detail` may be a structured value (e.g. a list of validation
        // errors); stringify it instead of producing "[object Object]".
        detail = typeof j.detail === "string" ? j.detail : JSON.stringify(j.detail);
      }
    } catch {
      // Error body is not JSON; keep the status text.
    }
    // statusText can be empty, so fall back to the numeric status.
    throw new Error(detail || `HTTP ${res.status}`);
  }
  // A 204 response has no body to parse.
  if (res.status === 204) return null;
  return res.json();
 }
 // Poll /health and update the header: status dot, status label and stats line.
 // Any failure (request or rendering) switches the header to the error state.
 async function refreshHealth() {
  try {
    const health = await api("/health");
    const ready = health.status === "ok";
    $("dot").className = "dot " + (ready ? "ok" : "warn");
    $("status-text").textContent = ready ? "готов" : health.status;
    $("stats").innerHTML = `модель <b>${esc(health.embedding_model)}</b> · документов <b>${health.documents_count}</b> · чанков <b>${health.chunks_count}</b>`;
  } catch (e) {
    $("dot").className = "dot err";
    $("status-text").textContent = "недоступен";
    $("stats").textContent = "";
  }
 }
 // Reload the document table from /documents. Each document gets a clickable
 // row (toggles its chunk list) and a hidden companion row for the chunks.
 async function refreshDocs() {
  try {
    const r = await api("/documents");
    const tbody = $("docs-tbody");
    if (!r.documents.length) {
      tbody.innerHTML = '<tr><td colspan="5" class="empty">Документы ещё не загружены</td></tr>';
      return;
    }
    // Ids and names travel in escaped data-* attributes rather than inside
    // inline onclick="" JavaScript: the HTML parser decodes entities before
    // the handler code runs, so esc() cannot protect a JS string literal
    // (a quote in a file name would break out of it).
    tbody.innerHTML = r.documents.map(d => `
      <tr class="doc-row" id="doc-${esc(d.document_id)}" data-doc-id="${esc(d.document_id)}">
        <td><span class="arrow">▶</span>${esc(d.name)}</td>
        <td>${esc(d.file_type)}</td>
        <td>${esc(d.chunks_count)}</td>
        <td class="mini">${esc((d.created_at || "").slice(0, 19).replace("T", " "))}</td>
        <td><button class="danger" data-doc-name="${esc(d.name)}">удалить</button></td>
      </tr>
      <tr class="chunks-row" id="chunks-${esc(d.document_id)}" style="display:none;">
        <td colspan="5"><div class="chunks-body"><div class="mini">загружаю…</div></div></td>
      </tr>
    `).join("");
    // The rows were just recreated, so handlers are attached to fresh elements.
    tbody.querySelectorAll("tr.doc-row").forEach(row => {
      const id = row.dataset.docId;
      row.addEventListener("click", () => toggleChunks(id));
      row.querySelector("button.danger").addEventListener("click", ev => {
        // Keep the delete click from also toggling the chunk list.
        ev.stopPropagation();
        deleteDoc(id, ev.currentTarget.dataset.docName);
      });
    });
  } catch (e) {
    toast("Не удалось загрузить список: " + e.message, "err");
  }
 }
 // Expand or collapse the chunk list under a document row. On expand the
 // chunks are fetched from /documents/<id>/chunks and rendered as cards; texts
 // longer than 300 characters are truncated with a "show all" toggle.
 async function toggleChunks(id) {
  const row = $("doc-" + id);
  const chunksRow = $("chunks-" + id);
  const isOpen = chunksRow.style.display !== "none";
  if (isOpen) {
    chunksRow.style.display = "none";
    row.classList.remove("open");
    return;
  }
  chunksRow.style.display = "";
  row.classList.add("open");
  const body = chunksRow.querySelector(".chunks-body");
  body.innerHTML = '<div class="mini">загружаю…</div>';
  try {
    // Encode the id so characters such as "/", "?" or "#" cannot alter the path.
    const d = await api(`/documents/${encodeURIComponent(id)}/chunks`);
    if (!d.chunks.length) {
      body.innerHTML = '<div class="mini">чанков нет</div>';
      return;
    }
    body.innerHTML = d.chunks.map(c => {
      // Tolerate a missing text or length instead of throwing mid-render.
      const text = String(c.text ?? "");
      const long = text.length > 300;
      const short = long ? text.slice(0, 300) + "…" : text;
      const length = Number(c.char_length ?? text.length);
      // esc() already encodes double quotes, so its output is attribute-safe.
      return `
        <div class="chunk-card">
          <div class="chunk-card-meta">
            <span>#${esc(c.index)}</span>
            <span>раздел: ${esc(c.section || "—")}</span>
            ${c.page_number ? `<span>стр. ${esc(c.page_number)}</span>` : ""}
            <span>${length.toLocaleString("ru")} симв.</span>
          </div>
          <div class="chunk-card-text" data-short="${esc(short)}" data-full="${esc(text)}" data-expanded="0">${esc(short)}</div>
          ${long ? '<button class="chunk-card-toggle" onclick="toggleChunkText(this)">показать полностью</button>' : ""}
        </div>
      `;
    }).join("");
  } catch (e) {
    body.innerHTML = `<div class="mini" style="color:var(--err)">Ошибка: ${esc(e.message)}</div>`;
  }
 }
 // Switch a chunk card between its truncated and full text. The text element
 // is the button's previous sibling and keeps both variants in data-short /
 // data-full, with the current state in data-expanded ("0" or "1").
 function toggleChunkText(btn) {
  const card = btn.previousElementSibling;
  if (card.getAttribute("data-expanded") === "1") {
    card.textContent = card.getAttribute("data-short");
    card.setAttribute("data-expanded", "0");
    btn.textContent = "показать полностью";
  } else {
    card.textContent = card.getAttribute("data-full");
    card.setAttribute("data-expanded", "1");
    btn.textContent = "свернуть";
  }
 }
 /**
  * Ask for confirmation, then delete a document and refresh the table and
  * the header stats.
  * @param {string} id   Document id used in the DELETE path.
  * @param {string} name Document name shown in the confirmation dialog.
  */
 async function deleteDoc(id, name) {
  if (!confirm(`Удалить документ «${name}»?`)) return;
  try {
    // Encode the id so characters such as "/", "?" or "#" cannot alter the path.
    await api(`/documents/${encodeURIComponent(id)}`, { method: "DELETE" });
    toast("Удалён");
    refreshDocs(); refreshHealth();
  } catch (e) {
    toast("Ошибка: " + e.message, "err");
  }
 }
 // Upload one file to /documents/upload as multipart form data, reporting
 // progress and the outcome in #upload-status and via a toast.
 async function uploadFile(file) {
  const status = $("upload-status");
  status.innerHTML = `<span class="spinner"></span> загружаю <b>${esc(file.name)}</b>…`;

  const form = new FormData();
  form.append("file", file);

  try {
    const uploaded = await api("/documents/upload", { method: "POST", body: form });
    status.innerHTML = `✓ <b>${esc(uploaded.name)}</b> — ${uploaded.chunks_count} чанков`;
    toast("Загружено");
    refreshDocs();
    refreshHealth();
  } catch (err) {
    status.innerHTML = `✕ ошибка: ${esc(err.message)}`;
    toast("Ошибка загрузки: " + err.message, "err");
  }
 }
 // Wire up the upload dropzone: a click opens the hidden file input, dragging
 // over the zone highlights it, and a dropped or chosen file is uploaded.
 function initDropzone() {
  const zone = $("dropzone");
  const picker = $("file-input");

  zone.addEventListener("click", () => picker.click());
  picker.addEventListener("change", () => {
    const chosen = picker.files[0];
    if (chosen) uploadFile(chosen);
    // Reset so that picking the same file again fires "change" once more.
    picker.value = "";
  });

  const highlight = (ev) => { ev.preventDefault(); zone.classList.add("drag"); };
  const unhighlight = (ev) => { ev.preventDefault(); zone.classList.remove("drag"); };
  for (const type of ["dragenter", "dragover"]) zone.addEventListener(type, highlight);
  for (const type of ["dragleave", "drop"]) zone.addEventListener(type, unhighlight);

  zone.addEventListener("drop", (ev) => {
    const dropped = ev.dataTransfer.files[0];
    if (dropped) uploadFile(dropped);
  });
 }
 // Send the question from the textarea to /query and fill the three result
 // columns: retrieved chunks with scores, the assembled prompt, and the answer.
 // The submit button stays disabled while the request is in flight.
 async function ask() {
  const question = $("question").value.trim();
  if (question === "") {
    toast("Введите вопрос", "err");
    return;
  }
  const btn = $("ask-btn");
  btn.disabled = true;
  $("ask-status").innerHTML = '<span class="spinner"></span> думаю…';
  for (const columnId of ["col-chunks", "col-prompt", "col-answer"]) {
    $(columnId).innerHTML = '<div class="mini">…</div>';
  }
  try {
    const payload = {
      text: question,
      top_k: parseInt($("top_k").value, 10) || 5,
      temperature: parseFloat($("temperature").value),
    };
    const r = await api("/query", {
      method: "POST",
      headers: {"Content-Type": "application/json"},
      body: JSON.stringify(payload),
    });
    // One card per retrieved chunk: numbered header, score as a percentage, text.
    const renderSource = (src, idx) => `
          <div class="chunk">
            <div class="chunk-head">
              <span>[${idx + 1}] ${esc(src.document_name)} · ${esc(src.section || "—")}</span>
              <span class="score">${(src.relevance_score * 100).toFixed(1)}%</span>
            </div>
            <div>${esc(src.chunk_text)}</div>
          </div>`;
    $("col-chunks").innerHTML = r.sources.length
      ? r.sources.map(renderSource).join("")
      : '<div class="mini">— нет релевантных чанков —</div>';
    $("col-prompt").innerHTML = `<pre>${esc(r.assembled_prompt)}</pre>`;
    $("col-answer").innerHTML = `
      <div class="answer">${esc(r.answer)}</div>
      <div class="answer-meta">модель: ${esc(r.model_used)} · источников: ${r.sources.length}</div>
    `;
    $("ask-status").textContent = "";
  } catch (err) {
    $("col-answer").innerHTML = `<div class="mini" style="color:var(--err)">Ошибка: ${esc(err.message)}</div>`;
    $("ask-status").textContent = "";
    toast("Ошибка: " + err.message, "err");
  } finally {
    btn.disabled = false;
  }
 }
 // Page start-up: bind the submit button and the Ctrl/Cmd+Enter shortcut,
 // set up the dropzone, load the initial state, and re-check health every 15 s.
 $("ask-btn").addEventListener("click", ask);
 $("question").addEventListener("keydown", (event) => {
  const modifierHeld = event.metaKey || event.ctrlKey;
  if (!modifierHeld || event.key !== "Enter") return;
  ask();
 });
 initDropzone();
 refreshHealth();
 refreshDocs();
 setInterval(refreshHealth, 15000);
 </script>
 </body>
 </html>