diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..a93d34a
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,6 @@
+CHROMA_PERSIST_DIR=./data/chroma
+EMBEDDING_MODEL=intfloat/multilingual-e5-large
+DEEPSEEK_API_KEY=sk-your-key-here
+DEEPSEEK_MODEL=deepseek-chat
+DEEPSEEK_BASE_URL=https://api.deepseek.com
+LOG_LEVEL=info
diff --git a/.gitignore b/.gitignore
index af23dc6..3431547 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ data/chroma/
 *.egg-info/
 .venv/
 .DS_Store
+server.log
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..69467fa
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.11-slim
+# curl is used by the docker-compose healthcheck; recommended extras are skipped to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+# Dependencies are installed before the sources are copied so this layer is reused on code-only rebuilds.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+EXPOSE 8003
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8003"]
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..662e9e7
--- /dev/null
+++ b/config.py
@@ -0,0 +1,19 @@
+from pydantic_settings import BaseSettings
+
+
+class Settings(BaseSettings):  # Service configuration; values may also come from the .env file named in model_config.
+    chroma_persist_dir: str = "./data/chroma"
+    embedding_model: str = "intfloat/multilingual-e5-large"
+    deepseek_api_key: str = ""  # empty by default; POST /query answers 500 while it is unset
+    deepseek_model: str = "deepseek-chat"
+    deepseek_base_url: str = "https://api.deepseek.com"
+    log_level: str = "info"
+    # Chunking defaults used by services.document_processor.chunk_sections.
+    max_chunk_size: int = 1200  # compared against len(text), i.e. characters
+    min_chunk_size: int = 200  # chunks shorter than this absorb the chunk that follows
+    overlap_sentences: int = 2  # trailing sentences of the previous chunk prepended to the next
+
+    model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
+
+
+settings = Settings()
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..9e617ab
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,25 @@
+services:
+  rag-service:
+    build: .
+    ports:
+      - "8003:8003"
+    volumes:
+      - chroma_data:/app/data
+      - embedding_cache:/root/.cache
+    environment:
+      CHROMA_PERSIST_DIR: /app/data/chroma
+      EMBEDDING_MODEL: intfloat/multilingual-e5-large
+      DEEPSEEK_API_KEY: ${DEEPSEEK_API_KEY}
+      DEEPSEEK_MODEL: ${DEEPSEEK_MODEL:-deepseek-chat}
+      DEEPSEEK_BASE_URL: ${DEEPSEEK_BASE_URL:-https://api.deepseek.com}  # listed in .env.example, so forward it too
+      LOG_LEVEL: ${LOG_LEVEL:-info}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -sf http://localhost:8003/health || exit 1"]
+      interval: 15s
+      timeout: 10s
+      retries: 10
+      start_period: 180s
+
+volumes:
+  chroma_data:
+  embedding_cache:
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..11ada98
--- /dev/null
+++ b/main.py
@@ -0,0 +1,55 @@
+import logging
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+
+from config import settings
+from services.embeddings import EmbeddingService
+from services.vectorstore import VectorStoreService
+
+logger = logging.getLogger(__name__)
+
+embedding_service: EmbeddingService | None = None
+vectorstore_service: VectorStoreService | None = None
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Build the shared embedding and vector-store services before the app starts serving."""
+    global embedding_service, vectorstore_service
+    level = getattr(logging, settings.log_level.upper(), logging.INFO)
+    logging.basicConfig(level=level)
+    model_name, chroma_dir = settings.embedding_model, settings.chroma_persist_dir
+    logger.info("Loading embedding model: %s", model_name)
+    embedding_service = EmbeddingService(model_name)
+    logger.info("Embedding model loaded")
+    vectorstore_service = VectorStoreService(persist_dir=chroma_dir, embedding_service=embedding_service)
+    logger.info("ChromaDB initialized at %s", chroma_dir)
+    yield
+    logger.info("Shutting down")
+
+
+app = FastAPI(
+    title="Chat Agent for Patients — Tuning Tool",
+    description="RAG-ядро и инструмент настройки пациентского чат-агента",
+    version="0.1.0",
+    lifespan=lifespan,
+)
+# Credentials stay disabled while origins are wildcarded: with both enabled any website could send credentialed requests here.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+from routers import documents, health, query  # noqa: E402
+
+app.include_router(health.router)
+app.include_router(documents.router)
+app.include_router(query.router)
+# NOTE(review): presumably mounted after the routers so the catch-all "/" mount does not shadow the API paths — confirm.
+app.mount("/", StaticFiles(directory="static", html=True), name="static")
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/requests.py b/models/requests.py
new file mode 100644
index 0000000..afcc5d4
--- /dev/null
+++ b/models/requests.py
@@ -0,0 +1,9 @@
+from pydantic import BaseModel, Field
+
+
+class QueryRequest(BaseModel):  # Body of POST /query; temperature/max_tokens left as None use the LLMClient.answer defaults.
+    text: str = Field(..., min_length=1, description="Вопрос от лица пациента")
+    top_k: int = Field(5, ge=1, le=20, description="Количество чанков для retrieval")
+    document_ids: list[str] | None = Field(None, description="Ограничить поиск конкретными документами")
+    temperature: float | None = Field(None, ge=0.0, le=2.0)
+    max_tokens: int | None = Field(None, ge=100, le=8000)
diff --git a/models/responses.py b/models/responses.py
new file mode 100644
index 0000000..619f668
--- /dev/null
+++ b/models/responses.py
@@ -0,0 +1,77 @@
+from pydantic import BaseModel, Field
+
+
+class DocumentInfo(BaseModel):  # One row of GET /documents; built from a VectorStoreService.list_documents() entry.
+    document_id: str
+    name: str
+    chunks_count: int
+    file_type: str
+    created_at: str
+    metadata: dict = Field(default_factory=dict)
+
+
+class ChunkPreview(BaseModel):  # Shortened view of a chunk, returned right after upload.
+    index: int
+    section: str = ""
+    page_number: int = 0
+    text_preview: str = ""  # the upload handler fills this with the first 300 characters of the chunk
+    char_length: int = 0  # length of the full chunk text, not of the preview
+
+
+class DocumentUploadResponse(BaseModel):  # Result of POST /documents/upload.
+    document_id: str
+    name: str  # the document_name form field when given, otherwise the uploaded filename
+    chunks_count: int
+    status: str = "indexed"
+    created_at: str  # ISO-8601 UTC timestamp taken when the response is built
+    chunks_preview: list[ChunkPreview] = Field(default_factory=list)  # the upload handler sends the first 3 chunks
+
+
+class DocumentListResponse(BaseModel):  # Result of GET /documents.
+    documents: list[DocumentInfo]
+    total: int  # the list handler sets this to len(documents)
+
+
+class ChunkDetail(BaseModel):  # Full view of one stored chunk, as returned by GET /documents/{id}/chunks.
+    index: int  # read from the chunk_index metadata key
+    section: str = ""
+    page_number: int = 0
+    text: str = ""
+    char_length: int = 0  # len(text)
+
+
+class DocumentChunksResponse(BaseModel):  # Result of GET /documents/{document_id}/chunks.
+    document_id: str
+    name: str  # taken from the first chunk's document_name metadata
+    file_type: str  # taken from the first chunk's file_type metadata
+    chunks_count: int
+    chunks: list[ChunkDetail] = Field(default_factory=list)
+
+
+class DocumentDeleteResponse(BaseModel):  # Result of DELETE /documents/{document_id}.
+    ok: bool = True
+    deleted_chunks: int  # value returned by the vector store; the handler answers 404 instead when it is 0
+
+
+class SourceInfo(BaseModel):  # One retrieved chunk reported alongside an answer.
+    document_id: str
+    document_name: str
+    chunk_text: str  # rag_query truncates this to the first 500 characters
+    section: str = ""
+    page: int = 0  # filled from the page_number metadata key
+    relevance_score: float = 0.0  # rag_query rounds this to 3 decimal places
+
+
+class QueryResponse(BaseModel):  # Result of POST /query.
+    answer: str
+    sources: list[SourceInfo]
+    model_used: str
+    assembled_prompt: str = ""  # the "[SYSTEM]...[USER]..." text built by LLMClient.answer
+
+
+class HealthResponse(BaseModel):  # Result of GET /health.
+    status: str = "ok"  # the health handler sends "loading" while the vector store is not initialised
+    chromadb: str  # "connected" or "not_connected"
+    embedding_model: str
+    documents_count: int
+    chunks_count: int
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b2320e1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+fastapi==0.115.5
+uvicorn[standard]==0.32.1
+python-multipart==0.0.12
+chromadb==0.5.23
+sentence-transformers==3.3.1
+pymupdf==1.25.1
+python-docx==1.1.2
+httpx==0.28.1
+pydantic-settings==2.7.1
diff --git a/routers/__init__.py b/routers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/routers/documents.py b/routers/documents.py
new file mode 100644
index 0000000..ab3c229
--- /dev/null
+++ b/routers/documents.py
@@ -0,0 +1,155 @@
+import logging
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+
+from models.responses import (
+    ChunkDetail,
+    ChunkPreview,
+    DocumentChunksResponse,
+    DocumentDeleteResponse,
+    DocumentInfo,
+    DocumentListResponse,
+    DocumentUploadResponse,
+)
+from services.document_processor import process_document
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/documents", tags=["documents"])
+
+ALLOWED_EXTENSIONS = {".pdf", ".docx", ".doc", ".txt", ".md"}
+MAX_FILE_SIZE = 50 * 1024 * 1024  # 50 MB
+
+
+@router.post("/upload", response_model=DocumentUploadResponse)
+async def upload_document(
+    file: UploadFile = File(...),
+    document_name: str | None = Form(None),
+):
+    """Validate, parse, chunk and index one uploaded file; responds with a preview of the first chunks."""
+    from main import vectorstore_service
+    if vectorstore_service is None:
+        raise HTTPException(status_code=503, detail="Service not ready")
+
+    filename = file.filename or "unknown"
+    ext = "." + filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
+    if ext not in ALLOWED_EXTENSIONS:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unsupported file format: {ext}. Allowed: {', '.join(sorted(ALLOWED_EXTENSIONS))}",
+        )
+    # Read at most one byte past the limit so an oversized upload is rejected without loading all of it into memory.
+    file_bytes = await file.read(MAX_FILE_SIZE + 1)
+    if len(file_bytes) > MAX_FILE_SIZE:
+        raise HTTPException(status_code=400, detail="File too large (max 50 MB)")
+    if len(file_bytes) == 0:
+        raise HTTPException(status_code=400, detail="Empty file")
+
+    display_name = document_name or filename
+    try:
+        document_id, sections, chunks = process_document(file_bytes, filename)
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e)) from e
+    except Exception as e:
+        logger.exception("Failed to process document: %s", filename)
+        raise HTTPException(status_code=500, detail=f"Error processing document: {e}") from e
+
+    if not chunks:
+        raise HTTPException(status_code=400, detail="No content could be extracted from the document")
+
+    file_type = ext.lstrip(".")
+    chunks_count = vectorstore_service.add_document(
+        document_id=document_id,
+        document_name=display_name,
+        file_type=file_type,
+        chunks=[
+            {
+                "text": c.text,
+                "section": c.section,
+                "page_number": c.page_number,
+                "chunk_index": c.chunk_index,
+            }
+            for c in chunks
+        ],
+    )
+    # Only the first three chunks are echoed back, each cut to 300 characters.
+    chunks_prev = [
+        ChunkPreview(
+            index=c.chunk_index,
+            section=c.section,
+            page_number=c.page_number,
+            text_preview=c.text[:300],
+            char_length=len(c.text),
+        )
+        for c in chunks[:3]
+    ]
+
+    return DocumentUploadResponse(
+        document_id=document_id,
+        name=display_name,
+        chunks_count=chunks_count,
+        status="indexed",
+        created_at=datetime.now(timezone.utc).isoformat(),
+        chunks_preview=chunks_prev,
+    )
+
+
+@router.get("", response_model=DocumentListResponse)
+async def list_documents():
+    """Return every indexed document together with the total count."""
+    from main import vectorstore_service
+
+    if vectorstore_service is None:
+        raise HTTPException(status_code=503, detail="Service not ready")
+
+    # Each row reported by the store is expanded into a DocumentInfo model.
+    rows = vectorstore_service.list_documents()
+    infos = [DocumentInfo(**row) for row in rows]
+    return DocumentListResponse(documents=infos, total=len(rows))
+
+
+@router.get("/{document_id}/chunks", response_model=DocumentChunksResponse)
+async def get_document_chunks(document_id: str):
+    """Return every stored chunk of one document, or 404 when the store has none for this id."""
+    from main import vectorstore_service
+
+    if vectorstore_service is None:
+        raise HTTPException(status_code=503, detail="Service not ready")
+
+    stored = vectorstore_service.get_document_chunks(document_id)
+    if not stored:
+        raise HTTPException(status_code=404, detail="Document not found")
+    details: list[ChunkDetail] = []
+    for item in stored:
+        meta, body = item["metadata"], item["text"]
+        details.append(ChunkDetail(
+            index=meta.get("chunk_index", 0),
+            section=meta.get("section", ""),
+            page_number=meta.get("page_number", 0),
+            text=body,
+            char_length=len(body),
+        ))
+    # Document-level fields are taken from the first chunk's metadata.
+    first_meta = stored[0]["metadata"]
+    return DocumentChunksResponse(
+        document_id=document_id,
+        name=first_meta.get("document_name", ""),
+        file_type=first_meta.get("file_type", ""),
+        chunks_count=len(details),
+        chunks=details,
+    )
+
+
+@router.delete("/{document_id}", response_model=DocumentDeleteResponse)
+async def delete_document(document_id: str):
+    """Remove all chunks of a document; 404 when nothing was stored under this id."""
+    from main import vectorstore_service
+
+    if vectorstore_service is None:
+        raise HTTPException(status_code=503, detail="Service not ready")
+
+    removed = vectorstore_service.delete_document(document_id)
+    if removed == 0:
+        raise HTTPException(status_code=404, detail="Document not found")
+    return DocumentDeleteResponse(ok=True, deleted_chunks=removed)
diff --git a/routers/health.py b/routers/health.py
new file mode 100644
index 0000000..85015f4
--- /dev/null
+++ b/routers/health.py
@@ -0,0 +1,29 @@
+from fastapi import APIRouter
+
+from config import settings
+from models.responses import HealthResponse
+
+router = APIRouter()
+
+
+@router.get("/health", response_model=HealthResponse)
+async def health():
+    """Report readiness: "loading" until the vector store exists, then live document/chunk counts."""
+    from main import vectorstore_service
+    if vectorstore_service is not None:
+        stats = vectorstore_service.get_stats()
+        return HealthResponse(
+            status="ok",
+            chromadb="connected",
+            embedding_model=settings.embedding_model,
+            documents_count=stats["documents_count"],
+            chunks_count=stats["chunks_count"],
+        )
+    # The vector store global has not been set by the lifespan hook yet.
+    return HealthResponse(
+        status="loading",
+        chromadb="not_connected",
+        embedding_model=settings.embedding_model,
+        documents_count=0,
+        chunks_count=0,
+    )
diff --git a/routers/query.py b/routers/query.py
new file mode 100644
index 0000000..cee837c
--- /dev/null
+++ b/routers/query.py
@@ -0,0 +1,47 @@
+import logging
+
+from fastapi import APIRouter, HTTPException
+
+from config import settings
+from models.requests import QueryRequest
+from models.responses import QueryResponse, SourceInfo
+from services.llm_client import LLMClient
+from services.rag_pipeline import rag_query
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["query"])
+
+
+@router.post("/query", response_model=QueryResponse)
+async def query_rag(request: QueryRequest):
+    """Answer one patient question through the RAG pipeline and return the answer with its sources."""
+    from main import vectorstore_service
+
+    if vectorstore_service is None:
+        raise HTTPException(status_code=503, detail="Service not ready")
+    if not settings.deepseek_api_key:
+        raise HTTPException(status_code=500, detail="DEEPSEEK_API_KEY not configured")
+
+    # Request fields are forwarded to the pipeline unchanged; `text` becomes the question.
+    pipeline_kwargs = {
+        "vectorstore": vectorstore_service,
+        "llm_client": LLMClient(),
+        "question": request.text,
+        "top_k": request.top_k,
+        "document_ids": request.document_ids,
+        "temperature": request.temperature,
+        "max_tokens": request.max_tokens,
+    }
+    try:
+        outcome = await rag_query(**pipeline_kwargs)
+    except Exception as e:
+        logger.exception("RAG query failed")
+        raise HTTPException(status_code=500, detail=f"RAG query error: {e}")
+
+    return QueryResponse(
+        answer=outcome["answer"],
+        sources=[SourceInfo(**src) for src in outcome["sources"]],
+        model_used=outcome["model_used"],
+        assembled_prompt=outcome.get("assembled_prompt", ""),
+    )
diff --git a/services/__init__.py b/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/services/document_processor.py b/services/document_processor.py
new file mode 100644
index 0000000..2659a05
--- /dev/null
+++ b/services/document_processor.py
@@ -0,0 +1,300 @@
+import io
+import logging
+import re
+import uuid
+from dataclasses import dataclass
+from pathlib import Path
+
+import fitz  # pymupdf
+from docx import Document as DocxDocument
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ParsedSection:  # One heading-delimited piece of a parsed document.
+    heading: str  # heading text; "" for content that precedes the first heading
+    heading_level: int
+    body: str  # section text, lines joined with "\n"
+    page_number: int = 0  # set by parse_pdf only; the other parsers leave the default
+
+
+@dataclass
+class Chunk:  # One unit of text produced by chunk_sections.
+    text: str
+    section: str = ""  # heading of the section the chunk was cut from
+    page_number: int = 0
+    chunk_index: int = 0  # position in the list returned by chunk_sections
+
+
+# --- Parsers ---
+
+
+def parse_pdf(file_bytes: bytes) -> list[ParsedSection]:
+    """Split a PDF into sections, treating short large-font or bold lines as headings.
+
+    A line counts as a heading when it is under 200 characters and its largest span
+    is at least 14 pt, or bold and at least 12 pt. Headings of 16 pt and above get
+    level 1, all others level 2. Every section records the level of its OWN heading
+    and the 1-based page that heading appeared on.
+
+    Returns:
+        Sections in reading order; text before the first heading has an empty heading.
+    """
+    sections: list[ParsedSection] = []
+    current_heading, current_level, current_page = "", 2, 0
+    current_body_lines: list[str] = []
+
+    def flush() -> None:
+        # Emit the section collected so far; a no-op while nothing has been read.
+        if current_heading or current_body_lines:
+            sections.append(ParsedSection(
+                heading=current_heading,
+                heading_level=current_level,
+                body="\n".join(current_body_lines).strip(),
+                page_number=current_page,
+            ))
+
+    # The context manager closes the document even if a page fails to parse.
+    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
+        for page_num, page in enumerate(doc, start=1):
+            for block in page.get_text("dict")["blocks"]:
+                for line in block.get("lines", ()):
+                    spans = line["spans"]
+                    text = "".join(span["text"] for span in spans).strip()
+                    if not text:
+                        continue
+                    max_size = max(span["size"] for span in spans)
+                    is_bold = any("bold" in span["font"].lower() for span in spans)
+                    if (max_size >= 14 or (is_bold and max_size >= 12)) and len(text) < 200:
+                        flush()
+                        current_heading, current_body_lines = text, []
+                        current_level = 1 if max_size >= 16 else 2
+                        current_page = page_num
+                    else:
+                        current_body_lines.append(text)
+                        if not current_heading:
+                            current_page = page_num
+
+    flush()
+    return sections
+
+
+def parse_docx(file_bytes: bytes) -> list[ParsedSection]:
+    """Split a DOCX file into sections at paragraphs whose style name contains "heading" or "title".
+
+    The heading level is the first number found in the style name and defaults to 1.
+    Empty paragraphs are skipped; page numbers are not filled in.
+    """
+    document = DocxDocument(io.BytesIO(file_bytes))
+    collected: list[ParsedSection] = []
+    heading, level, body_lines = "", 0, []
+
+    def close_section() -> None:
+        # Store the section gathered so far unless nothing has been read yet.
+        if heading or body_lines:
+            collected.append(ParsedSection(
+                heading=heading,
+                heading_level=level or 1,
+                body="\n".join(body_lines).strip(),
+            ))
+
+    for para in document.paragraphs:
+        text = para.text.strip()
+        if not text:
+            continue
+        style = (para.style.name or "").lower()
+        if "heading" not in style and "title" not in style:
+            # Ordinary paragraph: it belongs to the body of the current section.
+            body_lines.append(text)
+            continue
+        # Heading paragraph: finish the previous section and start a new one.
+        close_section()
+        digits = re.search(r"\d+", style)
+        level = int(digits.group()) if digits else 1
+        heading, body_lines = text, []
+
+    close_section()
+    return collected
+
+
+def parse_text(file_bytes: bytes, is_markdown: bool = False) -> list[ParsedSection]:
+    """Parse wiki-style TXT/MD.
+
+    Heuristics tuned for operator wikis (is_markdown is currently not consulted):
+    - markdown headings (#, ##, ...)
+    - numbered items "1.", "1.1.", "1.1.1." (level = numbering depth)
+    - FAQ pattern "В:" / "Вопрос:" / "Q:" / "Question:" starts a new section
+    - short ALL-CAPS lines are headings
+    """
+    text = file_bytes.decode("utf-8", errors="replace")
+    lines = text.split("\n")
+    sections: list[ParsedSection] = []
+    current_heading = ""
+    current_level = 0
+    current_body_lines: list[str] = []
+
+    md_heading_re = re.compile(r"^(#{1,6})\s+(.+)")
+    numbered_heading_re = re.compile(r"^(\d+(?:\.\d+)*\.?)\s+([А-ЯЁA-Z].*)")
+    faq_question_re = re.compile(r"^(В|Вопрос|Q|Question)\s*[:\.]\s*(.+)", re.IGNORECASE)
+
+    for line in lines:
+        stripped = line.strip()
+        # The heuristics are tried in order; the first one that matches wins.
+        heading_text = None
+        heading_level = 0
+
+        md_match = md_heading_re.match(stripped)
+        if md_match:
+            heading_level = len(md_match.group(1))
+            heading_text = md_match.group(2).strip()
+
+        if not heading_text:
+            num_match = numbered_heading_re.match(stripped)
+            if num_match and len(stripped) < 200:
+                dots = num_match.group(1).rstrip(".").count(".")  # the optional trailing dot must not add a level
+                heading_level = max(1, dots + 1)
+                heading_text = stripped
+
+        if not heading_text:
+            faq_match = faq_question_re.match(stripped)
+            if faq_match and len(stripped) < 300:
+                heading_text = faq_match.group(2).strip()
+                heading_level = 3
+
+        if not heading_text and stripped.isupper() and 3 < len(stripped) < 200:
+            heading_text = stripped
+            heading_level = 1
+
+        if heading_text:
+            if current_heading or current_body_lines:
+                sections.append(ParsedSection(
+                    heading=current_heading,
+                    heading_level=current_level or 1,
+                    body="\n".join(current_body_lines).strip(),
+                ))
+            current_heading = heading_text
+            current_level = heading_level
+            current_body_lines = []
+        else:
+            current_body_lines.append(line)
+
+    if current_heading or current_body_lines:
+        sections.append(ParsedSection(
+            heading=current_heading,
+            heading_level=current_level or 1,
+            body="\n".join(current_body_lines).strip(),
+        ))
+
+    return sections
+
+
+# --- Chunker ---
+
+
+def _split_sentences(text: str) -> list[str]:
+    pieces = (piece.strip() for piece in re.split(r"(?<=[.!?])\s+", text))  # split after ., ! or ? followed by whitespace
+    return [piece for piece in pieces if piece]
+
+
+def chunk_sections(
+    sections: list[ParsedSection],
+    max_chunk_size: int | None = None,
+    min_chunk_size: int | None = None,
+    overlap_sentences: int | None = None,
+) -> list[Chunk]:
+    """Chunk wiki sections.
+
+    - Small sections (FAQ answers) are kept whole: one chunk = one topic.
+    - Large sections (regulations) are cut on line breaks; every piece repeats the section heading.
+    - Undersized chunks absorb their successor, then the last N sentences of the previous chunk are prepended.
+    """
+    max_size = settings.max_chunk_size if max_chunk_size is None else max_chunk_size
+    min_size = settings.min_chunk_size if min_chunk_size is None else min_chunk_size
+    overlap = settings.overlap_sentences if overlap_sentences is None else overlap_sentences
+    # Only None selects the configured default, so an explicit 0 (e.g. overlap_sentences=0) is honoured.
+    raw_chunks: list[Chunk] = []
+
+    for section in sections:
+        heading_prefix = f"{section.heading}\n\n" if section.heading else ""
+        full_text = heading_prefix + section.body
+
+        if len(full_text) <= max_size:
+            raw_chunks.append(Chunk(
+                text=full_text.strip(),
+                section=section.heading,
+                page_number=section.page_number,
+            ))
+        else:
+            paragraphs = section.body.split("\n")
+            current_text = heading_prefix
+            for para in paragraphs:
+                if len(current_text) + len(para) + 1 > max_size and len(current_text) > len(heading_prefix):
+                    raw_chunks.append(Chunk(
+                        text=current_text.strip(),
+                        section=section.heading,
+                        page_number=section.page_number,
+                    ))
+                    current_text = heading_prefix + para + "\n"
+                else:
+                    current_text += para + "\n"
+            if current_text.strip() and current_text.strip() != heading_prefix.strip():
+                raw_chunks.append(Chunk(
+                    text=current_text.strip(),
+                    section=section.heading,
+                    page_number=section.page_number,
+                ))
+    # Merge pass: a chunk still below min_size swallows the next one (the result is not re-checked against max_size).
+    merged: list[Chunk] = []
+    for chunk in raw_chunks:
+        if merged and len(merged[-1].text) < min_size:
+            merged[-1].text += "\n\n" + chunk.text
+            if not merged[-1].section:
+                merged[-1].section = chunk.section
+        else:
+            merged.append(Chunk(
+                text=chunk.text,
+                section=chunk.section,
+                page_number=chunk.page_number,
+            ))
+    # Overlap pass: prepend the tail of the previous chunk unless that text is already present, then number the chunks.
+    final: list[Chunk] = []
+    for i, chunk in enumerate(merged):
+        if i > 0 and overlap > 0:
+            prev_sentences = _split_sentences(merged[i - 1].text)
+            overlap_text = " ".join(prev_sentences[-overlap:])
+            if overlap_text and overlap_text not in chunk.text:
+                chunk.text = overlap_text + "\n\n" + chunk.text
+        chunk.chunk_index = i
+        final.append(chunk)
+
+    return final
+
+
+# --- Main processor ---
+
+
+def process_document(file_bytes: bytes, filename: str) -> tuple[str, list[ParsedSection], list[Chunk]]:
+    """Parse and chunk an upload; raises ValueError for unsupported formats, including legacy binary .doc."""
+    document_id = str(uuid.uuid4())
+    ext = Path(filename).suffix.lower()
+    # The DOCX parser needs a zip package ("PK" signature); a binary .doc would only fail later with an opaque error.
+    if ext == ".doc" and not file_bytes.startswith(b"PK"):
+        raise ValueError("Legacy .doc files are not supported; please convert the file to .docx")
+    if ext == ".pdf":
+        sections = parse_pdf(file_bytes)
+    elif ext in (".docx", ".doc"):
+        sections = parse_docx(file_bytes)
+    elif ext in (".md", ".txt"):
+        sections = parse_text(file_bytes, is_markdown=ext == ".md")
+    else:
+        raise ValueError(f"Unsupported file format: {ext}")
+
+    if not sections:
+        logger.warning("No sections found in %s", filename)
+        return document_id, [], []
+    chunks = chunk_sections(sections)
+    logger.info("Processed '%s': %d sections → %d chunks", filename, len(sections), len(chunks))
+    return document_id, sections, chunks
diff --git a/services/embeddings.py b/services/embeddings.py
new file mode 100644
index 0000000..3f9427a
--- /dev/null
+++ b/services/embeddings.py
@@ -0,0 +1,22 @@
+import logging
+
+from sentence_transformers import SentenceTransformer
+
+logger = logging.getLogger(__name__)
+
+
+class EmbeddingService:
+    def __init__(self, model_name: str = "intfloat/multilingual-e5-large"):
+        logger.info("Loading embedding model: %s", model_name)
+        self.model_name = model_name
+        self.model = SentenceTransformer(model_name)
+        logger.info("Embedding model loaded (dim=%d)", self.model.get_sentence_embedding_dimension())
+
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        """Embed texts for indexing: each gets the "passage: " prefix and a normalized vector."""
+        passages = [f"passage: {body}" for body in texts]
+        return self.model.encode(passages, normalize_embeddings=True, show_progress_bar=False).tolist()
+
+    def embed_query(self, query: str) -> list[float]:
+        """Embed a search query with the "query: " prefix and return its normalized vector."""
+        return self.model.encode(f"query: {query}", normalize_embeddings=True).tolist()
diff --git a/services/llm_client.py b/services/llm_client.py
new file mode 100644
index 0000000..fc7745c
--- /dev/null
+++ b/services/llm_client.py
@@ -0,0 +1,104 @@
+import logging
+
+import httpx
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_SYSTEM_PROMPT = """Ты — виртуальный ассистент клиники, который первым отвечает пациентам в чате.
+
+Твоя задача — помочь пациенту по бытовым и организационным вопросам: запись, расписание врачей, подготовка к приёму, как проехать, документы, оплата, ДМС, детский приём и т. п.
+
+Правила:
+- Отвечай коротко, дружелюбно, на «вы», простым русским языком без медицинской латыни.
+- Опирайся ТОЛЬКО на предоставленные выдержки из базы знаний. Если ответа в них нет — честно скажи, что уточнишь у оператора, и предложи подключить оператора.
+- Не ставь диагнозы и не назначай лечение. Если вопрос про симптомы, лекарства, дозировки или «что со мной» — мягко предложи записаться к врачу и подключить оператора, если нужно.
+- Не выдумывай телефоны, адреса, цены, имена врачей, расписание. Только из источников.
+- Если пациент просит оператора — коротко подтверди, что сейчас его подключишь.
+- Источники указывать не нужно: пациент их не видит. Ответ — обычный текст, как в чате."""
+
+DEFAULT_USER_TEMPLATE = """Вопрос пациента:
+{question}
+
+Выдержки из базы знаний операторов:
+{sources}
+
+Ответь пациенту в чате по правилам из системного сообщения."""
+
+
+class LLMClient:
+    """Async client that posts chat messages to `{base_url}/chat/completions`."""
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model: str | None = None,
+        base_url: str | None = None,
+    ):
+        # Falsy arguments fall back to the corresponding values from settings.
+        self.api_key = api_key or settings.deepseek_api_key
+        self.model = model or settings.deepseek_model
+        self.base_url = (base_url or settings.deepseek_base_url).rstrip("/")
+
+    def _format_sources(self, sources: list[dict]) -> str:
+        """Render retrieved chunks as a numbered list for the user prompt."""
+        if not sources:
+            return "(источники не найдены)"
+
+        def render(number: int, src: dict) -> str:
+            meta = src.get("metadata", {})
+            doc_name = meta.get("document_name", "Документ")
+            return f"[{number}] {src['text']}\n    (Источник: {doc_name}, раздел: {meta.get('section', '')})"
+
+        return "\n".join(render(n, src) for n, src in enumerate(sources, 1))
+
+    async def answer(
+        self,
+        question: str,
+        sources: list[dict],
+        system_prompt: str | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+    ) -> dict:
+        """Ask the model to answer `question` from the retrieved `sources`.
+
+        Falls back to DEFAULT_SYSTEM_PROMPT, temperature 0.2 and 1200 max tokens when the
+        corresponding argument is not given.
+
+        Returns a dict with the stripped answer under 'text' and the full prompt under 'assembled_prompt'.
+        """
+        effective_system = system_prompt or DEFAULT_SYSTEM_PROMPT
+        effective_temp = 0.2 if temperature is None else temperature
+        effective_max_tokens = max_tokens or 1200
+
+        user_message = DEFAULT_USER_TEMPLATE.format(question=question, sources=self._format_sources(sources))
+
+        request_headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        }
+        payload = {
+            "model": self.model,
+            "messages": [
+                {"role": "system", "content": effective_system},
+                {"role": "user", "content": user_message},
+            ],
+            "temperature": effective_temp,
+            "max_tokens": effective_max_tokens,
+        }
+
+        # A fresh HTTP client is opened per call; raise_for_status() turns error responses into exceptions.
+        async with httpx.AsyncClient(timeout=60.0) as client:
+            response = await client.post(
+                f"{self.base_url}/chat/completions", json=payload, headers=request_headers
+            )
+            response.raise_for_status()
+            data = response.json()
+
+        content = data["choices"][0]["message"]["content"]
+        logger.info("LLM response: %d chars, model=%s, temp=%.2f", len(content), self.model, effective_temp)
+        return {
+            "text": content.strip(),
+            "assembled_prompt": f"[SYSTEM]\n{effective_system}\n\n[USER]\n{user_message}",
+        }
diff --git a/services/rag_pipeline.py b/services/rag_pipeline.py
new file mode 100644
index 0000000..8efce7e
--- /dev/null
+++ b/services/rag_pipeline.py
@@ -0,0 +1,52 @@
+import logging
+
+from services.llm_client import LLMClient
+from services.vectorstore import VectorStoreService
+
+logger = logging.getLogger(__name__)
+
+
+async def rag_query(
+    vectorstore: VectorStoreService,
+    llm_client: LLMClient,
+    question: str,
+    top_k: int = 5,
+    document_ids: list[str] | None = None,
+    temperature: float | None = None,
+    max_tokens: int | None = None,
+) -> dict:
+    """Pipeline: retrieve → augment → generate for a single patient question.
+
+    Returns a dict with 'answer', 'sources' (chunk text cut to 500 characters, score rounded
+    to 3 decimals), 'model_used' and 'assembled_prompt'.
+    """
+    logger.info("RAG query: %s", question[:200])
+
+    hits = vectorstore.query(query_text=question, top_k=top_k, document_ids=document_ids)
+    logger.info("Retrieved %d chunks", len(hits))
+
+    generated = await llm_client.answer(
+        question=question,
+        sources=hits,
+        temperature=temperature,
+        max_tokens=max_tokens,
+    )
+
+    def describe(hit: dict) -> dict:
+        # Flatten one retrieved item into the fields of the SourceInfo response model.
+        meta = hit.get("metadata", {})
+        return {
+            "document_id": meta.get("document_id", ""),
+            "document_name": meta.get("document_name", ""),
+            "chunk_text": hit["text"][:500],
+            "section": meta.get("section", ""),
+            "page": meta.get("page_number", 0),
+            "relevance_score": round(hit.get("relevance_score", 0), 3),
+        }
+
+    return {
+        "answer": generated["text"],
+        "sources": [describe(hit) for hit in hits],
+        "model_used": llm_client.model,
+        "assembled_prompt": generated["assembled_prompt"],
+    }
diff --git a/services/vectorstore.py b/services/vectorstore.py
new file mode 100644
index 0000000..33d7ea8
--- /dev/null
+++ b/services/vectorstore.py
@@ -0,0 +1,145 @@
+import logging
+from datetime import datetime, timezone
+
+import chromadb
+
+from services.embeddings import EmbeddingService
+
+logger = logging.getLogger(__name__)
+
+COLLECTION_NAME = "operators_wiki"
+
+
+class VectorStoreService:
+    def __init__(self, persist_dir: str, embedding_service: EmbeddingService) -> None:
+        self.client = chromadb.PersistentClient(path=persist_dir)  # Chroma client persisting under persist_dir
+        self.embedding_service = embedding_service  # used to embed both documents and queries
+        self.collection = self.client.get_or_create_collection(  # one shared collection for all documents
+            name=COLLECTION_NAME,
+            metadata={"hnsw:space": "cosine"},  # cosine space; query() reports relevance as 1 - distance
+        )
+        logger.info("ChromaDB collection '%s': %d items", COLLECTION_NAME, self.collection.count())
+
+    def add_document(
+        self,
+        document_id: str,
+        document_name: str,
+        file_type: str,
+        chunks: list[dict],
+    ) -> int:
+        """Embed the chunks of one document, store them, and return how many were stored."""
+        if not chunks:
+            return 0
+
+        texts = [chunk["text"] for chunk in chunks]
+        vectors = self.embedding_service.embed_documents(texts)
+        stamp = datetime.now(timezone.utc).isoformat()
+
+        chunk_ids = [f"{document_id}_chunk_{pos}" for pos, _ in enumerate(chunks)]
+        chunk_metas = [
+            {
+                "document_id": document_id,
+                "document_name": document_name,
+                "file_type": file_type,
+                "section": chunk.get("section", ""),
+                "page_number": chunk.get("page_number", 0),
+                "chunk_index": pos,
+                "created_at": stamp,
+            }
+            for pos, chunk in enumerate(chunks)
+        ]
+
+        self.collection.add(
+            ids=chunk_ids,
+            embeddings=vectors,
+            documents=texts,
+            metadatas=chunk_metas,
+        )
+        logger.info("Added %d chunks for document '%s'", len(chunks), document_name)
+        return len(chunks)
+
+    def query(
+        self,
+        query_text: str,
+        top_k: int = 5,
+        document_ids: list[str] | None = None,
+    ) -> list[dict]:
+        """Return the top_k nearest stored chunks for query_text, optionally limited to document_ids."""
+        query_embedding = self.embedding_service.embed_query(query_text)
+
+        # A missing or empty document_ids list means no metadata filter is applied.
+        if not document_ids:
+            where_filter = None
+        elif len(document_ids) == 1:
+            where_filter = {"document_id": document_ids[0]}
+        else:
+            where_filter = {"document_id": {"$in": document_ids}}
+
+        results = self.collection.query(
+            query_embeddings=[query_embedding],
+            n_results=top_k,
+            where=where_filter,
+            include=["documents", "metadatas", "distances"],
+        )
+
+        # Only one query embedding is sent, so the hits are read from index 0 of each result list.
+        if not (results["ids"] and results["ids"][0]):
+            return []
+        texts, metas, dists = (results[key][0] for key in ("documents", "metadatas", "distances"))
+        return [
+            {"chunk_id": chunk_id, "text": texts[i], "metadata": metas[i],
+             "distance": dists[i], "relevance_score": 1 - dists[i]}
+            for i, chunk_id in enumerate(results["ids"][0])
+        ]
+
+    def delete_document(self, document_id: str) -> int:
+        """Delete every chunk stored for document_id and return how many were removed."""
+        chunk_ids = self.collection.get(where={"document_id": document_id}, include=[])["ids"]
+        if chunk_ids:
+            self.collection.delete(ids=chunk_ids)
+        logger.info("Deleted %d chunks for document_id=%s", len(chunk_ids), document_id)
+        return len(chunk_ids)
+
+    def list_documents(self) -> list[dict]:
+        """Summarize the stored chunks as one entry per document_id, including a chunk count."""
+        summaries: dict[str, dict] = {}
+        for meta in self.collection.get(include=["metadatas"])["metadatas"]:
+            doc_id = meta["document_id"]
+            # The first chunk seen for a document supplies its descriptive fields.
+            entry = summaries.setdefault(doc_id, {
+                "document_id": doc_id,
+                "name": meta.get("document_name", ""),
+                "file_type": meta.get("file_type", ""),
+                "created_at": meta.get("created_at", ""),
+                "chunks_count": 0,
+                "metadata": {},
+            })
+            entry["chunks_count"] += 1
+        return list(summaries.values())
+
+    def get_document_chunks(self, document_id: str) -> list[dict]:
+        """Fetch every stored chunk of one document, ordered by ascending chunk_index."""
+        found = self.collection.get(
+            where={"document_id": document_id},
+            include=["documents", "metadatas"],
+        )
+        chunk_rows = [
+            {
+                "chunk_id": chunk_id,
+                "text": found["documents"][pos],
+                "metadata": found["metadatas"][pos],
+            }
+            for pos, chunk_id in enumerate(found["ids"] or [])
+        ]
+        # Chunks whose metadata has no chunk_index sort as index 0.
+        return sorted(chunk_rows, key=lambda row: row["metadata"].get("chunk_index", 0))
+
+    def get_stats(self) -> dict:
+        """Count the distinct document ids and the total number of chunks in the collection."""
+        metadatas = self.collection.get(include=["metadatas"])["metadatas"]
+        # Chunks lacking a document_id are all counted under the empty-string id.
+        distinct_ids = {meta.get("document_id", "") for meta in metadatas}
+        return {
+            "documents_count": len(distinct_ids),
+            "chunks_count": self.collection.count(),
+        }
diff --git a/static/index.html b/static/index.html
new file mode 100644
index 0000000..efbb491
--- /dev/null
+++ b/static/index.html
@@ -0,0 +1,571 @@
+<!DOCTYPE html>
+<html lang="ru">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Chat Agent for Patients — Debug UI</title>
+<style>
+  :root {
+    --bg: #f5f6f8;
+    --panel: #ffffff;
+    --border: #e1e4ea;
+    --muted: #6b7280;
+    --fg: #111827;
+    --accent: #2563eb;
+    --accent-hover: #1d4ed8;
+    --ok: #16a34a;
+    --warn: #d97706;
+    --err: #dc2626;
+    --chip-bg: #eef2ff;
+    --mono: ui-monospace, SFMono-Regular, Menlo, monospace;
+  }
+  * { box-sizing: border-box; }
+  body {
+    margin: 0;
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+    background: var(--bg);
+    color: var(--fg);
+    font-size: 14px;
+    line-height: 1.5;
+  }
+  header {
+    background: var(--panel);
+    border-bottom: 1px solid var(--border);
+    padding: 14px 24px;
+    display: flex;
+    align-items: center;
+    gap: 16px;
+    position: sticky;
+    top: 0;
+    z-index: 10;
+  }
+  header h1 {
+    margin: 0;
+    font-size: 16px;
+    font-weight: 600;
+  }
+  .status {
+    display: inline-flex;
+    align-items: center;
+    gap: 6px;
+    padding: 4px 10px;
+    border-radius: 999px;
+    background: var(--chip-bg);
+    font-size: 13px;
+  }
+  .dot {
+    width: 8px;
+    height: 8px;
+    border-radius: 50%;
+    background: var(--muted);
+  }
+  .dot.ok { background: var(--ok); }
+  .dot.warn { background: var(--warn); }
+  .dot.err { background: var(--err); }
+  .stats {
+    margin-left: auto;
+    font-size: 13px;
+    color: var(--muted);
+  }
+  .stats b { color: var(--fg); }
+  main {
+    padding: 24px;
+    max-width: 1400px;
+    margin: 0 auto;
+    display: grid;
+    gap: 24px;
+  }
+  .panel {
+    background: var(--panel);
+    border: 1px solid var(--border);
+    border-radius: 12px;
+    padding: 20px;
+  }
+  .panel h2 {
+    margin: 0 0 16px 0;
+    font-size: 15px;
+    font-weight: 600;
+  }
+  .dropzone {
+    border: 2px dashed var(--border);
+    border-radius: 10px;
+    padding: 28px;
+    text-align: center;
+    color: var(--muted);
+    cursor: pointer;
+    transition: all 0.15s;
+  }
+  .dropzone.drag { border-color: var(--accent); background: var(--chip-bg); color: var(--fg); }
+  .dropzone input { display: none; }
+  table {
+    width: 100%;
+    border-collapse: collapse;
+    margin-top: 16px;
+    font-size: 13px;
+  }
+  th, td {
+    text-align: left;
+    padding: 8px 10px;
+    border-bottom: 1px solid var(--border);
+    vertical-align: top;
+  }
+  th {
+    font-weight: 600;
+    color: var(--muted);
+    font-size: 12px;
+    text-transform: uppercase;
+    letter-spacing: 0.03em;
+  }
+  tr:last-child td { border-bottom: none; }
+  tr.doc-row { cursor: pointer; }
+  tr.doc-row:hover td:not(:last-child) { background: #f9fafb; }
+  tr.doc-row .arrow { display: inline-block; transition: transform 0.15s; color: var(--muted); margin-right: 6px; }
+  tr.doc-row.open .arrow { transform: rotate(90deg); }
+  tr.chunks-row td { padding: 0; background: #fafbfd; }
+  tr.chunks-row .chunks-body { padding: 14px 16px; }
+  .chunk-card {
+    background: white;
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 10px 12px;
+    margin-bottom: 8px;
+  }
+  .chunk-card-meta {
+    font-size: 11px;
+    color: var(--muted);
+    margin-bottom: 6px;
+    display: flex;
+    gap: 10px;
+    flex-wrap: wrap;
+  }
+  .chunk-card-text {
+    white-space: pre-wrap;
+    font-size: 13px;
+    word-break: break-word;
+  }
+  .chunk-card-toggle {
+    color: var(--accent);
+    cursor: pointer;
+    font-size: 12px;
+    border: none;
+    background: none;
+    padding: 4px 0 0 0;
+  }
+  .empty {
+    padding: 20px;
+    color: var(--muted);
+    text-align: center;
+    font-style: italic;
+  }
+  button {
+    font: inherit;
+    padding: 8px 14px;
+    border-radius: 8px;
+    border: 1px solid var(--border);
+    background: var(--panel);
+    cursor: pointer;
+  }
+  button.primary {
+    background: var(--accent);
+    border-color: var(--accent);
+    color: white;
+  }
+  button.primary:hover:not(:disabled) { background: var(--accent-hover); }
+  button.danger {
+    color: var(--err);
+    border-color: var(--err);
+    background: transparent;
+    font-size: 12px;
+    padding: 4px 10px;
+  }
+  button:disabled { opacity: 0.5; cursor: not-allowed; }
+  .row { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }
+  .row label { color: var(--muted); font-size: 13px; }
+  textarea, input[type=number], input[type=text] {
+    font: inherit;
+    padding: 8px 10px;
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    width: 100%;
+    background: white;
+  }
+  textarea { min-height: 90px; resize: vertical; }
+  .num { width: 80px; }
+  .columns {
+    display: grid;
+    grid-template-columns: 1fr 1fr 1fr;
+    gap: 16px;
+    margin-top: 16px;
+  }
+  .col {
+    border: 1px solid var(--border);
+    border-radius: 10px;
+    padding: 14px;
+    background: #fafbfd;
+    min-height: 200px;
+    overflow: hidden;
+  }
+  .col h3 {
+    margin: 0 0 10px 0;
+    font-size: 12px;
+    font-weight: 600;
+    color: var(--muted);
+    text-transform: uppercase;
+    letter-spacing: 0.03em;
+  }
+  .chunk {
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 10px;
+    margin-bottom: 8px;
+    background: white;
+  }
+  .chunk-head {
+    display: flex;
+    justify-content: space-between;
+    font-size: 11px;
+    color: var(--muted);
+    margin-bottom: 6px;
+  }
+  .score {
+    background: var(--chip-bg);
+    padding: 1px 6px;
+    border-radius: 999px;
+    color: var(--accent);
+    font-weight: 600;
+  }
+  pre {
+    font-family: var(--mono);
+    font-size: 12px;
+    white-space: pre-wrap;
+    word-break: break-word;
+    margin: 0;
+    background: white;
+    padding: 10px;
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    max-height: 500px;
+    overflow: auto;
+  }
+  .answer {
+    background: white;
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 14px;
+    white-space: pre-wrap;
+    word-break: break-word;
+    line-height: 1.6;
+  }
+  .answer-meta {
+    margin-top: 8px;
+    font-size: 11px;
+    color: var(--muted);
+  }
+  .toast {
+    position: fixed;
+    bottom: 20px;
+    right: 20px;
+    background: #111827;
+    color: white;
+    padding: 10px 16px;
+    border-radius: 8px;
+    font-size: 13px;
+    opacity: 0;
+    transition: opacity 0.2s;
+    pointer-events: none;
+  }
+  .toast.show { opacity: 1; }
+  .toast.err { background: var(--err); }
+  .mini { font-size: 12px; color: var(--muted); }
+  .spinner {
+    width: 14px; height: 14px;
+    border: 2px solid var(--border);
+    border-top-color: var(--accent);
+    border-radius: 50%;
+    display: inline-block;
+    animation: spin 0.8s linear infinite;
+    vertical-align: middle;
+  }
+  @keyframes spin { to { transform: rotate(360deg); } }
+  @media (max-width: 900px) {
+    .columns { grid-template-columns: 1fr; }
+  }
+</style>
+</head>
+<body>
+
+<header>
+  <h1>Chat Agent for Patients — Debug</h1>
+  <span class="status"><span class="dot" id="dot"></span><span id="status-text">проверяю…</span></span>
+  <span class="stats" id="stats"></span>
+</header>
+
+<main>
+
+  <section class="panel">
+    <h2>База знаний</h2>
+    <div id="dropzone" class="dropzone">
+      Перетащи файл (.pdf, .docx, .txt, .md) или кликни для выбора
+      <input type="file" id="file-input" accept=".pdf,.docx,.doc,.txt,.md">
+    </div>
+    <div id="upload-status" class="mini" style="margin-top:10px;"></div>
+    <table>
+      <thead>
+        <tr>
+          <th>Имя</th>
+          <th>Тип</th>
+          <th>Чанков</th>
+          <th>Загружен</th>
+          <th></th>
+        </tr>
+      </thead>
+      <tbody id="docs-tbody">
+        <tr><td colspan="5" class="empty">Документы ещё не загружены</td></tr>
+      </tbody>
+    </table>
+  </section>
+
+  <section class="panel">
+    <h2>Тест-вопрос от пациента</h2>
+    <textarea id="question" placeholder="Например: как записать ребёнка к лору?"></textarea>
+    <div class="row" style="margin-top:12px;">
+      <label>top_k <input type="number" class="num" id="top_k" value="5" min="1" max="20"></label>
+      <label>temperature <input type="number" class="num" id="temperature" value="0.2" min="0" max="2" step="0.1"></label>
+      <button class="primary" id="ask-btn">Отправить</button>
+      <span id="ask-status" class="mini"></span>
+    </div>
+
+    <div class="columns">
+      <div class="col">
+        <h3>Что нашёл RAG</h3>
+        <div id="col-chunks"><div class="mini">— пока пусто —</div></div>
+      </div>
+      <div class="col">
+        <h3>Собранный промпт</h3>
+        <div id="col-prompt"><div class="mini">— пока пусто —</div></div>
+      </div>
+      <div class="col">
+        <h3>Ответ агента</h3>
+        <div id="col-answer"><div class="mini">— пока пусто —</div></div>
+      </div>
+    </div>
+  </section>
+
+</main>
+
+<div class="toast" id="toast"></div>
+
+<script>
+const API = "";  // prefix for every request path; empty keeps requests relative to this page's origin
+
+const $ = (id) => document.getElementById(id);  // shorthand element lookup by id
+const esc = (s) => String(s ?? "").replace(/[&<>"']/g, c => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]));  // HTML-escape; null/undefined become ""
+function toast(msg, kind = "ok") {  // show a transient message; kind "err" selects the error style
+  const t = $("toast");
+  t.textContent = msg;
+  t.className = "toast show" + (kind === "err" ? " err" : "");
+  clearTimeout(toast.hideTimer);  // cancel the pending hide so an older toast cannot hide this one early
+  toast.hideTimer = setTimeout(() => t.className = "toast", 2500);
+}
+
+async function api(path, opts = {}) {  // fetch API + path and return parsed JSON; throws Error(detail) on a non-2xx status
+  const res = await fetch(API + path, opts);
+  if (!res.ok) {
+    let detail = res.statusText;
+    try { const j = await res.json(); if (j.detail) detail = typeof j.detail === "string" ? j.detail : JSON.stringify(j.detail); } catch {}  // non-JSON body: keep statusText
+    throw new Error(detail);  // a structured detail is stringified above so the message is never "[object Object]"
+  }
+  return res.json();
+}
+
+async function refreshHealth() {  // read GET /health and update the status dot, status label and header stats
+  try {
+    const h = await api("/health");
+    $("dot").className = "dot " + (h.status === "ok" ? "ok" : "warn");
+    $("status-text").textContent = h.status === "ok" ? "готов" : h.status;
+    $("stats").innerHTML = `модель <b>${esc(h.embedding_model)}</b> · документов <b>${esc(h.documents_count)}</b> · чанков <b>${esc(h.chunks_count)}</b>`;  // every interpolated value is escaped
+  } catch (e) {
+    $("dot").className = "dot err";  // any failure of the health request is shown as "unavailable"
+    $("status-text").textContent = "недоступен";
+    $("stats").textContent = "";
+  }
+}
+
+async function refreshDocs() {  // re-render the documents table from GET /documents
+  try {
+    const r = await api("/documents");  // every value below is HTML-escaped before it reaches innerHTML
+    const tbody = $("docs-tbody");  // handler arguments are JSON-encoded, then escaped, so a quote in a name cannot end the JS string
+    if (!r.documents.length) {
+      tbody.innerHTML = '<tr><td colspan="5" class="empty">Документы ещё не загружены</td></tr>';
+      return;
+    }
+    tbody.innerHTML = r.documents.map(d => `
+      <tr class="doc-row" id="doc-${esc(d.document_id)}" onclick="toggleChunks(${esc(JSON.stringify(d.document_id ?? ""))})">
+        <td><span class="arrow">▶</span>${esc(d.name)}</td>
+        <td>${esc(d.file_type)}</td>
+        <td>${esc(d.chunks_count)}</td>
+        <td class="mini">${esc((d.created_at || "").slice(0, 19).replace("T", " "))}</td>
+        <td><button class="danger" onclick="event.stopPropagation(); deleteDoc(${esc(JSON.stringify(d.document_id ?? ""))}, ${esc(JSON.stringify(d.name ?? ""))})">удалить</button></td>
+      </tr>
+      <tr class="chunks-row" id="chunks-${esc(d.document_id)}" style="display:none;">
+        <td colspan="5"><div class="chunks-body"><div class="mini">загружаю…</div></div></td>
+      </tr>
+    `).join("");
+  } catch (e) {
+    toast("Не удалось загрузить список: " + e.message, "err");
+  }
+}
+
+async function toggleChunks(id) {  // expand or collapse the chunk list under a document row, loading chunks on expand
+  const row = $("doc-" + id);
+  const chunksRow = $("chunks-" + id);
+  const isOpen = chunksRow.style.display !== "none";
+  if (isOpen) {
+    chunksRow.style.display = "none";
+    row.classList.remove("open");
+    return;
+  }
+  chunksRow.style.display = "";
+  row.classList.add("open");
+  const body = chunksRow.querySelector(".chunks-body");
+  body.innerHTML = '<div class="mini">загружаю…</div>';
+  try {
+    const d = await api(`/documents/${encodeURIComponent(id)}/chunks`);  // encode so the id stays a single path segment
+    if (!d.chunks.length) {
+      body.innerHTML = '<div class="mini">чанков нет</div>';
+      return;
+    }
+    body.innerHTML = d.chunks.map(c => {
+      const long = c.text.length > 300;  // only long chunks get the expand/collapse button
+      const short = long ? c.text.slice(0, 300) + "…" : c.text;
+      const safeFull = esc(c.text);  // esc() already encodes double quotes, so the result is attribute-safe
+      return `
+        <div class="chunk-card">
+          <div class="chunk-card-meta">
+            <span>#${esc(c.index)}</span>
+            <span>раздел: ${esc(c.section || "—")}</span>
+            ${c.page_number ? `<span>стр. ${esc(c.page_number)}</span>` : ""}
+            <span>${esc(c.char_length.toLocaleString("ru"))} симв.</span>
+          </div>
+          <div class="chunk-card-text" data-short="${esc(short)}" data-full="${safeFull}" data-expanded="0">${esc(short)}</div>
+          ${long ? '<button class="chunk-card-toggle" onclick="toggleChunkText(this)">показать полностью</button>' : ""}
+        </div>
+      `;
+    }).join("");
+  } catch (e) {
+    body.innerHTML = `<div class="mini" style="color:var(--err)">Ошибка: ${esc(e.message)}</div>`;
+  }
+}
+
+function toggleChunkText(btn) {  // switch a chunk card between its short preview and its full text
+  const textEl = btn.previousElementSibling;  // the .chunk-card-text element rendered just before the button
+  const expanded = textEl.getAttribute("data-expanded") === "1";
+  textEl.textContent = expanded ? textEl.getAttribute("data-short") : textEl.getAttribute("data-full");  // set as text, not as HTML
+  textEl.setAttribute("data-expanded", expanded ? "0" : "1");
+  btn.textContent = expanded ? "показать полностью" : "свернуть";
+}
+
+async function deleteDoc(id, name) {  // ask for confirmation, then DELETE the document and refresh the list and stats
+  if (!confirm(`Удалить документ «${name}»?`)) return;
+  try {
+    await api(`/documents/${encodeURIComponent(id)}`, { method: "DELETE" });  // encode so the id stays a single path segment
+    toast("Удалён");
+    refreshDocs(); refreshHealth();  // not awaited: both refreshes run in the background
+  } catch (e) {
+    toast("Ошибка: " + e.message, "err");
+  }
+}
+
+async function uploadFile(file) {  // POST one file to /documents/upload and report the outcome
+  $("upload-status").innerHTML = `<span class="spinner"></span> загружаю <b>${esc(file.name)}</b>…`;
+  const fd = new FormData();  // the file is sent as the "file" form field
+  fd.append("file", file);
+  try {
+    const r = await api("/documents/upload", { method: "POST", body: fd });
+    $("upload-status").innerHTML = `✓ <b>${esc(r.name)}</b> — ${r.chunks_count} чанков`;
+    toast("Загружено");
+    refreshDocs(); refreshHealth();  // not awaited: both refreshes run in the background
+  } catch (e) {
+    $("upload-status").innerHTML = `✕ ошибка: ${esc(e.message)}`;
+    toast("Ошибка загрузки: " + e.message, "err");
+  }
+}
+
+function initDropzone() {  // wire click-to-select and drag-and-drop uploads onto the dropzone element
+  const dz = $("dropzone");
+  const input = $("file-input");
+  dz.addEventListener("click", () => input.click());  // the input is hidden by CSS, so the picker is opened programmatically
+  input.addEventListener("change", () => {
+    if (input.files[0]) uploadFile(input.files[0]);
+    input.value = "";  // clear the selection after handing the file to uploadFile
+  });
+  ["dragenter", "dragover"].forEach(e =>
+    dz.addEventListener(e, ev => { ev.preventDefault(); dz.classList.add("drag"); })  // highlight while a drag is over the zone
+  );
+  ["dragleave", "drop"].forEach(e =>
+    dz.addEventListener(e, ev => { ev.preventDefault(); dz.classList.remove("drag"); })  // remove the highlight
+  );
+  dz.addEventListener("drop", ev => {
+    if (ev.dataTransfer.files[0]) uploadFile(ev.dataTransfer.files[0]);  // only the first dropped file is uploaded
+  });
+}
+
+async function ask() {  // send the question to POST /query and render the sources, the prompt and the answer
+  const question = $("question").value.trim();
+  if (!question) { toast("Введите вопрос", "err"); return; }
+  const btn = $("ask-btn");
+  btn.disabled = true;  // re-enabled in the finally block below
+  $("ask-status").innerHTML = '<span class="spinner"></span> думаю…';
+  $("col-chunks").innerHTML = '<div class="mini">…</div>';
+  $("col-prompt").innerHTML = '<div class="mini">…</div>';
+  $("col-answer").innerHTML = '<div class="mini">…</div>';
+
+  try {
+    const r = await api("/query", {
+      method: "POST",
+      headers: {"Content-Type": "application/json"},
+      body: JSON.stringify({
+        text: question,
+        top_k: parseInt($("top_k").value, 10) || 5,  // falls back to 5 when the field is empty or 0
+        temperature: parseFloat($("temperature").value),  // NaN (empty field) is serialized as null
+      }),
+    });
+
+    $("col-chunks").innerHTML = r.sources.length
+      ? r.sources.map((s, i) => `
+          <div class="chunk">
+            <div class="chunk-head">
+              <span>[${i + 1}] ${esc(s.document_name)} · ${esc(s.section || "—")}</span>
+              <span class="score">${(s.relevance_score * 100).toFixed(1)}%</span>
+            </div>
+            <div>${esc(s.chunk_text)}</div>
+          </div>`).join("")
+      : '<div class="mini">— нет релевантных чанков —</div>';
+
+    $("col-prompt").innerHTML = `<pre>${esc(r.assembled_prompt)}</pre>`;
+    $("col-answer").innerHTML = `
+      <div class="answer">${esc(r.answer)}</div>
+      <div class="answer-meta">модель: ${esc(r.model_used)} · источников: ${r.sources.length}</div>
+    `;
+  } catch (e) {
+    $("col-chunks").innerHTML = $("col-prompt").innerHTML = '<div class="mini">— пока пусто —</div>';  // drop the "…" loading placeholders
+    $("col-answer").innerHTML = `<div class="mini" style="color:var(--err)">Ошибка: ${esc(e.message)}</div>`;
+    toast("Ошибка: " + e.message, "err");
+  } finally {
+    $("ask-status").textContent = "";  // clear the spinner on success and on failure alike
+    btn.disabled = false;
+  }
+}
+
+$("ask-btn").addEventListener("click", ask);
+$("question").addEventListener("keydown", e => {
+  if ((e.metaKey || e.ctrlKey) && e.key === "Enter") ask();  // Cmd/Ctrl+Enter submits the question
+});
+
+initDropzone();
+refreshHealth();
+refreshDocs();
+setInterval(refreshHealth, 15000);  // re-check backend health every 15 seconds
+</script>
+</body>
+</html>