diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..a93d34a
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,6 @@
+CHROMA_PERSIST_DIR=./data/chroma
+EMBEDDING_MODEL=intfloat/multilingual-e5-large
+DEEPSEEK_API_KEY=sk-your-key-here
+DEEPSEEK_MODEL=deepseek-chat
+DEEPSEEK_BASE_URL=https://api.deepseek.com
+LOG_LEVEL=info
diff --git a/.gitignore b/.gitignore
index af23dc6..3431547 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ data/chroma/
*.egg-info/
.venv/
.DS_Store
+server.log
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..69467fa
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.11-slim
+
+RUN apt-get update && apt-get install -y \
+ curl \
+ && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+EXPOSE 8003
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8003"]
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..662e9e7
--- /dev/null
+++ b/config.py
@@ -0,0 +1,19 @@
+from pydantic_settings import BaseSettings
+
+
+class Settings(BaseSettings):
+ chroma_persist_dir: str = "./data/chroma"
+ embedding_model: str = "intfloat/multilingual-e5-large"
+ deepseek_api_key: str = ""
+ deepseek_model: str = "deepseek-chat"
+ deepseek_base_url: str = "https://api.deepseek.com"
+ log_level: str = "info"
+
+ max_chunk_size: int = 1200
+ min_chunk_size: int = 200
+ overlap_sentences: int = 2
+
+ model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
+
+
+settings = Settings()
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..9e617ab
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,24 @@
+services:
+ rag-service:
+ build: .
+ ports:
+ - "8003:8003"
+ volumes:
+ - chroma_data:/app/data
+ - embedding_cache:/root/.cache
+ environment:
+ CHROMA_PERSIST_DIR: /app/data/chroma
+ EMBEDDING_MODEL: intfloat/multilingual-e5-large
+ DEEPSEEK_API_KEY: ${DEEPSEEK_API_KEY}
+ DEEPSEEK_MODEL: ${DEEPSEEK_MODEL:-deepseek-chat}
+ LOG_LEVEL: ${LOG_LEVEL:-info}
+ healthcheck:
+ test: ["CMD-SHELL", "curl -sf http://localhost:8003/health || exit 1"]
+ interval: 15s
+ timeout: 10s
+ retries: 10
+ start_period: 180s
+
+volumes:
+ chroma_data:
+ embedding_cache:
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..11ada98
--- /dev/null
+++ b/main.py
@@ -0,0 +1,55 @@
+import logging
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+
+from config import settings
+from services.embeddings import EmbeddingService
+from services.vectorstore import VectorStoreService
+
+logger = logging.getLogger(__name__)
+
+embedding_service: EmbeddingService | None = None
+vectorstore_service: VectorStoreService | None = None
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+ global embedding_service, vectorstore_service
+ logging.basicConfig(level=getattr(logging, settings.log_level.upper(), logging.INFO))
+ logger.info("Loading embedding model: %s", settings.embedding_model)
+ embedding_service = EmbeddingService(settings.embedding_model)
+ logger.info("Embedding model loaded")
+ vectorstore_service = VectorStoreService(
+ persist_dir=settings.chroma_persist_dir,
+ embedding_service=embedding_service,
+ )
+ logger.info("ChromaDB initialized at %s", settings.chroma_persist_dir)
+ yield
+ logger.info("Shutting down")
+
+
+app = FastAPI(
+ title="Chat Agent for Patients — Tuning Tool",
+ description="RAG-ядро и инструмент настройки пациентского чат-агента",
+ version="0.1.0",
+ lifespan=lifespan,
+)
+
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=["*"],
+ allow_credentials=True,
+ allow_methods=["*"],
+ allow_headers=["*"],
+)
+
+from routers import documents, health, query # noqa: E402
+
+app.include_router(health.router)
+app.include_router(documents.router)
+app.include_router(query.router)
+
+app.mount("/", StaticFiles(directory="static", html=True), name="static")
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/requests.py b/models/requests.py
new file mode 100644
index 0000000..afcc5d4
--- /dev/null
+++ b/models/requests.py
@@ -0,0 +1,9 @@
+from pydantic import BaseModel, Field
+
+
+class QueryRequest(BaseModel):
+ text: str = Field(..., description="Вопрос от лица пациента")
+ top_k: int = Field(5, ge=1, le=20, description="Количество чанков для retrieval")
+ document_ids: list[str] | None = Field(None, description="Ограничить поиск конкретными документами")
+ temperature: float | None = Field(None, ge=0.0, le=2.0)
+ max_tokens: int | None = Field(None, ge=100, le=8000)
diff --git a/models/responses.py b/models/responses.py
new file mode 100644
index 0000000..619f668
--- /dev/null
+++ b/models/responses.py
@@ -0,0 +1,77 @@
+from pydantic import BaseModel, Field
+
+
+class DocumentInfo(BaseModel):
+ document_id: str
+ name: str
+ chunks_count: int
+ file_type: str
+ created_at: str
+ metadata: dict = Field(default_factory=dict)
+
+
+class ChunkPreview(BaseModel):
+ index: int
+ section: str = ""
+ page_number: int = 0
+ text_preview: str = ""
+ char_length: int = 0
+
+
+class DocumentUploadResponse(BaseModel):
+ document_id: str
+ name: str
+ chunks_count: int
+ status: str = "indexed"
+ created_at: str
+ chunks_preview: list[ChunkPreview] = Field(default_factory=list)
+
+
+class DocumentListResponse(BaseModel):
+ documents: list[DocumentInfo]
+ total: int
+
+
+class ChunkDetail(BaseModel):
+ index: int
+ section: str = ""
+ page_number: int = 0
+ text: str = ""
+ char_length: int = 0
+
+
+class DocumentChunksResponse(BaseModel):
+ document_id: str
+ name: str
+ file_type: str
+ chunks_count: int
+ chunks: list[ChunkDetail] = Field(default_factory=list)
+
+
+class DocumentDeleteResponse(BaseModel):
+ ok: bool = True
+ deleted_chunks: int
+
+
+class SourceInfo(BaseModel):
+ document_id: str
+ document_name: str
+ chunk_text: str
+ section: str = ""
+ page: int = 0
+ relevance_score: float = 0.0
+
+
+class QueryResponse(BaseModel):
+ answer: str
+ sources: list[SourceInfo]
+ model_used: str
+ assembled_prompt: str = ""
+
+
+class HealthResponse(BaseModel):
+ status: str = "ok"
+ chromadb: str
+ embedding_model: str
+ documents_count: int
+ chunks_count: int
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b2320e1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+fastapi==0.115.5
+uvicorn[standard]==0.32.1
+python-multipart==0.0.12
+chromadb==0.5.23
+sentence-transformers==3.3.1
+pymupdf==1.25.1
+python-docx==1.1.2
+httpx==0.28.1
+pydantic-settings==2.7.1
diff --git a/routers/__init__.py b/routers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/routers/documents.py b/routers/documents.py
new file mode 100644
index 0000000..ab3c229
--- /dev/null
+++ b/routers/documents.py
@@ -0,0 +1,155 @@
+import logging
+from datetime import datetime, timezone
+
+from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+
+from models.responses import (
+ ChunkDetail,
+ ChunkPreview,
+ DocumentChunksResponse,
+ DocumentDeleteResponse,
+ DocumentInfo,
+ DocumentListResponse,
+ DocumentUploadResponse,
+)
+from services.document_processor import process_document
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/documents", tags=["documents"])
+
+ALLOWED_EXTENSIONS = {".pdf", ".docx", ".doc", ".txt", ".md"}
+MAX_FILE_SIZE = 50 * 1024 * 1024 # 50 MB
+
+
+@router.post("/upload", response_model=DocumentUploadResponse)
+async def upload_document(
+ file: UploadFile = File(...),
+ document_name: str | None = Form(None),
+):
+ from main import vectorstore_service
+
+ if vectorstore_service is None:
+ raise HTTPException(status_code=503, detail="Service not ready")
+
+ filename = file.filename or "unknown"
+ ext = "." + filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
+ if ext not in ALLOWED_EXTENSIONS:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Unsupported file format: {ext}. Allowed: {', '.join(ALLOWED_EXTENSIONS)}",
+ )
+
+ file_bytes = await file.read()
+ if len(file_bytes) > MAX_FILE_SIZE:
+ raise HTTPException(status_code=400, detail="File too large (max 50 MB)")
+ if len(file_bytes) == 0:
+ raise HTTPException(status_code=400, detail="Empty file")
+
+ display_name = document_name or filename
+ try:
+ document_id, sections, chunks = process_document(file_bytes, filename)
+ except ValueError as e:
+ raise HTTPException(status_code=400, detail=str(e))
+ except Exception as e:
+ logger.exception("Failed to process document: %s", filename)
+ raise HTTPException(status_code=500, detail=f"Error processing document: {e}")
+
+ if not chunks:
+ raise HTTPException(status_code=400, detail="No content could be extracted from the document")
+
+ file_type = ext.lstrip(".")
+ chunks_count = vectorstore_service.add_document(
+ document_id=document_id,
+ document_name=display_name,
+ file_type=file_type,
+ chunks=[
+ {
+ "text": c.text,
+ "section": c.section,
+ "page_number": c.page_number,
+ "chunk_index": c.chunk_index,
+ }
+ for c in chunks
+ ],
+ )
+
+ chunks_prev = [
+ ChunkPreview(
+ index=c.chunk_index,
+ section=c.section,
+ page_number=c.page_number,
+ text_preview=c.text[:300],
+ char_length=len(c.text),
+ )
+ for c in chunks[:3]
+ ]
+
+ return DocumentUploadResponse(
+ document_id=document_id,
+ name=display_name,
+ chunks_count=chunks_count,
+ status="indexed",
+ created_at=datetime.now(timezone.utc).isoformat(),
+ chunks_preview=chunks_prev,
+ )
+
+
+@router.get("", response_model=DocumentListResponse)
+async def list_documents():
+ from main import vectorstore_service
+
+ if vectorstore_service is None:
+ raise HTTPException(status_code=503, detail="Service not ready")
+
+ docs = vectorstore_service.list_documents()
+ return DocumentListResponse(
+ documents=[DocumentInfo(**d) for d in docs],
+ total=len(docs),
+ )
+
+
+@router.get("/{document_id}/chunks", response_model=DocumentChunksResponse)
+async def get_document_chunks(document_id: str):
+ from main import vectorstore_service
+
+ if vectorstore_service is None:
+ raise HTTPException(status_code=503, detail="Service not ready")
+
+ raw_chunks = vectorstore_service.get_document_chunks(document_id)
+ if not raw_chunks:
+ raise HTTPException(status_code=404, detail="Document not found")
+
+ meta0 = raw_chunks[0]["metadata"]
+ chunks = [
+ ChunkDetail(
+ index=c["metadata"].get("chunk_index", 0),
+ section=c["metadata"].get("section", ""),
+ page_number=c["metadata"].get("page_number", 0),
+ text=c["text"],
+ char_length=len(c["text"]),
+ )
+ for c in raw_chunks
+ ]
+
+ return DocumentChunksResponse(
+ document_id=document_id,
+ name=meta0.get("document_name", ""),
+ file_type=meta0.get("file_type", ""),
+ chunks_count=len(chunks),
+ chunks=chunks,
+ )
+
+
+@router.delete("/{document_id}", response_model=DocumentDeleteResponse)
+async def delete_document(document_id: str):
+ from main import vectorstore_service
+
+ if vectorstore_service is None:
+ raise HTTPException(status_code=503, detail="Service not ready")
+
+ deleted = vectorstore_service.delete_document(document_id)
+ if deleted == 0:
+ raise HTTPException(status_code=404, detail="Document not found")
+
+ return DocumentDeleteResponse(ok=True, deleted_chunks=deleted)
diff --git a/routers/health.py b/routers/health.py
new file mode 100644
index 0000000..85015f4
--- /dev/null
+++ b/routers/health.py
@@ -0,0 +1,29 @@
+from fastapi import APIRouter
+
+from config import settings
+from models.responses import HealthResponse
+
+router = APIRouter()
+
+
+@router.get("/health", response_model=HealthResponse)
+async def health():
+ from main import vectorstore_service
+
+ if vectorstore_service is None:
+ return HealthResponse(
+ status="loading",
+ chromadb="not_connected",
+ embedding_model=settings.embedding_model,
+ documents_count=0,
+ chunks_count=0,
+ )
+
+ stats = vectorstore_service.get_stats()
+ return HealthResponse(
+ status="ok",
+ chromadb="connected",
+ embedding_model=settings.embedding_model,
+ documents_count=stats["documents_count"],
+ chunks_count=stats["chunks_count"],
+ )
diff --git a/routers/query.py b/routers/query.py
new file mode 100644
index 0000000..cee837c
--- /dev/null
+++ b/routers/query.py
@@ -0,0 +1,47 @@
+import logging
+
+from fastapi import APIRouter, HTTPException
+
+from config import settings
+from models.requests import QueryRequest
+from models.responses import QueryResponse, SourceInfo
+from services.llm_client import LLMClient
+from services.rag_pipeline import rag_query
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["query"])
+
+
+@router.post("/query", response_model=QueryResponse)
+async def query_rag(request: QueryRequest):
+ from main import vectorstore_service
+
+ if vectorstore_service is None:
+ raise HTTPException(status_code=503, detail="Service not ready")
+
+ if not settings.deepseek_api_key:
+ raise HTTPException(status_code=500, detail="DEEPSEEK_API_KEY not configured")
+
+ llm_client = LLMClient()
+
+ try:
+ result = await rag_query(
+ vectorstore=vectorstore_service,
+ llm_client=llm_client,
+ question=request.text,
+ top_k=request.top_k,
+ document_ids=request.document_ids,
+ temperature=request.temperature,
+ max_tokens=request.max_tokens,
+ )
+ except Exception as e:
+ logger.exception("RAG query failed")
+ raise HTTPException(status_code=500, detail=f"RAG query error: {e}")
+
+ return QueryResponse(
+ answer=result["answer"],
+ sources=[SourceInfo(**s) for s in result["sources"]],
+ model_used=result["model_used"],
+ assembled_prompt=result.get("assembled_prompt", ""),
+ )
diff --git a/services/__init__.py b/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/services/document_processor.py b/services/document_processor.py
new file mode 100644
index 0000000..2659a05
--- /dev/null
+++ b/services/document_processor.py
@@ -0,0 +1,300 @@
+import io
+import logging
+import re
+import uuid
+from dataclasses import dataclass
+from pathlib import Path
+
+import fitz # pymupdf
+from docx import Document as DocxDocument
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ParsedSection:
+ heading: str
+ heading_level: int
+ body: str
+ page_number: int = 0
+
+
+@dataclass
+class Chunk:
+ text: str
+ section: str = ""
+ page_number: int = 0
+ chunk_index: int = 0
+
+
+# --- Parsers ---
+
+
+def parse_pdf(file_bytes: bytes) -> list[ParsedSection]:
+ doc = fitz.open(stream=file_bytes, filetype="pdf")
+ sections: list[ParsedSection] = []
+ current_heading = ""
+ current_body_lines: list[str] = []
+ current_page = 0
+
+ for page_num in range(len(doc)):
+ page = doc[page_num]
+ blocks = page.get_text("dict")["blocks"]
+
+ for block in blocks:
+ if "lines" not in block:
+ continue
+ for line in block["lines"]:
+ text = "".join(span["text"] for span in line["spans"]).strip()
+ if not text:
+ continue
+
+ max_size = max(span["size"] for span in line["spans"])
+ is_bold = any("bold" in span["font"].lower() for span in line["spans"])
+
+ if (max_size >= 14 or (is_bold and max_size >= 12)) and len(text) < 200:
+ if current_heading or current_body_lines:
+ sections.append(ParsedSection(
+ heading=current_heading,
+ heading_level=1 if max_size >= 16 else 2,
+ body="\n".join(current_body_lines).strip(),
+ page_number=current_page,
+ ))
+ current_heading = text
+ current_body_lines = []
+ current_page = page_num + 1
+ else:
+ current_body_lines.append(text)
+ if not current_heading:
+ current_page = page_num + 1
+
+ if current_heading or current_body_lines:
+ sections.append(ParsedSection(
+ heading=current_heading,
+ heading_level=2,
+ body="\n".join(current_body_lines).strip(),
+ page_number=current_page,
+ ))
+
+ doc.close()
+ return sections
+
+
+def parse_docx(file_bytes: bytes) -> list[ParsedSection]:
+ doc = DocxDocument(io.BytesIO(file_bytes))
+ sections: list[ParsedSection] = []
+ current_heading = ""
+ current_level = 0
+ current_body_lines: list[str] = []
+
+ for para in doc.paragraphs:
+ text = para.text.strip()
+ if not text:
+ continue
+
+ style_name = (para.style.name or "").lower()
+
+ if "heading" in style_name or "title" in style_name:
+ if current_heading or current_body_lines:
+ sections.append(ParsedSection(
+ heading=current_heading,
+ heading_level=current_level or 1,
+ body="\n".join(current_body_lines).strip(),
+ ))
+ level_match = re.search(r"\d+", style_name)
+ current_level = int(level_match.group()) if level_match else 1
+ current_heading = text
+ current_body_lines = []
+ else:
+ current_body_lines.append(text)
+
+ if current_heading or current_body_lines:
+ sections.append(ParsedSection(
+ heading=current_heading,
+ heading_level=current_level or 1,
+ body="\n".join(current_body_lines).strip(),
+ ))
+
+ return sections
+
+
+def parse_text(file_bytes: bytes, is_markdown: bool = False) -> list[ParsedSection]:
+ """Parse wiki-style TXT/MD.
+
+ Эвристики под wiki операторов:
+ - markdown-заголовки (#, ##, ...)
+ - нумерованные пункты «1.», «1.1.», «1.1.1.»
+ - FAQ-паттерн «В:» / «Вопрос:» — воспринимаем как начало новой секции
+ - ALL-CAPS строки (короткие) — заголовок
+ """
+ text = file_bytes.decode("utf-8", errors="replace")
+ lines = text.split("\n")
+ sections: list[ParsedSection] = []
+ current_heading = ""
+ current_level = 0
+ current_body_lines: list[str] = []
+
+ md_heading_re = re.compile(r"^(#{1,6})\s+(.+)")
+ numbered_heading_re = re.compile(r"^(\d+(?:\.\d+)*\.?)\s+([А-ЯЁA-Z].*)")
+ faq_question_re = re.compile(r"^(В|Вопрос|Q|Question)\s*[:\.]\s*(.+)", re.IGNORECASE)
+
+ for line in lines:
+ stripped = line.strip()
+
+ heading_text = None
+ heading_level = 0
+
+ md_match = md_heading_re.match(stripped)
+ if md_match:
+ heading_level = len(md_match.group(1))
+ heading_text = md_match.group(2).strip()
+
+ if not heading_text:
+ num_match = numbered_heading_re.match(stripped)
+ if num_match and len(stripped) < 200:
+ dots = num_match.group(1).count(".")
+ heading_level = max(1, dots + 1)
+ heading_text = stripped
+
+ if not heading_text:
+ faq_match = faq_question_re.match(stripped)
+ if faq_match and len(stripped) < 300:
+ heading_text = faq_match.group(2).strip()
+ heading_level = 3
+
+ if not heading_text and stripped.isupper() and 3 < len(stripped) < 200:
+ heading_text = stripped
+ heading_level = 1
+
+ if heading_text:
+ if current_heading or current_body_lines:
+ sections.append(ParsedSection(
+ heading=current_heading,
+ heading_level=current_level or 1,
+ body="\n".join(current_body_lines).strip(),
+ ))
+ current_heading = heading_text
+ current_level = heading_level
+ current_body_lines = []
+ else:
+ current_body_lines.append(line)
+
+ if current_heading or current_body_lines:
+ sections.append(ParsedSection(
+ heading=current_heading,
+ heading_level=current_level or 1,
+ body="\n".join(current_body_lines).strip(),
+ ))
+
+ return sections
+
+
+# --- Chunker ---
+
+
+def _split_sentences(text: str) -> list[str]:
+ sentences = re.split(r"(?<=[.!?])\s+", text)
+ return [s.strip() for s in sentences if s.strip()]
+
+
+def chunk_sections(
+ sections: list[ParsedSection],
+ max_chunk_size: int | None = None,
+ min_chunk_size: int | None = None,
+ overlap_sentences: int | None = None,
+) -> list[Chunk]:
+ """Чанкинг wiki-секций.
+
+ - Малые секции (FAQ-ответы) держим целиком — один чанк = одна тема.
+ - Большие секции (регламенты) режем по абзацам, с overlap последних N предложений.
+ - Мелкие соседние секции склеиваем, чтобы не плодить огрызки.
+ """
+ max_size = max_chunk_size or settings.max_chunk_size
+ min_size = min_chunk_size or settings.min_chunk_size
+ overlap = overlap_sentences or settings.overlap_sentences
+
+ raw_chunks: list[Chunk] = []
+
+ for section in sections:
+ heading_prefix = f"{section.heading}\n\n" if section.heading else ""
+ full_text = heading_prefix + section.body
+
+ if len(full_text) <= max_size:
+ raw_chunks.append(Chunk(
+ text=full_text.strip(),
+ section=section.heading,
+ page_number=section.page_number,
+ ))
+ else:
+ paragraphs = section.body.split("\n")
+ current_text = heading_prefix
+ for para in paragraphs:
+ if len(current_text) + len(para) + 1 > max_size and len(current_text) > len(heading_prefix):
+ raw_chunks.append(Chunk(
+ text=current_text.strip(),
+ section=section.heading,
+ page_number=section.page_number,
+ ))
+ current_text = heading_prefix + para + "\n"
+ else:
+ current_text += para + "\n"
+ if current_text.strip() and current_text.strip() != heading_prefix.strip():
+ raw_chunks.append(Chunk(
+ text=current_text.strip(),
+ section=section.heading,
+ page_number=section.page_number,
+ ))
+
+ merged: list[Chunk] = []
+ for chunk in raw_chunks:
+ if merged and len(merged[-1].text) < min_size:
+ merged[-1].text += "\n\n" + chunk.text
+ if not merged[-1].section:
+ merged[-1].section = chunk.section
+ else:
+ merged.append(Chunk(
+ text=chunk.text,
+ section=chunk.section,
+ page_number=chunk.page_number,
+ ))
+
+ final: list[Chunk] = []
+ for i, chunk in enumerate(merged):
+ if i > 0 and overlap > 0:
+ prev_sentences = _split_sentences(merged[i - 1].text)
+ overlap_text = " ".join(prev_sentences[-overlap:])
+ if overlap_text and overlap_text not in chunk.text:
+ chunk.text = overlap_text + "\n\n" + chunk.text
+ chunk.chunk_index = i
+ final.append(chunk)
+
+ return final
+
+
+# --- Main processor ---
+
+
+def process_document(file_bytes: bytes, filename: str) -> tuple[str, list[ParsedSection], list[Chunk]]:
+ document_id = str(uuid.uuid4())
+ ext = Path(filename).suffix.lower()
+
+ if ext == ".pdf":
+ sections = parse_pdf(file_bytes)
+ elif ext in (".docx", ".doc"):
+ sections = parse_docx(file_bytes)
+ elif ext == ".md":
+ sections = parse_text(file_bytes, is_markdown=True)
+ elif ext == ".txt":
+ sections = parse_text(file_bytes, is_markdown=False)
+ else:
+ raise ValueError(f"Unsupported file format: {ext}")
+
+ if not sections:
+ logger.warning("No sections found in %s", filename)
+ return document_id, [], []
+
+ chunks = chunk_sections(sections)
+ logger.info("Processed '%s': %d sections → %d chunks", filename, len(sections), len(chunks))
+ return document_id, sections, chunks
diff --git a/services/embeddings.py b/services/embeddings.py
new file mode 100644
index 0000000..3f9427a
--- /dev/null
+++ b/services/embeddings.py
@@ -0,0 +1,22 @@
+import logging
+
+from sentence_transformers import SentenceTransformer
+
+logger = logging.getLogger(__name__)
+
+
+class EmbeddingService:
+ def __init__(self, model_name: str = "intfloat/multilingual-e5-large"):
+ logger.info("Loading embedding model: %s", model_name)
+ self.model = SentenceTransformer(model_name)
+ self.model_name = model_name
+ logger.info("Embedding model loaded (dim=%d)", self.model.get_sentence_embedding_dimension())
+
+ def embed_documents(self, texts: list[str]) -> list[list[float]]:
+ prefixed = [f"passage: {t}" for t in texts]
+ embeddings = self.model.encode(prefixed, normalize_embeddings=True, show_progress_bar=False)
+ return embeddings.tolist()
+
+ def embed_query(self, query: str) -> list[float]:
+ embedding = self.model.encode(f"query: {query}", normalize_embeddings=True)
+ return embedding.tolist()
diff --git a/services/llm_client.py b/services/llm_client.py
new file mode 100644
index 0000000..fc7745c
--- /dev/null
+++ b/services/llm_client.py
@@ -0,0 +1,104 @@
+import logging
+
+import httpx
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_SYSTEM_PROMPT = """Ты — виртуальный ассистент клиники, который первым отвечает пациентам в чате.
+
+Твоя задача — помочь пациенту по бытовым и организационным вопросам: запись, расписание врачей, подготовка к приёму, как проехать, документы, оплата, ДМС, детский приём и т. п.
+
+Правила:
+- Отвечай коротко, дружелюбно, на «вы», простым русским языком без медицинской латыни.
+- Опирайся ТОЛЬКО на предоставленные выдержки из базы знаний. Если ответа в них нет — честно скажи, что уточнишь у оператора, и предложи подключить оператора.
+- Не ставь диагнозы и не назначай лечение. Если вопрос про симптомы, лекарства, дозировки или «что со мной» — мягко предложи записаться к врачу и подключить оператора, если нужно.
+- Не выдумывай телефоны, адреса, цены, имена врачей, расписание. Только из источников.
+- Если пациент просит оператора — коротко подтверди, что сейчас его подключишь.
+- Источники указывать не нужно: пациент их не видит. Ответ — обычный текст, как в чате."""
+
+DEFAULT_USER_TEMPLATE = """Вопрос пациента:
+{question}
+
+Выдержки из базы знаний операторов:
+{sources}
+
+Ответь пациенту в чате по правилам из системного сообщения."""
+
+
+class LLMClient:
+ def __init__(
+ self,
+ api_key: str | None = None,
+ model: str | None = None,
+ base_url: str | None = None,
+ ):
+ self.api_key = api_key or settings.deepseek_api_key
+ self.model = model or settings.deepseek_model
+ self.base_url = (base_url or settings.deepseek_base_url).rstrip("/")
+
+ def _format_sources(self, sources: list[dict]) -> str:
+ if not sources:
+ return "(источники не найдены)"
+ lines = []
+ for i, src in enumerate(sources, 1):
+ meta = src.get("metadata", {})
+ doc_name = meta.get("document_name", "Документ")
+ section = meta.get("section", "")
+ lines.append(
+ f"[{i}] {src['text']}\n"
+ f" (Источник: {doc_name}, раздел: {section})"
+ )
+ return "\n".join(lines)
+
+ async def answer(
+ self,
+ question: str,
+ sources: list[dict],
+ system_prompt: str | None = None,
+ temperature: float | None = None,
+ max_tokens: int | None = None,
+ ) -> dict:
+ """Generate a patient-facing answer using RAG context.
+
+ Returns dict with 'text' and 'assembled_prompt'.
+ """
+ effective_system = system_prompt or DEFAULT_SYSTEM_PROMPT
+ effective_temp = temperature if temperature is not None else 0.2
+ effective_max_tokens = max_tokens or 1200
+
+ formatted_sources = self._format_sources(sources)
+ user_message = DEFAULT_USER_TEMPLATE.format(
+ question=question,
+ sources=formatted_sources,
+ )
+
+ assembled_prompt = f"[SYSTEM]\n{effective_system}\n\n[USER]\n{user_message}"
+
+ url = f"{self.base_url}/chat/completions"
+ payload = {
+ "model": self.model,
+ "messages": [
+ {"role": "system", "content": effective_system},
+ {"role": "user", "content": user_message},
+ ],
+ "temperature": effective_temp,
+ "max_tokens": effective_max_tokens,
+ }
+
+ async with httpx.AsyncClient(timeout=60.0) as client:
+ response = await client.post(
+ url,
+ json=payload,
+ headers={
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json",
+ },
+ )
+ response.raise_for_status()
+ data = response.json()
+
+ content = data["choices"][0]["message"]["content"]
+ logger.info("LLM response: %d chars, model=%s, temp=%.2f", len(content), self.model, effective_temp)
+ return {"text": content.strip(), "assembled_prompt": assembled_prompt}
diff --git a/services/rag_pipeline.py b/services/rag_pipeline.py
new file mode 100644
index 0000000..8efce7e
--- /dev/null
+++ b/services/rag_pipeline.py
@@ -0,0 +1,52 @@
+import logging
+
+from services.llm_client import LLMClient
+from services.vectorstore import VectorStoreService
+
+logger = logging.getLogger(__name__)
+
+
+async def rag_query(
+ vectorstore: VectorStoreService,
+ llm_client: LLMClient,
+ question: str,
+ top_k: int = 5,
+ document_ids: list[str] | None = None,
+ temperature: float | None = None,
+ max_tokens: int | None = None,
+) -> dict:
+ """Pipeline: retrieve → augment → generate для одиночного вопроса пациента."""
+ logger.info("RAG query: %s", question[:200])
+
+ retrieved = vectorstore.query(
+ query_text=question,
+ top_k=top_k,
+ document_ids=document_ids,
+ )
+ logger.info("Retrieved %d chunks", len(retrieved))
+
+ llm_result = await llm_client.answer(
+ question=question,
+ sources=retrieved,
+ temperature=temperature,
+ max_tokens=max_tokens,
+ )
+
+ sources = []
+ for item in retrieved:
+ meta = item.get("metadata", {})
+ sources.append({
+ "document_id": meta.get("document_id", ""),
+ "document_name": meta.get("document_name", ""),
+ "chunk_text": item["text"][:500],
+ "section": meta.get("section", ""),
+ "page": meta.get("page_number", 0),
+ "relevance_score": round(item.get("relevance_score", 0), 3),
+ })
+
+ return {
+ "answer": llm_result["text"],
+ "sources": sources,
+ "model_used": llm_client.model,
+ "assembled_prompt": llm_result["assembled_prompt"],
+ }
diff --git a/services/vectorstore.py b/services/vectorstore.py
new file mode 100644
index 0000000..33d7ea8
--- /dev/null
+++ b/services/vectorstore.py
@@ -0,0 +1,145 @@
+import logging
+from datetime import datetime, timezone
+
+import chromadb
+
+from services.embeddings import EmbeddingService
+
+logger = logging.getLogger(__name__)
+
+COLLECTION_NAME = "operators_wiki"
+
+
+class VectorStoreService:
+ def __init__(self, persist_dir: str, embedding_service: EmbeddingService):
+ self.client = chromadb.PersistentClient(path=persist_dir)
+ self.embedding_service = embedding_service
+ self.collection = self.client.get_or_create_collection(
+ name=COLLECTION_NAME,
+ metadata={"hnsw:space": "cosine"},
+ )
+ logger.info("ChromaDB collection '%s': %d items", COLLECTION_NAME, self.collection.count())
+
+ def add_document(
+ self,
+ document_id: str,
+ document_name: str,
+ file_type: str,
+ chunks: list[dict],
+ ) -> int:
+ if not chunks:
+ return 0
+
+ texts = [c["text"] for c in chunks]
+ embeddings = self.embedding_service.embed_documents(texts)
+
+ ids = []
+ metadatas = []
+ now = datetime.now(timezone.utc).isoformat()
+
+ for i, chunk in enumerate(chunks):
+ ids.append(f"{document_id}_chunk_{i}")
+ metadatas.append({
+ "document_id": document_id,
+ "document_name": document_name,
+ "file_type": file_type,
+ "section": chunk.get("section", ""),
+ "page_number": chunk.get("page_number", 0),
+ "chunk_index": i,
+ "created_at": now,
+ })
+
+ self.collection.add(
+ ids=ids,
+ embeddings=embeddings,
+ documents=texts,
+ metadatas=metadatas,
+ )
+ logger.info("Added %d chunks for document '%s'", len(chunks), document_name)
+ return len(chunks)
+
+ def query(
+ self,
+ query_text: str,
+ top_k: int = 5,
+ document_ids: list[str] | None = None,
+ ) -> list[dict]:
+ query_embedding = self.embedding_service.embed_query(query_text)
+
+ where_filter = None
+ if document_ids:
+ if len(document_ids) == 1:
+ where_filter = {"document_id": document_ids[0]}
+ else:
+ where_filter = {"document_id": {"$in": document_ids}}
+
+ results = self.collection.query(
+ query_embeddings=[query_embedding],
+ n_results=top_k,
+ where=where_filter,
+ include=["documents", "metadatas", "distances"],
+ )
+
+ items = []
+ if results["ids"] and results["ids"][0]:
+ for i, chunk_id in enumerate(results["ids"][0]):
+ items.append({
+ "chunk_id": chunk_id,
+ "text": results["documents"][0][i],
+ "metadata": results["metadatas"][0][i],
+ "distance": results["distances"][0][i],
+ "relevance_score": 1 - results["distances"][0][i],
+ })
+ return items
+
+ def delete_document(self, document_id: str) -> int:
+ existing = self.collection.get(where={"document_id": document_id}, include=[])
+ count = len(existing["ids"])
+ if count > 0:
+ self.collection.delete(ids=existing["ids"])
+ logger.info("Deleted %d chunks for document_id=%s", count, document_id)
+ return count
+
+ def list_documents(self) -> list[dict]:
+ all_items = self.collection.get(include=["metadatas"])
+ docs: dict[str, dict] = {}
+ for meta in all_items["metadatas"]:
+ doc_id = meta["document_id"]
+ if doc_id not in docs:
+ docs[doc_id] = {
+ "document_id": doc_id,
+ "name": meta.get("document_name", ""),
+ "file_type": meta.get("file_type", ""),
+ "created_at": meta.get("created_at", ""),
+ "chunks_count": 0,
+ "metadata": {},
+ }
+ docs[doc_id]["chunks_count"] += 1
+ return list(docs.values())
+
+ def get_document_chunks(self, document_id: str) -> list[dict]:
+ """Return all chunks for a document, sorted by chunk_index."""
+ results = self.collection.get(
+ where={"document_id": document_id},
+ include=["documents", "metadatas"],
+ )
+ items = []
+ if results["ids"]:
+ for i, chunk_id in enumerate(results["ids"]):
+ items.append({
+ "chunk_id": chunk_id,
+ "text": results["documents"][i],
+ "metadata": results["metadatas"][i],
+ })
+ items.sort(key=lambda x: x["metadata"].get("chunk_index", 0))
+ return items
+
+ def get_stats(self) -> dict:
+ all_items = self.collection.get(include=["metadatas"])
+ doc_ids = set()
+ for meta in all_items["metadatas"]:
+ doc_ids.add(meta.get("document_id", ""))
+ return {
+ "documents_count": len(doc_ids),
+ "chunks_count": self.collection.count(),
+ }
diff --git a/static/index.html b/static/index.html
new file mode 100644
index 0000000..efbb491
--- /dev/null
+++ b/static/index.html
@@ -0,0 +1,571 @@
+
+
+
+
+
+Chat Agent for Patients — Debug UI
+
+
+
+
+
+ Chat Agent for Patients — Debug
+ проверяю…
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+