feat: Спринт 1 — RAG-ядро, загрузка wiki и Debug UI

FastAPI + ChromaDB + E5-large + DeepSeek по паттерну work-pcs-dr-cdss,
адаптированному под пациентский контекст:

- services: embeddings (E5-large с префиксами), vectorstore (коллекция
  operators_wiki), document_processor (PDF/DOCX/TXT/MD + чанкер с FAQ-
  паттерном под wiki), llm_client (системный промпт ассистента клиники),
  rag_pipeline (одиночный вопрос → retrieval → ответ).
- routers: /health, /documents (upload, list, chunks, delete), /query.
- static/index.html: шапка со статусом, блок базы знаний с раскрытием
  чанков по клику, блок тест-вопроса с 3-колоночным ответом
  (чанки со score / собранный промпт / ответ LLM).
- Порт 8003 (8001 занят CDSS, 8002 — voicenote).

E2E проверен: загрузка wiki_test.md → 2 чанка, вопрос «как записать
ребёнка к лору?» → top score 84.8%, корректный ответ DeepSeek.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
AR 15 M4
2026-04-22 14:57:34 +05:00
parent d1e7749605
commit a7f78d71b2
21 changed files with 1641 additions and 0 deletions
+6
View File
@@ -0,0 +1,6 @@
CHROMA_PERSIST_DIR=./data/chroma
EMBEDDING_MODEL=intfloat/multilingual-e5-large
DEEPSEEK_API_KEY=sk-your-key-here
DEEPSEEK_MODEL=deepseek-chat
DEEPSEEK_BASE_URL=https://api.deepseek.com
LOG_LEVEL=info
+1
View File
@@ -5,3 +5,4 @@ data/chroma/
*.egg-info/
.venv/
.DS_Store
server.log
+16
View File
@@ -0,0 +1,16 @@
FROM python:3.11-slim
RUN apt-get update && apt-get install -y \
curl \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8003
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8003"]
+19
View File
@@ -0,0 +1,19 @@
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
chroma_persist_dir: str = "./data/chroma"
embedding_model: str = "intfloat/multilingual-e5-large"
deepseek_api_key: str = ""
deepseek_model: str = "deepseek-chat"
deepseek_base_url: str = "https://api.deepseek.com"
log_level: str = "info"
max_chunk_size: int = 1200
min_chunk_size: int = 200
overlap_sentences: int = 2
model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
settings = Settings()
+24
View File
@@ -0,0 +1,24 @@
services:
rag-service:
build: .
ports:
- "8003:8003"
volumes:
- chroma_data:/app/data
- embedding_cache:/root/.cache
environment:
CHROMA_PERSIST_DIR: /app/data/chroma
EMBEDDING_MODEL: intfloat/multilingual-e5-large
DEEPSEEK_API_KEY: ${DEEPSEEK_API_KEY}
DEEPSEEK_MODEL: ${DEEPSEEK_MODEL:-deepseek-chat}
LOG_LEVEL: ${LOG_LEVEL:-info}
healthcheck:
test: ["CMD-SHELL", "curl -sf http://localhost:8003/health || exit 1"]
interval: 15s
timeout: 10s
retries: 10
start_period: 180s
volumes:
chroma_data:
embedding_cache:
+55
View File
@@ -0,0 +1,55 @@
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from config import settings
from services.embeddings import EmbeddingService
from services.vectorstore import VectorStoreService
logger = logging.getLogger(__name__)
embedding_service: EmbeddingService | None = None
vectorstore_service: VectorStoreService | None = None
@asynccontextmanager
async def lifespan(app: FastAPI):
global embedding_service, vectorstore_service
logging.basicConfig(level=getattr(logging, settings.log_level.upper(), logging.INFO))
logger.info("Loading embedding model: %s", settings.embedding_model)
embedding_service = EmbeddingService(settings.embedding_model)
logger.info("Embedding model loaded")
vectorstore_service = VectorStoreService(
persist_dir=settings.chroma_persist_dir,
embedding_service=embedding_service,
)
logger.info("ChromaDB initialized at %s", settings.chroma_persist_dir)
yield
logger.info("Shutting down")
app = FastAPI(
title="Chat Agent for Patients — Tuning Tool",
description="RAG-ядро и инструмент настройки пациентского чат-агента",
version="0.1.0",
lifespan=lifespan,
)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
from routers import documents, health, query # noqa: E402
app.include_router(health.router)
app.include_router(documents.router)
app.include_router(query.router)
app.mount("/", StaticFiles(directory="static", html=True), name="static")
View File
+9
View File
@@ -0,0 +1,9 @@
from pydantic import BaseModel, Field
class QueryRequest(BaseModel):
text: str = Field(..., description="Вопрос от лица пациента")
top_k: int = Field(5, ge=1, le=20, description="Количество чанков для retrieval")
document_ids: list[str] | None = Field(None, description="Ограничить поиск конкретными документами")
temperature: float | None = Field(None, ge=0.0, le=2.0)
max_tokens: int | None = Field(None, ge=100, le=8000)
+77
View File
@@ -0,0 +1,77 @@
from pydantic import BaseModel, Field
class DocumentInfo(BaseModel):
document_id: str
name: str
chunks_count: int
file_type: str
created_at: str
metadata: dict = Field(default_factory=dict)
class ChunkPreview(BaseModel):
index: int
section: str = ""
page_number: int = 0
text_preview: str = ""
char_length: int = 0
class DocumentUploadResponse(BaseModel):
document_id: str
name: str
chunks_count: int
status: str = "indexed"
created_at: str
chunks_preview: list[ChunkPreview] = Field(default_factory=list)
class DocumentListResponse(BaseModel):
documents: list[DocumentInfo]
total: int
class ChunkDetail(BaseModel):
index: int
section: str = ""
page_number: int = 0
text: str = ""
char_length: int = 0
class DocumentChunksResponse(BaseModel):
document_id: str
name: str
file_type: str
chunks_count: int
chunks: list[ChunkDetail] = Field(default_factory=list)
class DocumentDeleteResponse(BaseModel):
ok: bool = True
deleted_chunks: int
class SourceInfo(BaseModel):
document_id: str
document_name: str
chunk_text: str
section: str = ""
page: int = 0
relevance_score: float = 0.0
class QueryResponse(BaseModel):
answer: str
sources: list[SourceInfo]
model_used: str
assembled_prompt: str = ""
class HealthResponse(BaseModel):
status: str = "ok"
chromadb: str
embedding_model: str
documents_count: int
chunks_count: int
+9
View File
@@ -0,0 +1,9 @@
fastapi==0.115.5
uvicorn[standard]==0.32.1
python-multipart==0.0.12
chromadb==0.5.23
sentence-transformers==3.3.1
pymupdf==1.25.1
python-docx==1.1.2
httpx==0.28.1
pydantic-settings==2.7.1
View File
+155
View File
@@ -0,0 +1,155 @@
import logging
from datetime import datetime, timezone
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
from models.responses import (
ChunkDetail,
ChunkPreview,
DocumentChunksResponse,
DocumentDeleteResponse,
DocumentInfo,
DocumentListResponse,
DocumentUploadResponse,
)
from services.document_processor import process_document
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/documents", tags=["documents"])
ALLOWED_EXTENSIONS = {".pdf", ".docx", ".doc", ".txt", ".md"}
MAX_FILE_SIZE = 50 * 1024 * 1024 # 50 MB
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(
file: UploadFile = File(...),
document_name: str | None = Form(None),
):
from main import vectorstore_service
if vectorstore_service is None:
raise HTTPException(status_code=503, detail="Service not ready")
filename = file.filename or "unknown"
ext = "." + filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
if ext not in ALLOWED_EXTENSIONS:
raise HTTPException(
status_code=400,
detail=f"Unsupported file format: {ext}. Allowed: {', '.join(ALLOWED_EXTENSIONS)}",
)
file_bytes = await file.read()
if len(file_bytes) > MAX_FILE_SIZE:
raise HTTPException(status_code=400, detail="File too large (max 50 MB)")
if len(file_bytes) == 0:
raise HTTPException(status_code=400, detail="Empty file")
display_name = document_name or filename
try:
document_id, sections, chunks = process_document(file_bytes, filename)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.exception("Failed to process document: %s", filename)
raise HTTPException(status_code=500, detail=f"Error processing document: {e}")
if not chunks:
raise HTTPException(status_code=400, detail="No content could be extracted from the document")
file_type = ext.lstrip(".")
chunks_count = vectorstore_service.add_document(
document_id=document_id,
document_name=display_name,
file_type=file_type,
chunks=[
{
"text": c.text,
"section": c.section,
"page_number": c.page_number,
"chunk_index": c.chunk_index,
}
for c in chunks
],
)
chunks_prev = [
ChunkPreview(
index=c.chunk_index,
section=c.section,
page_number=c.page_number,
text_preview=c.text[:300],
char_length=len(c.text),
)
for c in chunks[:3]
]
return DocumentUploadResponse(
document_id=document_id,
name=display_name,
chunks_count=chunks_count,
status="indexed",
created_at=datetime.now(timezone.utc).isoformat(),
chunks_preview=chunks_prev,
)
@router.get("", response_model=DocumentListResponse)
async def list_documents():
from main import vectorstore_service
if vectorstore_service is None:
raise HTTPException(status_code=503, detail="Service not ready")
docs = vectorstore_service.list_documents()
return DocumentListResponse(
documents=[DocumentInfo(**d) for d in docs],
total=len(docs),
)
@router.get("/{document_id}/chunks", response_model=DocumentChunksResponse)
async def get_document_chunks(document_id: str):
from main import vectorstore_service
if vectorstore_service is None:
raise HTTPException(status_code=503, detail="Service not ready")
raw_chunks = vectorstore_service.get_document_chunks(document_id)
if not raw_chunks:
raise HTTPException(status_code=404, detail="Document not found")
meta0 = raw_chunks[0]["metadata"]
chunks = [
ChunkDetail(
index=c["metadata"].get("chunk_index", 0),
section=c["metadata"].get("section", ""),
page_number=c["metadata"].get("page_number", 0),
text=c["text"],
char_length=len(c["text"]),
)
for c in raw_chunks
]
return DocumentChunksResponse(
document_id=document_id,
name=meta0.get("document_name", ""),
file_type=meta0.get("file_type", ""),
chunks_count=len(chunks),
chunks=chunks,
)
@router.delete("/{document_id}", response_model=DocumentDeleteResponse)
async def delete_document(document_id: str):
from main import vectorstore_service
if vectorstore_service is None:
raise HTTPException(status_code=503, detail="Service not ready")
deleted = vectorstore_service.delete_document(document_id)
if deleted == 0:
raise HTTPException(status_code=404, detail="Document not found")
return DocumentDeleteResponse(ok=True, deleted_chunks=deleted)
+29
View File
@@ -0,0 +1,29 @@
from fastapi import APIRouter
from config import settings
from models.responses import HealthResponse
router = APIRouter()
@router.get("/health", response_model=HealthResponse)
async def health():
from main import vectorstore_service
if vectorstore_service is None:
return HealthResponse(
status="loading",
chromadb="not_connected",
embedding_model=settings.embedding_model,
documents_count=0,
chunks_count=0,
)
stats = vectorstore_service.get_stats()
return HealthResponse(
status="ok",
chromadb="connected",
embedding_model=settings.embedding_model,
documents_count=stats["documents_count"],
chunks_count=stats["chunks_count"],
)
+47
View File
@@ -0,0 +1,47 @@
import logging
from fastapi import APIRouter, HTTPException
from config import settings
from models.requests import QueryRequest
from models.responses import QueryResponse, SourceInfo
from services.llm_client import LLMClient
from services.rag_pipeline import rag_query
logger = logging.getLogger(__name__)
router = APIRouter(tags=["query"])
@router.post("/query", response_model=QueryResponse)
async def query_rag(request: QueryRequest):
from main import vectorstore_service
if vectorstore_service is None:
raise HTTPException(status_code=503, detail="Service not ready")
if not settings.deepseek_api_key:
raise HTTPException(status_code=500, detail="DEEPSEEK_API_KEY not configured")
llm_client = LLMClient()
try:
result = await rag_query(
vectorstore=vectorstore_service,
llm_client=llm_client,
question=request.text,
top_k=request.top_k,
document_ids=request.document_ids,
temperature=request.temperature,
max_tokens=request.max_tokens,
)
except Exception as e:
logger.exception("RAG query failed")
raise HTTPException(status_code=500, detail=f"RAG query error: {e}")
return QueryResponse(
answer=result["answer"],
sources=[SourceInfo(**s) for s in result["sources"]],
model_used=result["model_used"],
assembled_prompt=result.get("assembled_prompt", ""),
)
View File
+300
View File
@@ -0,0 +1,300 @@
import io
import logging
import re
import uuid
from dataclasses import dataclass
from pathlib import Path
import fitz # pymupdf
from docx import Document as DocxDocument
from config import settings
logger = logging.getLogger(__name__)
@dataclass
class ParsedSection:
heading: str
heading_level: int
body: str
page_number: int = 0
@dataclass
class Chunk:
text: str
section: str = ""
page_number: int = 0
chunk_index: int = 0
# --- Parsers ---
def parse_pdf(file_bytes: bytes) -> list[ParsedSection]:
doc = fitz.open(stream=file_bytes, filetype="pdf")
sections: list[ParsedSection] = []
current_heading = ""
current_body_lines: list[str] = []
current_page = 0
for page_num in range(len(doc)):
page = doc[page_num]
blocks = page.get_text("dict")["blocks"]
for block in blocks:
if "lines" not in block:
continue
for line in block["lines"]:
text = "".join(span["text"] for span in line["spans"]).strip()
if not text:
continue
max_size = max(span["size"] for span in line["spans"])
is_bold = any("bold" in span["font"].lower() for span in line["spans"])
if (max_size >= 14 or (is_bold and max_size >= 12)) and len(text) < 200:
if current_heading or current_body_lines:
sections.append(ParsedSection(
heading=current_heading,
heading_level=1 if max_size >= 16 else 2,
body="\n".join(current_body_lines).strip(),
page_number=current_page,
))
current_heading = text
current_body_lines = []
current_page = page_num + 1
else:
current_body_lines.append(text)
if not current_heading:
current_page = page_num + 1
if current_heading or current_body_lines:
sections.append(ParsedSection(
heading=current_heading,
heading_level=2,
body="\n".join(current_body_lines).strip(),
page_number=current_page,
))
doc.close()
return sections
def parse_docx(file_bytes: bytes) -> list[ParsedSection]:
doc = DocxDocument(io.BytesIO(file_bytes))
sections: list[ParsedSection] = []
current_heading = ""
current_level = 0
current_body_lines: list[str] = []
for para in doc.paragraphs:
text = para.text.strip()
if not text:
continue
style_name = (para.style.name or "").lower()
if "heading" in style_name or "title" in style_name:
if current_heading or current_body_lines:
sections.append(ParsedSection(
heading=current_heading,
heading_level=current_level or 1,
body="\n".join(current_body_lines).strip(),
))
level_match = re.search(r"\d+", style_name)
current_level = int(level_match.group()) if level_match else 1
current_heading = text
current_body_lines = []
else:
current_body_lines.append(text)
if current_heading or current_body_lines:
sections.append(ParsedSection(
heading=current_heading,
heading_level=current_level or 1,
body="\n".join(current_body_lines).strip(),
))
return sections
def parse_text(file_bytes: bytes, is_markdown: bool = False) -> list[ParsedSection]:
"""Parse wiki-style TXT/MD.
Эвристики под wiki операторов:
- markdown-заголовки (#, ##, ...)
- нумерованные пункты «1.», «1.1.», «1.1.1.»
- FAQ-паттерн «В:» / «Вопрос:» — воспринимаем как начало новой секции
- ALL-CAPS строки (короткие) — заголовок
"""
text = file_bytes.decode("utf-8", errors="replace")
lines = text.split("\n")
sections: list[ParsedSection] = []
current_heading = ""
current_level = 0
current_body_lines: list[str] = []
md_heading_re = re.compile(r"^(#{1,6})\s+(.+)")
numbered_heading_re = re.compile(r"^(\d+(?:\.\d+)*\.?)\s+([А-ЯЁA-Z].*)")
faq_question_re = re.compile(r"^(В|Вопрос|Q|Question)\s*[:\.]\s*(.+)", re.IGNORECASE)
for line in lines:
stripped = line.strip()
heading_text = None
heading_level = 0
md_match = md_heading_re.match(stripped)
if md_match:
heading_level = len(md_match.group(1))
heading_text = md_match.group(2).strip()
if not heading_text:
num_match = numbered_heading_re.match(stripped)
if num_match and len(stripped) < 200:
dots = num_match.group(1).count(".")
heading_level = max(1, dots + 1)
heading_text = stripped
if not heading_text:
faq_match = faq_question_re.match(stripped)
if faq_match and len(stripped) < 300:
heading_text = faq_match.group(2).strip()
heading_level = 3
if not heading_text and stripped.isupper() and 3 < len(stripped) < 200:
heading_text = stripped
heading_level = 1
if heading_text:
if current_heading or current_body_lines:
sections.append(ParsedSection(
heading=current_heading,
heading_level=current_level or 1,
body="\n".join(current_body_lines).strip(),
))
current_heading = heading_text
current_level = heading_level
current_body_lines = []
else:
current_body_lines.append(line)
if current_heading or current_body_lines:
sections.append(ParsedSection(
heading=current_heading,
heading_level=current_level or 1,
body="\n".join(current_body_lines).strip(),
))
return sections
# --- Chunker ---
def _split_sentences(text: str) -> list[str]:
sentences = re.split(r"(?<=[.!?])\s+", text)
return [s.strip() for s in sentences if s.strip()]
def chunk_sections(
sections: list[ParsedSection],
max_chunk_size: int | None = None,
min_chunk_size: int | None = None,
overlap_sentences: int | None = None,
) -> list[Chunk]:
"""Чанкинг wiki-секций.
- Малые секции (FAQ-ответы) держим целиком — один чанк = одна тема.
- Большие секции (регламенты) режем по абзацам, с overlap последних N предложений.
- Мелкие соседние секции склеиваем, чтобы не плодить огрызки.
"""
max_size = max_chunk_size or settings.max_chunk_size
min_size = min_chunk_size or settings.min_chunk_size
overlap = overlap_sentences or settings.overlap_sentences
raw_chunks: list[Chunk] = []
for section in sections:
heading_prefix = f"{section.heading}\n\n" if section.heading else ""
full_text = heading_prefix + section.body
if len(full_text) <= max_size:
raw_chunks.append(Chunk(
text=full_text.strip(),
section=section.heading,
page_number=section.page_number,
))
else:
paragraphs = section.body.split("\n")
current_text = heading_prefix
for para in paragraphs:
if len(current_text) + len(para) + 1 > max_size and len(current_text) > len(heading_prefix):
raw_chunks.append(Chunk(
text=current_text.strip(),
section=section.heading,
page_number=section.page_number,
))
current_text = heading_prefix + para + "\n"
else:
current_text += para + "\n"
if current_text.strip() and current_text.strip() != heading_prefix.strip():
raw_chunks.append(Chunk(
text=current_text.strip(),
section=section.heading,
page_number=section.page_number,
))
merged: list[Chunk] = []
for chunk in raw_chunks:
if merged and len(merged[-1].text) < min_size:
merged[-1].text += "\n\n" + chunk.text
if not merged[-1].section:
merged[-1].section = chunk.section
else:
merged.append(Chunk(
text=chunk.text,
section=chunk.section,
page_number=chunk.page_number,
))
final: list[Chunk] = []
for i, chunk in enumerate(merged):
if i > 0 and overlap > 0:
prev_sentences = _split_sentences(merged[i - 1].text)
overlap_text = " ".join(prev_sentences[-overlap:])
if overlap_text and overlap_text not in chunk.text:
chunk.text = overlap_text + "\n\n" + chunk.text
chunk.chunk_index = i
final.append(chunk)
return final
# --- Main processor ---
def process_document(file_bytes: bytes, filename: str) -> tuple[str, list[ParsedSection], list[Chunk]]:
document_id = str(uuid.uuid4())
ext = Path(filename).suffix.lower()
if ext == ".pdf":
sections = parse_pdf(file_bytes)
elif ext in (".docx", ".doc"):
sections = parse_docx(file_bytes)
elif ext == ".md":
sections = parse_text(file_bytes, is_markdown=True)
elif ext == ".txt":
sections = parse_text(file_bytes, is_markdown=False)
else:
raise ValueError(f"Unsupported file format: {ext}")
if not sections:
logger.warning("No sections found in %s", filename)
return document_id, [], []
chunks = chunk_sections(sections)
logger.info("Processed '%s': %d sections → %d chunks", filename, len(sections), len(chunks))
return document_id, sections, chunks
+22
View File
@@ -0,0 +1,22 @@
import logging
from sentence_transformers import SentenceTransformer
logger = logging.getLogger(__name__)
class EmbeddingService:
def __init__(self, model_name: str = "intfloat/multilingual-e5-large"):
logger.info("Loading embedding model: %s", model_name)
self.model = SentenceTransformer(model_name)
self.model_name = model_name
logger.info("Embedding model loaded (dim=%d)", self.model.get_sentence_embedding_dimension())
def embed_documents(self, texts: list[str]) -> list[list[float]]:
prefixed = [f"passage: {t}" for t in texts]
embeddings = self.model.encode(prefixed, normalize_embeddings=True, show_progress_bar=False)
return embeddings.tolist()
def embed_query(self, query: str) -> list[float]:
embedding = self.model.encode(f"query: {query}", normalize_embeddings=True)
return embedding.tolist()
+104
View File
@@ -0,0 +1,104 @@
import logging
import httpx
from config import settings
logger = logging.getLogger(__name__)
DEFAULT_SYSTEM_PROMPT = """Ты — виртуальный ассистент клиники, который первым отвечает пациентам в чате.
Твоя задача — помочь пациенту по бытовым и организационным вопросам: запись, расписание врачей, подготовка к приёму, как проехать, документы, оплата, ДМС, детский приём и т. п.
Правила:
- Отвечай коротко, дружелюбно, на «вы», простым русским языком без медицинской латыни.
- Опирайся ТОЛЬКО на предоставленные выдержки из базы знаний. Если ответа в них нет — честно скажи, что уточнишь у оператора, и предложи подключить оператора.
- Не ставь диагнозы и не назначай лечение. Если вопрос про симптомы, лекарства, дозировки или «что со мной» — мягко предложи записаться к врачу и подключить оператора, если нужно.
- Не выдумывай телефоны, адреса, цены, имена врачей, расписание. Только из источников.
- Если пациент просит оператора — коротко подтверди, что сейчас его подключишь.
- Источники указывать не нужно: пациент их не видит. Ответ — обычный текст, как в чате."""
DEFAULT_USER_TEMPLATE = """Вопрос пациента:
{question}
Выдержки из базы знаний операторов:
{sources}
Ответь пациенту в чате по правилам из системного сообщения."""
class LLMClient:
def __init__(
self,
api_key: str | None = None,
model: str | None = None,
base_url: str | None = None,
):
self.api_key = api_key or settings.deepseek_api_key
self.model = model or settings.deepseek_model
self.base_url = (base_url or settings.deepseek_base_url).rstrip("/")
def _format_sources(self, sources: list[dict]) -> str:
if not sources:
return "(источники не найдены)"
lines = []
for i, src in enumerate(sources, 1):
meta = src.get("metadata", {})
doc_name = meta.get("document_name", "Документ")
section = meta.get("section", "")
lines.append(
f"[{i}] {src['text']}\n"
f" (Источник: {doc_name}, раздел: {section})"
)
return "\n".join(lines)
async def answer(
self,
question: str,
sources: list[dict],
system_prompt: str | None = None,
temperature: float | None = None,
max_tokens: int | None = None,
) -> dict:
"""Generate a patient-facing answer using RAG context.
Returns dict with 'text' and 'assembled_prompt'.
"""
effective_system = system_prompt or DEFAULT_SYSTEM_PROMPT
effective_temp = temperature if temperature is not None else 0.2
effective_max_tokens = max_tokens or 1200
formatted_sources = self._format_sources(sources)
user_message = DEFAULT_USER_TEMPLATE.format(
question=question,
sources=formatted_sources,
)
assembled_prompt = f"[SYSTEM]\n{effective_system}\n\n[USER]\n{user_message}"
url = f"{self.base_url}/chat/completions"
payload = {
"model": self.model,
"messages": [
{"role": "system", "content": effective_system},
{"role": "user", "content": user_message},
],
"temperature": effective_temp,
"max_tokens": effective_max_tokens,
}
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
url,
json=payload,
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
},
)
response.raise_for_status()
data = response.json()
content = data["choices"][0]["message"]["content"]
logger.info("LLM response: %d chars, model=%s, temp=%.2f", len(content), self.model, effective_temp)
return {"text": content.strip(), "assembled_prompt": assembled_prompt}
+52
View File
@@ -0,0 +1,52 @@
import logging
from services.llm_client import LLMClient
from services.vectorstore import VectorStoreService
logger = logging.getLogger(__name__)
async def rag_query(
vectorstore: VectorStoreService,
llm_client: LLMClient,
question: str,
top_k: int = 5,
document_ids: list[str] | None = None,
temperature: float | None = None,
max_tokens: int | None = None,
) -> dict:
"""Pipeline: retrieve → augment → generate для одиночного вопроса пациента."""
logger.info("RAG query: %s", question[:200])
retrieved = vectorstore.query(
query_text=question,
top_k=top_k,
document_ids=document_ids,
)
logger.info("Retrieved %d chunks", len(retrieved))
llm_result = await llm_client.answer(
question=question,
sources=retrieved,
temperature=temperature,
max_tokens=max_tokens,
)
sources = []
for item in retrieved:
meta = item.get("metadata", {})
sources.append({
"document_id": meta.get("document_id", ""),
"document_name": meta.get("document_name", ""),
"chunk_text": item["text"][:500],
"section": meta.get("section", ""),
"page": meta.get("page_number", 0),
"relevance_score": round(item.get("relevance_score", 0), 3),
})
return {
"answer": llm_result["text"],
"sources": sources,
"model_used": llm_client.model,
"assembled_prompt": llm_result["assembled_prompt"],
}
+145
View File
@@ -0,0 +1,145 @@
import logging
from datetime import datetime, timezone
import chromadb
from services.embeddings import EmbeddingService
logger = logging.getLogger(__name__)
COLLECTION_NAME = "operators_wiki"
class VectorStoreService:
def __init__(self, persist_dir: str, embedding_service: EmbeddingService):
self.client = chromadb.PersistentClient(path=persist_dir)
self.embedding_service = embedding_service
self.collection = self.client.get_or_create_collection(
name=COLLECTION_NAME,
metadata={"hnsw:space": "cosine"},
)
logger.info("ChromaDB collection '%s': %d items", COLLECTION_NAME, self.collection.count())
def add_document(
self,
document_id: str,
document_name: str,
file_type: str,
chunks: list[dict],
) -> int:
if not chunks:
return 0
texts = [c["text"] for c in chunks]
embeddings = self.embedding_service.embed_documents(texts)
ids = []
metadatas = []
now = datetime.now(timezone.utc).isoformat()
for i, chunk in enumerate(chunks):
ids.append(f"{document_id}_chunk_{i}")
metadatas.append({
"document_id": document_id,
"document_name": document_name,
"file_type": file_type,
"section": chunk.get("section", ""),
"page_number": chunk.get("page_number", 0),
"chunk_index": i,
"created_at": now,
})
self.collection.add(
ids=ids,
embeddings=embeddings,
documents=texts,
metadatas=metadatas,
)
logger.info("Added %d chunks for document '%s'", len(chunks), document_name)
return len(chunks)
def query(
self,
query_text: str,
top_k: int = 5,
document_ids: list[str] | None = None,
) -> list[dict]:
query_embedding = self.embedding_service.embed_query(query_text)
where_filter = None
if document_ids:
if len(document_ids) == 1:
where_filter = {"document_id": document_ids[0]}
else:
where_filter = {"document_id": {"$in": document_ids}}
results = self.collection.query(
query_embeddings=[query_embedding],
n_results=top_k,
where=where_filter,
include=["documents", "metadatas", "distances"],
)
items = []
if results["ids"] and results["ids"][0]:
for i, chunk_id in enumerate(results["ids"][0]):
items.append({
"chunk_id": chunk_id,
"text": results["documents"][0][i],
"metadata": results["metadatas"][0][i],
"distance": results["distances"][0][i],
"relevance_score": 1 - results["distances"][0][i],
})
return items
def delete_document(self, document_id: str) -> int:
existing = self.collection.get(where={"document_id": document_id}, include=[])
count = len(existing["ids"])
if count > 0:
self.collection.delete(ids=existing["ids"])
logger.info("Deleted %d chunks for document_id=%s", count, document_id)
return count
def list_documents(self) -> list[dict]:
all_items = self.collection.get(include=["metadatas"])
docs: dict[str, dict] = {}
for meta in all_items["metadatas"]:
doc_id = meta["document_id"]
if doc_id not in docs:
docs[doc_id] = {
"document_id": doc_id,
"name": meta.get("document_name", ""),
"file_type": meta.get("file_type", ""),
"created_at": meta.get("created_at", ""),
"chunks_count": 0,
"metadata": {},
}
docs[doc_id]["chunks_count"] += 1
return list(docs.values())
def get_document_chunks(self, document_id: str) -> list[dict]:
"""Return all chunks for a document, sorted by chunk_index."""
results = self.collection.get(
where={"document_id": document_id},
include=["documents", "metadatas"],
)
items = []
if results["ids"]:
for i, chunk_id in enumerate(results["ids"]):
items.append({
"chunk_id": chunk_id,
"text": results["documents"][i],
"metadata": results["metadatas"][i],
})
items.sort(key=lambda x: x["metadata"].get("chunk_index", 0))
return items
def get_stats(self) -> dict:
all_items = self.collection.get(include=["metadatas"])
doc_ids = set()
for meta in all_items["metadatas"]:
doc_ids.add(meta.get("document_id", ""))
return {
"documents_count": len(doc_ids),
"chunks_count": self.collection.count(),
}
+571
View File
@@ -0,0 +1,571 @@
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Chat Agent for Patients — Debug UI</title>
<style>
:root {
--bg: #f5f6f8;
--panel: #ffffff;
--border: #e1e4ea;
--muted: #6b7280;
--fg: #111827;
--accent: #2563eb;
--accent-hover: #1d4ed8;
--ok: #16a34a;
--warn: #d97706;
--err: #dc2626;
--chip-bg: #eef2ff;
--mono: ui-monospace, SFMono-Regular, Menlo, monospace;
}
* { box-sizing: border-box; }
body {
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background: var(--bg);
color: var(--fg);
font-size: 14px;
line-height: 1.5;
}
header {
background: var(--panel);
border-bottom: 1px solid var(--border);
padding: 14px 24px;
display: flex;
align-items: center;
gap: 16px;
position: sticky;
top: 0;
z-index: 10;
}
header h1 {
margin: 0;
font-size: 16px;
font-weight: 600;
}
.status {
display: inline-flex;
align-items: center;
gap: 6px;
padding: 4px 10px;
border-radius: 999px;
background: var(--chip-bg);
font-size: 13px;
}
.dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: var(--muted);
}
.dot.ok { background: var(--ok); }
.dot.warn { background: var(--warn); }
.dot.err { background: var(--err); }
.stats {
margin-left: auto;
font-size: 13px;
color: var(--muted);
}
.stats b { color: var(--fg); }
main {
padding: 24px;
max-width: 1400px;
margin: 0 auto;
display: grid;
gap: 24px;
}
.panel {
background: var(--panel);
border: 1px solid var(--border);
border-radius: 12px;
padding: 20px;
}
.panel h2 {
margin: 0 0 16px 0;
font-size: 15px;
font-weight: 600;
}
.dropzone {
border: 2px dashed var(--border);
border-radius: 10px;
padding: 28px;
text-align: center;
color: var(--muted);
cursor: pointer;
transition: all 0.15s;
}
.dropzone.drag { border-color: var(--accent); background: var(--chip-bg); color: var(--fg); }
.dropzone input { display: none; }
table {
width: 100%;
border-collapse: collapse;
margin-top: 16px;
font-size: 13px;
}
th, td {
text-align: left;
padding: 8px 10px;
border-bottom: 1px solid var(--border);
vertical-align: top;
}
th {
font-weight: 600;
color: var(--muted);
font-size: 12px;
text-transform: uppercase;
letter-spacing: 0.03em;
}
tr:last-child td { border-bottom: none; }
tr.doc-row { cursor: pointer; }
tr.doc-row:hover td:not(:last-child) { background: #f9fafb; }
tr.doc-row .arrow { display: inline-block; transition: transform 0.15s; color: var(--muted); margin-right: 6px; }
tr.doc-row.open .arrow { transform: rotate(90deg); }
tr.chunks-row td { padding: 0; background: #fafbfd; }
tr.chunks-row .chunks-body { padding: 14px 16px; }
.chunk-card {
background: white;
border: 1px solid var(--border);
border-radius: 8px;
padding: 10px 12px;
margin-bottom: 8px;
}
.chunk-card-meta {
font-size: 11px;
color: var(--muted);
margin-bottom: 6px;
display: flex;
gap: 10px;
flex-wrap: wrap;
}
.chunk-card-text {
white-space: pre-wrap;
font-size: 13px;
word-break: break-word;
}
.chunk-card-toggle {
color: var(--accent);
cursor: pointer;
font-size: 12px;
border: none;
background: none;
padding: 4px 0 0 0;
}
.empty {
padding: 20px;
color: var(--muted);
text-align: center;
font-style: italic;
}
button {
font: inherit;
padding: 8px 14px;
border-radius: 8px;
border: 1px solid var(--border);
background: var(--panel);
cursor: pointer;
}
button.primary {
background: var(--accent);
border-color: var(--accent);
color: white;
}
button.primary:hover:not(:disabled) { background: var(--accent-hover); }
button.danger {
color: var(--err);
border-color: var(--err);
background: transparent;
font-size: 12px;
padding: 4px 10px;
}
button:disabled { opacity: 0.5; cursor: not-allowed; }
.row { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }
.row label { color: var(--muted); font-size: 13px; }
textarea, input[type=number], input[type=text] {
font: inherit;
padding: 8px 10px;
border: 1px solid var(--border);
border-radius: 8px;
width: 100%;
background: white;
}
textarea { min-height: 90px; resize: vertical; }
.num { width: 80px; }
.columns {
display: grid;
grid-template-columns: 1fr 1fr 1fr;
gap: 16px;
margin-top: 16px;
}
.col {
border: 1px solid var(--border);
border-radius: 10px;
padding: 14px;
background: #fafbfd;
min-height: 200px;
overflow: hidden;
}
.col h3 {
margin: 0 0 10px 0;
font-size: 12px;
font-weight: 600;
color: var(--muted);
text-transform: uppercase;
letter-spacing: 0.03em;
}
.chunk {
border: 1px solid var(--border);
border-radius: 8px;
padding: 10px;
margin-bottom: 8px;
background: white;
}
.chunk-head {
display: flex;
justify-content: space-between;
font-size: 11px;
color: var(--muted);
margin-bottom: 6px;
}
.score {
background: var(--chip-bg);
padding: 1px 6px;
border-radius: 999px;
color: var(--accent);
font-weight: 600;
}
pre {
font-family: var(--mono);
font-size: 12px;
white-space: pre-wrap;
word-break: break-word;
margin: 0;
background: white;
padding: 10px;
border: 1px solid var(--border);
border-radius: 8px;
max-height: 500px;
overflow: auto;
}
.answer {
background: white;
border: 1px solid var(--border);
border-radius: 8px;
padding: 14px;
white-space: pre-wrap;
word-break: break-word;
line-height: 1.6;
}
.answer-meta {
margin-top: 8px;
font-size: 11px;
color: var(--muted);
}
.toast {
position: fixed;
bottom: 20px;
right: 20px;
background: #111827;
color: white;
padding: 10px 16px;
border-radius: 8px;
font-size: 13px;
opacity: 0;
transition: opacity 0.2s;
pointer-events: none;
}
.toast.show { opacity: 1; }
.toast.err { background: var(--err); }
.mini { font-size: 12px; color: var(--muted); }
.spinner {
width: 14px; height: 14px;
border: 2px solid var(--border);
border-top-color: var(--accent);
border-radius: 50%;
display: inline-block;
animation: spin 0.8s linear infinite;
vertical-align: middle;
}
@keyframes spin { to { transform: rotate(360deg); } }
@media (max-width: 900px) {
.columns { grid-template-columns: 1fr; }
}
</style>
</head>
<body>
<header>
<h1>Chat Agent for Patients — Debug</h1>
<span class="status"><span class="dot" id="dot"></span><span id="status-text">проверяю…</span></span>
<span class="stats" id="stats"></span>
</header>
<main>
<section class="panel">
<h2>База знаний</h2>
<div id="dropzone" class="dropzone">
Перетащи файл (.pdf, .docx, .txt, .md) или кликни для выбора
<input type="file" id="file-input" accept=".pdf,.docx,.doc,.txt,.md">
</div>
<div id="upload-status" class="mini" style="margin-top:10px;"></div>
<table>
<thead>
<tr>
<th>Имя</th>
<th>Тип</th>
<th>Чанков</th>
<th>Загружен</th>
<th></th>
</tr>
</thead>
<tbody id="docs-tbody">
<tr><td colspan="5" class="empty">Документы ещё не загружены</td></tr>
</tbody>
</table>
</section>
<section class="panel">
<h2>Тест-вопрос от пациента</h2>
<textarea id="question" placeholder="Например: как записать ребёнка к лору?"></textarea>
<div class="row" style="margin-top:12px;">
<label>top_k <input type="number" class="num" id="top_k" value="5" min="1" max="20"></label>
<label>temperature <input type="number" class="num" id="temperature" value="0.2" min="0" max="2" step="0.1"></label>
<button class="primary" id="ask-btn">Отправить</button>
<span id="ask-status" class="mini"></span>
</div>
<div class="columns">
<div class="col">
<h3>Что нашёл RAG</h3>
<div id="col-chunks"><div class="mini">— пока пусто —</div></div>
</div>
<div class="col">
<h3>Собранный промпт</h3>
<div id="col-prompt"><div class="mini">— пока пусто —</div></div>
</div>
<div class="col">
<h3>Ответ агента</h3>
<div id="col-answer"><div class="mini">— пока пусто —</div></div>
</div>
</div>
</section>
</main>
<div class="toast" id="toast"></div>
<script>
const API = "";
const $ = (id) => document.getElementById(id);
const esc = (s) => String(s ?? "").replace(/[&<>"']/g, c => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]));
function toast(msg, kind = "ok") {
const t = $("toast");
t.textContent = msg;
t.className = "toast show" + (kind === "err" ? " err" : "");
setTimeout(() => t.className = "toast", 2500);
}
async function api(path, opts = {}) {
const res = await fetch(API + path, opts);
if (!res.ok) {
let detail = res.statusText;
try { const j = await res.json(); detail = j.detail || detail; } catch {}
throw new Error(detail);
}
return res.json();
}
async function refreshHealth() {
try {
const h = await api("/health");
$("dot").className = "dot " + (h.status === "ok" ? "ok" : "warn");
$("status-text").textContent = h.status === "ok" ? "готов" : h.status;
$("stats").innerHTML = `модель <b>${esc(h.embedding_model)}</b> · документов <b>${h.documents_count}</b> · чанков <b>${h.chunks_count}</b>`;
} catch (e) {
$("dot").className = "dot err";
$("status-text").textContent = "недоступен";
$("stats").textContent = "";
}
}
async function refreshDocs() {
try {
const r = await api("/documents");
const tbody = $("docs-tbody");
if (!r.documents.length) {
tbody.innerHTML = '<tr><td colspan="5" class="empty">Документы ещё не загружены</td></tr>';
return;
}
tbody.innerHTML = r.documents.map(d => `
<tr class="doc-row" id="doc-${d.document_id}" onclick="toggleChunks('${d.document_id}')">
<td><span class="arrow">▶</span>${esc(d.name)}</td>
<td>${esc(d.file_type)}</td>
<td>${d.chunks_count}</td>
<td class="mini">${esc((d.created_at || "").slice(0, 19).replace("T", " "))}</td>
<td><button class="danger" onclick="event.stopPropagation(); deleteDoc('${d.document_id}', '${esc(d.name)}')">удалить</button></td>
</tr>
<tr class="chunks-row" id="chunks-${d.document_id}" style="display:none;">
<td colspan="5"><div class="chunks-body"><div class="mini">загружаю…</div></div></td>
</tr>
`).join("");
} catch (e) {
toast("Не удалось загрузить список: " + e.message, "err");
}
}
async function toggleChunks(id) {
const row = $("doc-" + id);
const chunksRow = $("chunks-" + id);
const isOpen = chunksRow.style.display !== "none";
if (isOpen) {
chunksRow.style.display = "none";
row.classList.remove("open");
return;
}
chunksRow.style.display = "";
row.classList.add("open");
const body = chunksRow.querySelector(".chunks-body");
body.innerHTML = '<div class="mini">загружаю…</div>';
try {
const d = await api(`/documents/${id}/chunks`);
if (!d.chunks.length) {
body.innerHTML = '<div class="mini">чанков нет</div>';
return;
}
body.innerHTML = d.chunks.map(c => {
const long = c.text.length > 300;
const short = long ? c.text.slice(0, 300) + "…" : c.text;
const safeFull = esc(c.text).replace(/"/g, "&quot;");
return `
<div class="chunk-card">
<div class="chunk-card-meta">
<span>#${c.index}</span>
<span>раздел: ${esc(c.section || "—")}</span>
${c.page_number ? `<span>стр. ${c.page_number}</span>` : ""}
<span>${c.char_length.toLocaleString("ru")} симв.</span>
</div>
<div class="chunk-card-text" data-short="${esc(short).replace(/"/g, "&quot;")}" data-full="${safeFull}" data-expanded="0">${esc(short)}</div>
${long ? '<button class="chunk-card-toggle" onclick="toggleChunkText(this)">показать полностью</button>' : ""}
</div>
`;
}).join("");
} catch (e) {
body.innerHTML = `<div class="mini" style="color:var(--err)">Ошибка: ${esc(e.message)}</div>`;
}
}
function toggleChunkText(btn) {
const textEl = btn.previousElementSibling;
const expanded = textEl.getAttribute("data-expanded") === "1";
textEl.textContent = expanded ? textEl.getAttribute("data-short") : textEl.getAttribute("data-full");
textEl.setAttribute("data-expanded", expanded ? "0" : "1");
btn.textContent = expanded ? "показать полностью" : "свернуть";
}
async function deleteDoc(id, name) {
if (!confirm(`Удалить документ «${name}»?`)) return;
try {
await api(`/documents/${id}`, { method: "DELETE" });
toast("Удалён");
refreshDocs(); refreshHealth();
} catch (e) {
toast("Ошибка: " + e.message, "err");
}
}
async function uploadFile(file) {
$("upload-status").innerHTML = `<span class="spinner"></span> загружаю <b>${esc(file.name)}</b>…`;
const fd = new FormData();
fd.append("file", file);
try {
const r = await api("/documents/upload", { method: "POST", body: fd });
$("upload-status").innerHTML = `✓ <b>${esc(r.name)}</b> — ${r.chunks_count} чанков`;
toast("Загружено");
refreshDocs(); refreshHealth();
} catch (e) {
$("upload-status").innerHTML = `✕ ошибка: ${esc(e.message)}`;
toast("Ошибка загрузки: " + e.message, "err");
}
}
function initDropzone() {
const dz = $("dropzone");
const input = $("file-input");
dz.addEventListener("click", () => input.click());
input.addEventListener("change", () => {
if (input.files[0]) uploadFile(input.files[0]);
input.value = "";
});
["dragenter", "dragover"].forEach(e =>
dz.addEventListener(e, ev => { ev.preventDefault(); dz.classList.add("drag"); })
);
["dragleave", "drop"].forEach(e =>
dz.addEventListener(e, ev => { ev.preventDefault(); dz.classList.remove("drag"); })
);
dz.addEventListener("drop", ev => {
if (ev.dataTransfer.files[0]) uploadFile(ev.dataTransfer.files[0]);
});
}
async function ask() {
const question = $("question").value.trim();
if (!question) { toast("Введите вопрос", "err"); return; }
const btn = $("ask-btn");
btn.disabled = true;
$("ask-status").innerHTML = '<span class="spinner"></span> думаю…';
$("col-chunks").innerHTML = '<div class="mini">…</div>';
$("col-prompt").innerHTML = '<div class="mini">…</div>';
$("col-answer").innerHTML = '<div class="mini">…</div>';
try {
const r = await api("/query", {
method: "POST",
headers: {"Content-Type": "application/json"},
body: JSON.stringify({
text: question,
top_k: parseInt($("top_k").value, 10) || 5,
temperature: parseFloat($("temperature").value),
}),
});
$("col-chunks").innerHTML = r.sources.length
? r.sources.map((s, i) => `
<div class="chunk">
<div class="chunk-head">
<span>[${i + 1}] ${esc(s.document_name)} · ${esc(s.section || "—")}</span>
<span class="score">${(s.relevance_score * 100).toFixed(1)}%</span>
</div>
<div>${esc(s.chunk_text)}</div>
</div>`).join("")
: '<div class="mini">— нет релевантных чанков —</div>';
$("col-prompt").innerHTML = `<pre>${esc(r.assembled_prompt)}</pre>`;
$("col-answer").innerHTML = `
<div class="answer">${esc(r.answer)}</div>
<div class="answer-meta">модель: ${esc(r.model_used)} · источников: ${r.sources.length}</div>
`;
$("ask-status").textContent = "";
} catch (e) {
$("col-answer").innerHTML = `<div class="mini" style="color:var(--err)">Ошибка: ${esc(e.message)}</div>`;
$("ask-status").textContent = "";
toast("Ошибка: " + e.message, "err");
} finally {
btn.disabled = false;
}
}
$("ask-btn").addEventListener("click", ask);
$("question").addEventListener("keydown", e => {
if ((e.metaKey || e.ctrlKey) && e.key === "Enter") ask();
});
initDropzone();
refreshHealth();
refreshDocs();
setInterval(refreshHealth, 15000);
</script>
</body>
</html>