Files
RAG_helper/models/responses.py
T
AR 15 M4 bb5e3f5eb3 feat(sprint8b): регрессия ответов веток · general_info + фикс PRAGMA foreign_keys
Параллель к 8a, но проверяем не код intent от роутера, а содержимое ответа
конкретной ветки на одиночную реплику. Старт — general_info, 46 кейсов.

Логика pass/fail (для одного кейса):
- A — RAG-секция: среди retrieved-чанков есть кусок с
  section == expected_doc_section (точное совпадение). Если поле не задано —
  пропускаем.
- B — keywords: обязательные expected_keywords встречаются в predicted_answer
  (case-insensitive). По умолчанию все; поддерживаются keywords_min: N
  и keywords_any: true. Запрещённые expected_must_not — ни одного.
- Pass = A ∧ B. Незаданные поля не проверяются.
- Кэш: (text_hash, branch_config_id) → {answer_text, retrieved_sections}.
  Привязан к версии промпта ветки. Смена версии = пустой кэш = свежий прогон.
  Правка JSONL без изменения text → pass/fail пересчитывается без LLM.

Backend:
- Таблицы eval_branch_runs / eval_branch_run_cases / eval_branch_predictions.
  Миграция m9g1f7e89j56.
- services/eval_branch_run_service.py: загрузка JSONL, фоновый прогон через
  asyncio.create_task, кэш, оценка A+B с поддержкой keywords_min/keywords_any.
- chat_service.run_branch_single_turn — изолированный single-turn без
  роутера и треда (использует существующий config_service + vectorstore + llm).
- API: POST /eval/branch-runs, GET /eval/branch-runs?intent_code=,
  GET /eval/branch-runs/{id}, GET /eval/branch-cases-with-status?intent_code=.

UI (static/regression.html):
- Селектор режима «Роутер / Ветка · general_info». Логика пикера переиспользуется
  (фильтры, диапазон, массовый выбор, счётчик «новые / в кэше»).
- Для режима «Ветка»: фильтр по coverage, колонки секция/coverage, keywords,
  частота, кэш. Drill-down прогона: ожидание, retrieved-секции, причины fail,
  полный ответ ветки.

База кейсов (eval/branch_cases_general_info.jsonl) — от пользователя, 46 кейсов
по схеме {text, intent, coverage, expected_doc_section?, expected_keywords?,
expected_must_not?, keywords_min?, keywords_any?, count?, note?}.

Связанная правка SQLite (нашли при удалении документа в этом спринте):
- db/session.py: connect-listener PRAGMA foreign_keys=ON на каждое подключение.
  Без этого ondelete=CASCADE в SQLite не enforced, и удаление документа
  оставляло подписки в intent_documents висячими (что давало пустой RAG
  и fail регрессии).
- Миграция n0h2g8f9a0k67 — одноразовая чистка существующих висячих подписок.

docs/SPRINTS.md: Спринт 8b → Закрыт. Diff vs предыдущий прогон для веток
и кнопка «Сбросить кэш регрессии» вынесены в docs/BACKLOG.md.

Также включены обновлённые data/datasets/general_info.md и price_question.md
(рабочий материал оператора), и черновик eval/branch_cases_price_question.jsonl
для следующего захода (8b на price_question).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 01:20:59 +05:00

350 lines
7.7 KiB
Python

from pydantic import BaseModel, Field
class DocumentInfo(BaseModel):
    """Descriptive record for one document.

    All fields except ``metadata`` are required.
    """

    # Identity of the document.
    document_id: str
    name: str

    # Size and type information.
    chunks_count: int
    file_type: str

    # Creation timestamp carried as a plain string; no format is enforced here.
    created_at: str

    # Free-form extra attributes; a fresh empty dict per instance when omitted.
    metadata: dict = Field(default_factory=dict)
class ChunkPreview(BaseModel):
    """Short preview of a single chunk.

    Only ``index`` is required; every other field has an empty/zero default.
    """

    # Position of the chunk (required).
    index: int

    # Location hints; empty string / 0 when not supplied.
    section: str = ""
    page_number: int = 0

    # Preview text and a character length.
    # NOTE(review): presumably char_length is the length of the full chunk,
    # not of the preview — confirm against the producer.
    text_preview: str = ""
    char_length: int = 0
class DocumentUploadResponse(BaseModel):
    """Response body describing the outcome of a document upload."""

    # Identity of the uploaded document.
    document_id: str
    name: str

    chunks_count: int

    # Status label; "indexed" unless the caller overrides it.
    status: str = "indexed"

    # Creation timestamp as a plain string; no format is enforced here.
    created_at: str

    # Optional chunk previews; a fresh empty list per instance when omitted.
    chunks_preview: list[ChunkPreview] = Field(default_factory=list)
class DocumentListResponse(BaseModel):
    """Response body carrying a list of documents plus a total counter."""

    documents: list[DocumentInfo]

    # NOTE(review): presumably the number of entries in `documents` —
    # confirm whether it can differ (e.g. under pagination).
    total: int
class ChunkDetail(BaseModel):
    """Full detail of a single chunk, including its text and embedding.

    Only ``index`` is required; the remaining fields default to empty values.
    """

    # Position of the chunk (required).
    index: int

    # Location hints; empty string / 0 when not supplied.
    section: str = ""
    page_number: int = 0

    # Complete chunk text and a character length.
    text: str = ""
    char_length: int = 0

    # Embedding vector; a fresh empty list per instance when omitted.
    embedding: list[float] = Field(default_factory=list)
    # NOTE(review): presumably equals len(embedding) — nothing here enforces it.
    embedding_dim: int = 0
class DocumentChunksResponse(BaseModel):
    """Response body listing the detailed chunks of one document."""

    # Identity and type of the document.
    document_id: str
    name: str
    file_type: str

    chunks_count: int

    # Detailed chunks; a fresh empty list per instance when omitted.
    chunks: list[ChunkDetail] = Field(default_factory=list)
class DocumentDeleteResponse(BaseModel):
    """Response body reporting the result of a document deletion."""

    # Success flag; True unless explicitly overridden.
    ok: bool = True

    # Count of deleted chunks (required).
    deleted_chunks: int
class IntentDocumentsResponse(BaseModel):
    """Response body pairing one intent code with a list of document ids."""

    intent_code: str

    # Document ids associated with the intent; empty list when omitted.
    document_ids: list[str] = Field(default_factory=list)
class DocumentIntentsResponse(BaseModel):
    """Response body pairing one document id with a list of intent codes."""

    document_id: str

    # Intent codes associated with the document; empty list when omitted.
    intent_codes: list[str] = Field(default_factory=list)
class SourceInfo(BaseModel):
    """One source chunk attached to an answer."""

    # Document the chunk belongs to.
    document_id: str
    document_name: str

    # Text of the chunk itself (required).
    chunk_text: str

    # Location hints; empty string / 0 when not supplied.
    section: str = ""
    page: int = 0

    # Relevance score; 0.0 when not supplied.
    # NOTE(review): scale and direction are not visible here — confirm.
    relevance_score: float = 0.0
class QueryResponse(BaseModel):
    """Response body for a query: the answer, its sources and debug details."""

    # Core result (all required).
    answer: str
    sources: list[SourceInfo]
    model_used: str

    # Prompt text as assembled for the model; empty string when not supplied.
    assembled_prompt: str = ""

    # Intent code; defaults to the "_debug" marker.
    intent_code: str = "_debug"
    config_version: int | None = None

    # {"subscribed_count": int, "found_count": int} — Sprint 7
    rag_subscription: dict | None = None
class HealthResponse(BaseModel):
    """Response body for a health check."""

    # Overall status label; "ok" unless overridden.
    status: str = "ok"

    # Component descriptors, carried as plain strings.
    chromadb: str
    embedding_model: str

    # Counters.
    documents_count: int
    chunks_count: int
class MessageInfo(BaseModel):
    """One message with its optional sources and diagnostics."""

    # Required core fields.
    id: int
    role: str
    text: str
    # Creation timestamp as a plain string; no format is enforced here.
    created_at: str

    # Sources attached to the message; empty list when omitted.
    sources: list[SourceInfo] = Field(default_factory=list)

    # Prompt text as assembled for the model; empty string when not supplied.
    assembled_prompt: str = ""

    # Intent code and display name; empty strings when not supplied.
    intent_code: str = ""
    intent_name: str = ""

    # Optional extras; None when absent.
    meta: dict | None = None
    escalation_reason: str | None = None
class ThreadInfo(BaseModel):
    """Summary record for one thread."""

    id: int
    name: str

    # Timestamps carried as plain strings; no format is enforced here.
    created_at: str
    updated_at: str

    messages_count: int

    # Preview of the first message; empty string when not supplied.
    first_message_preview: str = ""
class ThreadListResponse(BaseModel):
    """Response body carrying a list of threads plus a total counter."""

    threads: list[ThreadInfo]

    # NOTE(review): presumably the number of entries in `threads` — confirm.
    total: int
class ThreadStateInfo(BaseModel):
    """Snapshot of a thread's state.

    Every field has a default, so the model can be built with no arguments.
    """

    # Current position: intent code, numeric step and step code.
    current_intent_code: str | None = None
    current_step: int = 0
    current_step_code: str | None = None

    # Collected slot values; a fresh empty dict per instance when omitted.
    slots: dict = Field(default_factory=dict)

    # Counters, both starting at 0.
    handoff_count: int = 0
    soft_insertion_count: int = 0

    # Suspended-intent bookkeeping.
    # NOTE(review): presumably the resumable_* fields describe where the
    # suspended intent would continue — confirm against the dialog engine.
    suspended_intent: str | None = None
    resumable_step_code: str | None = None
    resumable_slots: dict = Field(default_factory=dict)

    # Guard awaiting resolution, if any; structure is not defined here.
    pending_guard: dict | None = None
class BounceInfo(BaseModel):
    """Record of one bounce: a source, a target and an optional preface."""

    # `from` is a Python keyword, so the attribute is named `from_` and is
    # exposed under the alias "from".
    from_: str = Field(alias="from")
    to: str

    # Preface text; empty string when not supplied.
    preface: str = ""

    # Lets the model be populated by the field name (`from_`) as well as by
    # the alias ("from").
    model_config = {"populate_by_name": True}
class ValidationEventInfo(BaseModel):
    """Record of one validation event for a step transition."""

    # Required: the current step, the requested step and the reason text.
    current_step: str
    requested_step: str
    reason: str

    # Guard details; None / empty values when not supplied.
    guard_name: str | None = None
    missing_slots: list[str] = Field(default_factory=list)
    guard_description: str = ""
class ThreadDetailResponse(BaseModel):
    """Response body with one thread's details, messages and optional state."""

    id: int
    name: str

    # Timestamps carried as plain strings; no format is enforced here.
    created_at: str
    updated_at: str

    # Messages of the thread; a fresh empty list per instance when omitted.
    messages: list[MessageInfo] = Field(default_factory=list)

    # State snapshot; None when not supplied.
    thread_state: ThreadStateInfo | None = None
class ChatResponse(BaseModel):
    """Response body for one chat turn: answer, routing info and diagnostics."""

    # Thread and message the turn belongs to (required).
    thread_id: int
    thread_name: str
    message_id: int

    # Intent information; empty strings when not supplied.
    intent_code: str = ""
    intent_name: str = ""
    router_intent_code: str = ""

    # Configuration versions.
    config_version: int = 0
    router_version: int | None = None

    # Core result (all required).
    answer: str
    sources: list[SourceInfo]
    model_used: str

    # Prompt text as assembled for the model; empty string when not supplied.
    assembled_prompt: str = ""

    # State snapshot; a default-constructed ThreadStateInfo when omitted.
    thread_state: ThreadStateInfo = Field(default_factory=ThreadStateInfo)

    # Per-turn event lists; fresh empty lists when omitted.
    bounces: list[BounceInfo] = Field(default_factory=list)
    validation_events: list[ValidationEventInfo] = Field(default_factory=list)

    # Diagnostics flags and messages.
    parse_error: str | None = None
    routing_loop_triggered: bool = False
    resumed_from_suspended: bool = False

    # Optional extras; None when absent.
    message_meta: dict | None = None
    escalation_reason: str | None = None
    operator_summary: dict | None = None

    # Router prompt text; empty string when not supplied.
    router_assembled_prompt: str = ""

    # {"subscribed_count": int, "found_count": int} — Sprint 7
    rag_subscription: dict | None = None
class ThreadDeleteResponse(BaseModel):
    """Response body reporting the result of a thread deletion."""

    # Success flag; True unless explicitly overridden.
    ok: bool = True

    # Count of deleted messages (required).
    deleted_messages: int
class AgentConfigInfo(BaseModel):
    """One agent configuration version with its prompt texts."""

    id: int

    # Intent the configuration relates to; None / empty when not supplied.
    intent_id: int | None = None
    intent_code: str = ""
    intent_name: str = ""

    # Version number (required) and optional display name.
    version: int
    name: str | None = None

    # Prompt material: system prompt is required, the rest default to "".
    system_prompt: str
    rules_text: str = ""
    exit_conditions_text: str = ""

    is_active: bool

    # Creation timestamp as a plain string; no format is enforced here.
    created_at: str
class AgentConfigListResponse(BaseModel):
    """Response body carrying a list of agent configs plus a total counter."""

    configs: list[AgentConfigInfo]

    # NOTE(review): presumably the number of entries in `configs` — confirm.
    total: int
class AgentConfigDeleteResponse(BaseModel):
    """Response body acknowledging an agent-config deletion."""

    # Success flag; True unless explicitly overridden.
    ok: bool = True
class IntentInfo(BaseModel):
    """Descriptive record for one intent."""

    # Identity.
    id: int
    code: str
    name: str

    # Description; empty string when not supplied.
    description: str = ""

    # Enabled flag and ordering index (both required).
    is_enabled: bool
    order_index: int

    # Active configuration reference; None when not supplied.
    active_config_id: int | None = None
    active_config_version: int | None = None
class IntentListResponse(BaseModel):
    """Response body carrying a list of intents plus a total counter."""

    intents: list[IntentInfo]

    # NOTE(review): presumably the number of entries in `intents` — confirm.
    total: int
class IntentStepInfo(BaseModel):
    """One step of an intent, with its prompt, transitions and guards."""

    # Identity and owning intent.
    id: int
    intent_id: int
    intent_code: str = ""

    # Step code, display name and ordering index (all required).
    code: str
    name: str
    order_index: int

    # Step prompt; empty string when not supplied.
    system_prompt: str = ""

    # Codes listed as allowed next steps; empty list when omitted.
    allowed_next: list[str] = Field(default_factory=list)

    # Guard definitions; structure is not defined here. Empty dict when omitted.
    guards: dict = Field(default_factory=dict)

    # Last-update timestamp as a plain string; no format is enforced here.
    updated_at: str
class IntentStepListResponse(BaseModel):
    """Response body listing the steps of one intent."""

    intent_code: str
    steps: list[IntentStepInfo]

    # NOTE(review): presumably the number of entries in `steps` — confirm.
    total: int
class IntentStepGraphInfo(BaseModel):
    """Summary record for one versioned step graph of an intent.

    All fields are required.
    """

    id: int
    intent_code: str

    # Version number and display name.
    version: int
    name: str

    is_active: bool
    steps_count: int

    # Creation timestamp as a plain string; no format is enforced here.
    created_at: str
class IntentStepGraphListResponse(BaseModel):
    """Response body listing the step graphs of one intent."""

    intent_code: str
    graphs: list[IntentStepGraphInfo]

    # Nullable but declared without a default.
    # NOTE(review): under pydantic v2 this makes the key required even when
    # its value is None — confirm that is intended.
    active_graph_id: int | None

    # NOTE(review): presumably the number of entries in `graphs` — confirm.
    total: int
# ---------- Regression runs (Sprint 8a) ----------
class EvalRunInfo(BaseModel):
    """Summary of one regression run.

    No field has a default. The nullable ones (``... | None``) are declared
    without ``= None``.
    NOTE(review): under pydantic v2 that makes them required keys whose value
    may be None — confirm that is intended.
    """

    id: int
    suite: str

    # Router configuration the run used, if any.
    router_config_id: int | None
    router_config_version: int | None

    min_count: int

    # Status label; the set of allowed values is not defined here.
    status: str

    # Outcome counters.
    total: int
    passed: int
    failed: int
    cache_hits: int

    # Error text, or None.
    error_text: str | None

    # Timestamps as plain strings; finished_at may be None.
    started_at: str
    finished_at: str | None
class EvalRunCaseInfo(BaseModel):
    """One case of a regression run: input text, expected and predicted intent."""

    # Input text of the case.
    text: str

    # Expected vs. predicted intent.
    expected_intent: str
    predicted_intent: str

    # Weight of the case.
    # NOTE(review): how the weight feeds into run totals is not visible here.
    count_weight: int

    # NOTE(review): defaults to True, so a case built without this flag reads
    # as passed — confirm that is intended.
    is_pass: bool = True
class EvalRunDiffInfo(BaseModel):
    """Difference between a run and a previous run.

    All three fields are declared without defaults.
    """

    # Id of the run compared against; may be None.
    prev_run_id: int | None

    # Cases listed as newly failing and newly passing.
    new_fails: list[EvalRunCaseInfo]
    new_passes: list[EvalRunCaseInfo]
class EvalRunDetailResponse(BaseModel):
    """Response body with one regression run, its cases and a diff.

    All fields are required.
    """

    run: EvalRunInfo
    cases: list[EvalRunCaseInfo]
    diff: EvalRunDiffInfo
class EvalRunListResponse(BaseModel):
    """Response body carrying a list of regression runs plus a total counter."""

    runs: list[EvalRunInfo]

    # NOTE(review): presumably the number of entries in `runs` — confirm.
    total: int
# ---------- Branch regression (Sprint 8b) ----------
class EvalBranchRunInfo(BaseModel):
    """Summary of one branch regression run.

    No field has a default. The nullable ones (``... | None``) are declared
    without ``= None``.
    NOTE(review): under pydantic v2 that makes them required keys whose value
    may be None — confirm that is intended.
    """

    id: int
    suite: str

    # Intent code the run relates to.
    intent_code: str

    # Branch configuration the run used, if any.
    branch_config_id: int | None
    branch_config_version: int | None

    # Status label; the set of allowed values is not defined here.
    status: str

    # Outcome counters.
    total: int
    passed: int
    failed: int
    cache_hits: int

    # Error text, or None.
    error_text: str | None

    # Timestamps as plain strings; finished_at may be None.
    started_at: str
    finished_at: str | None
class EvalBranchRunCaseInfo(BaseModel):
    """One case of a branch regression run: expectations, prediction, verdict.

    Every field is declared without a default.
    """

    # Input text and its coverage label.
    text: str
    coverage: str

    # Expectations for the case.
    expected_doc_section: str | None
    expected_keywords: list[str]
    expected_must_not: list[str]
    # NOTE(review): only keywords_min is exposed here; there is no
    # keywords_any field on this model — confirm that is intended.
    keywords_min: int | None

    # What was produced: the answer text and the sections, as plain dicts
    # whose structure is not defined here.
    predicted_answer: str
    predicted_sections: list[dict]

    # Verdict and the reasons recorded for a failure.
    is_pass: bool
    fail_reasons: list[str]

    # Weight of the case.
    # NOTE(review): how the weight feeds into run totals is not visible here.
    count_weight: int
class EvalBranchRunDetailResponse(BaseModel):
    """Response body with one branch regression run and its cases.

    Both fields are required.
    """

    run: EvalBranchRunInfo
    cases: list[EvalBranchRunCaseInfo]
class EvalBranchRunListResponse(BaseModel):
    """Response body carrying a list of branch regression runs plus a total."""

    runs: list[EvalBranchRunInfo]

    # NOTE(review): presumably the number of entries in `runs` — confirm.
    total: int