bb5e3f5eb3
Параллель к 8a, но проверяем не код intent от роутера, а содержимое ответа
конкретной ветки на одиночную реплику. Старт — general_info, 46 кейсов.
Логика pass/fail (для одного кейса):
- A — RAG-секция: среди retrieved-чанков есть кусок с
section == expected_doc_section (точное совпадение). Если поле не задано —
пропускаем.
- B — keywords: обязательные expected_keywords встречаются в predicted_answer
(case-insensitive). По умолчанию все; поддерживаются keywords_min: N
и keywords_any: true. Запрещённые expected_must_not — ни одного.
- Pass = A ∧ B. Незаданные поля не проверяются.
- Кэш: (text_hash, branch_config_id) → {answer_text, retrieved_sections}.
Привязан к версии промпта ветки. Смена версии = пустой кэш = свежий прогон.
Правка JSONL без изменения text → pass/fail пересчитывается без LLM.
Backend:
- Таблицы eval_branch_runs / eval_branch_run_cases / eval_branch_predictions.
Миграция m9g1f7e89j56.
- services/eval_branch_run_service.py: загрузка JSONL, фоновый прогон через
asyncio.create_task, кэш, оценка A+B с поддержкой keywords_min/keywords_any.
- chat_service.run_branch_single_turn — изолированный single-turn без
роутера и треда (использует существующий config_service + vectorstore + llm).
- API: POST /eval/branch-runs, GET /eval/branch-runs?intent_code=,
GET /eval/branch-runs/{id}, GET /eval/branch-cases-with-status?intent_code=.
UI (static/regression.html):
- Селектор режима «Роутер / Ветка · general_info». Логика пикера переиспользуется
(фильтры, диапазон, массовый выбор, счётчик «новые / в кэше»).
- Для режима «Ветка»: фильтр по coverage, колонки секция/coverage, keywords,
частота, кэш. Drill-down прогона: ожидание, retrieved-секции, причины fail,
полный ответ ветки.
База кейсов (eval/branch_cases_general_info.jsonl) — от пользователя, 46 кейсов
по схеме {text, intent, coverage, expected_doc_section?, expected_keywords?,
expected_must_not?, keywords_min?, keywords_any?, count?, note?}.
Связанная правка SQLite (нашли при удалении документа в этом спринте):
- db/session.py: connect-listener PRAGMA foreign_keys=ON на каждое подключение.
Без этого SQLite не применяет ondelete=CASCADE, и удаление документа
оставляло в intent_documents висячие подписки (из-за чего RAG возвращал
пустой результат, а регрессия падала).
- Миграция n0h2g8f9a0k67 — одноразовая чистка существующих висячих подписок.
docs/SPRINTS.md: Спринт 8b → ✅ Закрыт. Diff vs предыдущий прогон для веток
и кнопка «Сбросить кэш регрессии» вынесены в docs/BACKLOG.md.
Также включены обновлённые data/datasets/general_info.md и price_question.md
(рабочий материал оператора), и черновик eval/branch_cases_price_question.jsonl
для следующего захода (8b на price_question).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
350 lines
7.7 KiB
Python
350 lines
7.7 KiB
Python
from pydantic import BaseModel, Field
|
|
|
|
|
|
class DocumentInfo(BaseModel):
    """Summary record for a single document.

    Holds the document id and name, its chunk count, file type, a creation
    timestamp string and a free-form metadata mapping.
    """

    document_id: str
    name: str
    chunks_count: int
    file_type: str
    created_at: str  # plain string; no date format is enforced by this model
    # A new empty dict is created per instance when metadata is omitted.
    metadata: dict = Field(default_factory=dict)
|
|
|
|
|
|
class ChunkPreview(BaseModel):
    """Short preview of one chunk.

    Only ``index`` is required; the remaining fields fall back to empty or
    zero values.
    """

    index: int
    section: str = ""
    page_number: int = 0
    text_preview: str = ""
    char_length: int = 0
|
|
|
|
|
|
class DocumentUploadResponse(BaseModel):
    """Payload describing a document after upload.

    ``status`` defaults to ``"indexed"``; ``chunks_preview`` defaults to an
    empty list of :class:`ChunkPreview` items.
    """

    document_id: str
    name: str
    chunks_count: int
    status: str = "indexed"
    created_at: str
    # A new empty list is created per instance when no previews are given.
    chunks_preview: list[ChunkPreview] = Field(default_factory=list)
|
|
|
|
|
|
class DocumentListResponse(BaseModel):
    """List of :class:`DocumentInfo` entries plus a ``total`` count.

    ``total`` is supplied by the caller; this model does not derive it from
    ``documents``.
    """

    documents: list[DocumentInfo]
    total: int
|
|
|
|
|
|
class ChunkDetail(BaseModel):
    """Full view of one chunk, including its text and embedding vector.

    Only ``index`` is required; all other fields default to empty or zero
    values.
    """

    index: int
    section: str = ""
    page_number: int = 0
    text: str = ""
    char_length: int = 0
    # A new empty list is created per instance when no embedding is given.
    embedding: list[float] = Field(default_factory=list)
    # Supplied separately by the caller; not computed from ``embedding`` here.
    embedding_dim: int = 0
|
|
|
|
|
|
class DocumentChunksResponse(BaseModel):
    """Document header fields together with its :class:`ChunkDetail` list.

    ``chunks`` defaults to an empty list.
    """

    document_id: str
    name: str
    file_type: str
    chunks_count: int
    chunks: list[ChunkDetail] = Field(default_factory=list)
|
|
|
|
|
|
class DocumentDeleteResponse(BaseModel):
    """Result of a document deletion: an ``ok`` flag and a chunk count."""

    ok: bool = True  # defaults to True when not provided
    deleted_chunks: int
|
|
|
|
|
|
class IntentDocumentsResponse(BaseModel):
    """Document ids associated with one intent code.

    ``document_ids`` defaults to an empty list.
    """

    intent_code: str
    document_ids: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class DocumentIntentsResponse(BaseModel):
    """Intent codes associated with one document id.

    ``intent_codes`` defaults to an empty list.
    """

    document_id: str
    intent_codes: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class SourceInfo(BaseModel):
    """One source chunk referenced by an answer.

    Identifies the document (id and name), carries the chunk text, and
    optionally its section, page and a relevance score.
    """

    document_id: str
    document_name: str
    chunk_text: str
    section: str = ""
    page: int = 0
    relevance_score: float = 0.0  # scale/range is not defined by this model
|
|
|
|
|
|
class QueryResponse(BaseModel):
    """Answer to a query, with its sources and the model that produced it.

    ``intent_code`` defaults to ``"_debug"``; ``config_version`` and
    ``rag_subscription`` are optional and default to ``None``.
    """

    answer: str
    sources: list[SourceInfo]
    model_used: str
    assembled_prompt: str = ""
    intent_code: str = "_debug"
    config_version: int | None = None
    # {"subscribed_count": int, "found_count": int} — Sprint 7
    rag_subscription: dict | None = None
|
|
|
|
|
|
class HealthResponse(BaseModel):
    """Health report: overall status, ChromaDB state, embedding model, counts.

    ``status`` defaults to ``"ok"``; every other field is required.
    """

    status: str = "ok"
    chromadb: str  # free-form string; allowed values are not constrained here
    embedding_model: str
    documents_count: int
    chunks_count: int
|
|
|
|
|
|
class MessageInfo(BaseModel):
    """One message: id, role, text and creation time plus optional extras.

    The optional part covers sources, the assembled prompt, intent code and
    name, a ``meta`` mapping and an escalation reason.
    """

    # Required core of the message.
    id: int
    role: str
    text: str
    created_at: str

    # Optional details; each falls back to an empty value or None.
    sources: list[SourceInfo] = Field(default_factory=list)
    assembled_prompt: str = ""
    intent_code: str = ""
    intent_name: str = ""
    meta: dict | None = None
    escalation_reason: str | None = None
|
|
|
|
|
|
class ThreadInfo(BaseModel):
    """Listing entry for a thread.

    Carries the id, name, created/updated timestamp strings, a message count
    and an optional preview of the first message.
    """

    id: int
    name: str
    created_at: str
    updated_at: str
    messages_count: int
    first_message_preview: str = ""
|
|
|
|
|
|
class ThreadListResponse(BaseModel):
    """List of :class:`ThreadInfo` entries plus a ``total`` count.

    ``total`` is supplied by the caller; it is not derived from ``threads``.
    """

    threads: list[ThreadInfo]
    total: int
|
|
|
|
|
|
class ThreadStateInfo(BaseModel):
    """State fields tracked for a thread.

    Every field has a default, so the model can be constructed with no
    arguments.
    """

    # Current position: intent, numeric step and step code.
    current_intent_code: str | None = None
    current_step: int = 0
    current_step_code: str | None = None
    slots: dict = Field(default_factory=dict)

    # Counters.
    handoff_count: int = 0
    soft_insertion_count: int = 0

    # Suspended/resumable fields; exact semantics live outside this model.
    suspended_intent: str | None = None
    resumable_step_code: str | None = None
    resumable_slots: dict = Field(default_factory=dict)

    pending_guard: dict | None = None
|
|
|
|
|
|
class BounceInfo(BaseModel):
    """A bounce from one place to another, with an optional preface text.

    ``from`` is a reserved word in Python, so the attribute is declared as
    ``from_`` and exposed under the alias ``"from"``.
    """

    from_: str = Field(alias="from")
    to: str
    preface: str = ""

    # Accept the field name ``from_`` on input as well as the alias "from".
    model_config = {"populate_by_name": True}
|
|
|
|
|
|
class ValidationEventInfo(BaseModel):
    """Record of a validation event for a step transition.

    Requires the current step, the requested step and a reason; the guard
    name, missing slots and guard description are optional.
    """

    current_step: str
    requested_step: str
    reason: str
    guard_name: str | None = None
    # A new empty list is created per instance when none is given.
    missing_slots: list[str] = Field(default_factory=list)
    guard_description: str = ""
|
|
|
|
|
|
class ThreadDetailResponse(BaseModel):
    """Thread header with its messages and, when present, its state.

    ``messages`` defaults to an empty list and ``thread_state`` to ``None``.
    """

    id: int
    name: str
    created_at: str
    updated_at: str
    messages: list[MessageInfo] = Field(default_factory=list)
    thread_state: ThreadStateInfo | None = None
|
|
|
|
|
|
class ChatResponse(BaseModel):
    """Response for one chat turn.

    Required fields are ``thread_id``, ``thread_name``, ``message_id``,
    ``answer``, ``sources`` and ``model_used``. Everything else has a default.
    """

    # Thread and message identity.
    thread_id: int
    thread_name: str
    message_id: int

    # Intent and configuration versions.
    intent_code: str = ""
    intent_name: str = ""
    router_intent_code: str = ""
    config_version: int = 0
    router_version: int | None = None

    # The answer itself.
    answer: str
    sources: list[SourceInfo]
    model_used: str
    assembled_prompt: str = ""

    # A default ThreadStateInfo() is built per instance when none is given.
    thread_state: ThreadStateInfo = Field(default_factory=ThreadStateInfo)
    bounces: list[BounceInfo] = Field(default_factory=list)
    validation_events: list[ValidationEventInfo] = Field(default_factory=list)

    # Diagnostics and flags.
    parse_error: str | None = None
    routing_loop_triggered: bool = False
    resumed_from_suspended: bool = False
    message_meta: dict | None = None
    escalation_reason: str | None = None
    operator_summary: dict | None = None
    router_assembled_prompt: str = ""
    # {"subscribed_count": int, "found_count": int} — Sprint 7
    rag_subscription: dict | None = None
|
|
|
|
|
|
class ThreadDeleteResponse(BaseModel):
    """Result of a thread deletion: an ``ok`` flag and a message count."""

    ok: bool = True  # defaults to True when not provided
    deleted_messages: int
|
|
|
|
|
|
class AgentConfigInfo(BaseModel):
    """One agent configuration version.

    Carries the intent it refers to, a version number, prompt and rule texts,
    the active flag and a creation timestamp string.
    """

    id: int

    # Intent reference; all three parts are optional.
    intent_id: int | None = None
    intent_code: str = ""
    intent_name: str = ""

    version: int
    name: str | None = None

    # Text content of the configuration.
    system_prompt: str
    rules_text: str = ""
    exit_conditions_text: str = ""

    is_active: bool
    created_at: str
|
|
|
|
|
|
class AgentConfigListResponse(BaseModel):
    """List of :class:`AgentConfigInfo` entries plus a ``total`` count.

    ``total`` is supplied by the caller; it is not derived from ``configs``.
    """

    configs: list[AgentConfigInfo]
    total: int
|
|
|
|
|
|
class AgentConfigDeleteResponse(BaseModel):
    """Result of an agent-config deletion; carries only an ``ok`` flag."""

    ok: bool = True  # defaults to True when not provided
|
|
|
|
|
|
class IntentInfo(BaseModel):
    """Description of one intent.

    Holds its id, code, name and description, the enabled flag and ordering
    index, and optionally the id and version of its active config.
    """

    id: int
    code: str
    name: str
    description: str = ""
    is_enabled: bool
    order_index: int

    # Left as None when no active config id/version is supplied.
    active_config_id: int | None = None
    active_config_version: int | None = None
|
|
|
|
|
|
class IntentListResponse(BaseModel):
    """List of :class:`IntentInfo` entries plus a ``total`` count.

    ``total`` is supplied by the caller; it is not derived from ``intents``.
    """

    intents: list[IntentInfo]
    total: int
|
|
|
|
|
|
class IntentStepInfo(BaseModel):
    """One step of an intent.

    Carries its identity, ordering index, system prompt, the list of allowed
    next step codes, a guards mapping and an update timestamp string.
    """

    id: int
    intent_id: int
    intent_code: str = ""
    code: str
    name: str
    order_index: int
    system_prompt: str = ""

    # Both containers get a new empty instance when omitted.
    allowed_next: list[str] = Field(default_factory=list)
    guards: dict = Field(default_factory=dict)

    updated_at: str
|
|
|
|
|
|
class IntentStepListResponse(BaseModel):
    """Steps of one intent: the intent code, the steps and a ``total`` count.

    ``total`` is supplied by the caller; it is not derived from ``steps``.
    """

    intent_code: str
    steps: list[IntentStepInfo]
    total: int
|
|
|
|
|
|
class IntentStepGraphInfo(BaseModel):
    """One versioned step graph of an intent.

    All fields are required: id, intent code, version, name, active flag,
    step count and a creation timestamp string.
    """

    id: int
    intent_code: str
    version: int
    name: str
    is_active: bool
    steps_count: int
    created_at: str
|
|
|
|
|
|
class IntentStepGraphListResponse(BaseModel):
    """Step graphs of one intent, the active graph id and a ``total`` count."""

    intent_code: str
    graphs: list[IntentStepGraphInfo]
    # Nullable but has no default, so the caller must pass it explicitly.
    active_graph_id: int | None
    total: int
|
|
|
|
|
|
# ---------- Прогоны регрессии (Спринт 8a) ----------
|
|
|
|
class EvalRunInfo(BaseModel):
    """Summary of one regression run.

    No field has a default. The ``| None`` fields are nullable but must still
    be passed explicitly.
    """

    id: int
    suite: str

    # Router config the run refers to; either part may be None.
    router_config_id: int | None
    router_config_version: int | None

    min_count: int
    status: str

    # Counters for the run.
    total: int
    passed: int
    failed: int
    cache_hits: int

    error_text: str | None
    started_at: str
    finished_at: str | None
|
|
|
|
|
|
class EvalRunCaseInfo(BaseModel):
    """One case of a regression run.

    Holds the input text, the expected and predicted intent, a count weight
    and the pass flag.
    """

    text: str
    expected_intent: str
    predicted_intent: str
    count_weight: int
    # NOTE(review): defaults to True when omitted, whereas
    # EvalBranchRunCaseInfo.is_pass is required — confirm this is intended.
    is_pass: bool = True
|
|
|
|
|
|
class EvalRunDiffInfo(BaseModel):
    """Difference against a previous run.

    Holds the previous run id (nullable, but must be passed) and two lists of
    :class:`EvalRunCaseInfo`: newly failing and newly passing cases.
    """

    prev_run_id: int | None
    new_fails: list[EvalRunCaseInfo]
    new_passes: list[EvalRunCaseInfo]
|
|
|
|
|
|
class EvalRunDetailResponse(BaseModel):
    """Detailed view of a run: its summary, its cases and a diff section."""

    run: EvalRunInfo
    cases: list[EvalRunCaseInfo]
    diff: EvalRunDiffInfo
|
|
|
|
|
|
class EvalRunListResponse(BaseModel):
    """List of :class:`EvalRunInfo` entries plus a ``total`` count.

    ``total`` is supplied by the caller; it is not derived from ``runs``.
    """

    runs: list[EvalRunInfo]
    total: int
|
|
|
|
|
|
# ---------- Регрессия веток (Спринт 8b) ----------
|
|
|
|
class EvalBranchRunInfo(BaseModel):
    """Summary of one branch regression run.

    No field has a default. The ``| None`` fields are nullable but must still
    be passed explicitly.
    """

    id: int
    suite: str
    intent_code: str

    # Branch config the run refers to; either part may be None.
    branch_config_id: int | None
    branch_config_version: int | None

    status: str

    # Counters for the run.
    total: int
    passed: int
    failed: int
    cache_hits: int

    error_text: str | None
    started_at: str
    finished_at: str | None
|
|
|
|
|
|
class EvalBranchRunCaseInfo(BaseModel):
    """One case of a branch regression run.

    Groups the case input and expectations (``expected_*``, ``keywords_min``),
    what was produced (``predicted_*``) and the verdict with its fail reasons.
    No field has a default.
    """

    # Case input.
    text: str
    coverage: str

    # Expectations; the nullable ones may be None but must be passed.
    expected_doc_section: str | None
    expected_keywords: list[str]
    expected_must_not: list[str]
    keywords_min: int | None

    # Produced output. The dict layout of predicted_sections is not fixed by
    # this model — presumably one entry per retrieved section; confirm against
    # the producing service.
    predicted_answer: str
    predicted_sections: list[dict]

    # Verdict.
    is_pass: bool
    fail_reasons: list[str]
    count_weight: int
|
|
|
|
|
|
class EvalBranchRunDetailResponse(BaseModel):
    """Detailed view of a branch run: its summary and its cases."""

    run: EvalBranchRunInfo
    cases: list[EvalBranchRunCaseInfo]
|
|
|
|
|
|
class EvalBranchRunListResponse(BaseModel):
    """List of :class:`EvalBranchRunInfo` entries plus a ``total`` count.

    ``total`` is supplied by the caller; it is not derived from ``runs``.
    """

    runs: list[EvalBranchRunInfo]
    total: int
|