a8f7e68795
Оператор-настройщик после правки промпта _router нажимает «Прогнать выбранное»
на странице «Регрессия» и видит, что сломалось. Не CLI, не в обход
интерфейса — встроено в верхнюю навигацию рядом с Настройками.
Backend:
- Таблицы eval_runs / eval_run_cases (с is_pass) / eval_router_predictions
(кэш text_hash + router_config_id → predicted_intent). Миграции
k7e9d5c67h34 и l8f0e6d78i45.
- services/eval_run_service.py: start_router_run(text_hashes) запускает
фоновую корутину через asyncio.create_task, фиксирует активную версию
_router. Кэш привязан к версии: повторный прогон на той же версии —
мгновенный, на новой — пересчитывается. compute_diff_vs_previous
сравнивает с предыдущим прогоном на той же версии (новые fail / pass).
- API: POST /eval/runs (фон, body text_hashes), GET /eval/runs,
GET /eval/runs/{id}, GET /eval/router-cases-with-status (все 1573 кейса
+ кэш на активной версии).
Frontend (static/regression.html — новая страница, ссылка добавлена в
шапки index/sandbox/settings/docs):
- Сворачиваемый блок «Выбор кейсов»: фильтр по intent, ввод диапазона
(1-50, 200-300), кнопки «Все видимые», «Снять все», «Только без кэша»,
«Только FAIL в кэше», «Снять кэшированные». Чекбокс в шапке.
- Таблица 1573 кейсов отсортирована по count desc: #, чекбокс, запрос,
intent, частота, кэш (PASS / FAIL → predicted / —). Цветной фон строки
по статусу кэша.
- Счётчик «выбрано N (новых: X, в кэше: Y)»; кнопка
«Прогнать выбранное (X новых + Y из кэша)» — сразу видно реальный
объём LLM-работы.
- Polling /eval/runs/{id} раз в 2 секунды, прогресс-бар, drill-down:
все кейсы прогона + фильтр pass/fail + поиск + diff vs предыдущий
(новые fail / новые pass).
docs/SPRINTS.md: Спринт 8 разбит на 8a (✅ закрыт), 8b (регрессия ответов
веток, ждёт базу кейсов от пользователя), 8c (handoff/resumable/loop/
guard/rag — позже).
docs/BACKLOG.md: новый файл для идей на потом. Записаны: просмотр
архивного графа без активации (из 7.7), варианты C (LLM-judge) и D
(эталон + embeddings) для регрессии веток в 8b.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
308 lines
6.8 KiB
Python
308 lines
6.8 KiB
Python
from pydantic import BaseModel, Field
|
|
|
|
|
|
class DocumentInfo(BaseModel):
    """Summary record for a single document.

    Every field is required except ``metadata``, which falls back to a
    fresh empty dict per instance.
    """

    document_id: str
    name: str
    chunks_count: int
    file_type: str
    created_at: str  # carried as a plain string; no format is enforced here
    metadata: dict = Field(default_factory=dict)
|
|
|
|
|
|
class ChunkPreview(BaseModel):
    """Lightweight preview of one chunk.

    Only ``index`` is required; the remaining fields default to an empty
    string or zero.
    """

    index: int
    section: str = ""
    page_number: int = 0
    text_preview: str = ""
    char_length: int = 0
|
|
|
|
|
|
class DocumentUploadResponse(BaseModel):
    """Response body describing a document after upload.

    ``status`` defaults to ``"indexed"`` and ``chunks_preview`` to an empty
    list; all other fields must be supplied.
    """

    document_id: str
    name: str
    chunks_count: int
    status: str = "indexed"
    created_at: str
    chunks_preview: list[ChunkPreview] = Field(default_factory=list)
|
|
|
|
|
|
class DocumentListResponse(BaseModel):
    """Collection of ``DocumentInfo`` records together with a count.

    Both fields are required. The model does not check that ``total``
    matches ``len(documents)``.
    """

    documents: list[DocumentInfo]
    total: int
|
|
|
|
|
|
class ChunkDetail(BaseModel):
    """Full detail for one chunk, including its text and embedding vector.

    Only ``index`` is required. ``embedding`` defaults to a fresh empty
    list and ``embedding_dim`` to ``0``.
    """

    index: int
    section: str = ""
    page_number: int = 0
    text: str = ""
    char_length: int = 0
    embedding: list[float] = Field(default_factory=list)
    # NOTE(review): presumably the length of ``embedding``; the model does
    # not validate that relationship - confirm against the producer.
    embedding_dim: int = 0
|
|
|
|
|
|
class DocumentChunksResponse(BaseModel):
    """A document's identifying fields plus its list of ``ChunkDetail`` items.

    ``chunks`` defaults to an empty list; the other fields are required.
    """

    document_id: str
    name: str
    file_type: str
    chunks_count: int
    chunks: list[ChunkDetail] = Field(default_factory=list)
|
|
|
|
|
|
class DocumentDeleteResponse(BaseModel):
    """Result of a document deletion.

    ``ok`` defaults to ``True``; ``deleted_chunks`` must be supplied.
    """

    ok: bool = True
    deleted_chunks: int
|
|
|
|
|
|
class IntentDocumentsResponse(BaseModel):
    """Document identifiers associated with one intent code.

    ``document_ids`` defaults to a fresh empty list.
    """

    intent_code: str
    document_ids: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class DocumentIntentsResponse(BaseModel):
    """Intent codes associated with one document identifier.

    ``intent_codes`` defaults to a fresh empty list.
    """

    document_id: str
    intent_codes: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class SourceInfo(BaseModel):
    """One source chunk referenced by an answer.

    The document id, document name and chunk text are required. ``section``,
    ``page`` and ``relevance_score`` default to ``""``, ``0`` and ``0.0``.
    """

    document_id: str
    document_name: str
    chunk_text: str
    section: str = ""
    page: int = 0
    relevance_score: float = 0.0
|
|
|
|
|
|
class QueryResponse(BaseModel):
    """Answer to a single query together with its supporting sources.

    ``answer``, ``sources`` and ``model_used`` are required. Note that
    ``intent_code`` defaults to the literal ``"_debug"`` rather than an
    empty string.
    """

    answer: str
    sources: list[SourceInfo]
    model_used: str
    assembled_prompt: str = ""
    intent_code: str = "_debug"
    config_version: int | None = None
    rag_subscription: dict | None = None  # {"subscribed_count": int, "found_count": int} — Sprint 7
|
|
|
|
|
|
class HealthResponse(BaseModel):
    """Health-check payload.

    ``status`` defaults to ``"ok"``; the ChromaDB state string, embedding
    model name and both counters must be supplied by the caller.
    """

    status: str = "ok"
    chromadb: str
    embedding_model: str
    documents_count: int
    chunks_count: int
|
|
|
|
|
|
class MessageInfo(BaseModel):
    """One message as returned inside a thread.

    ``id``, ``role``, ``text`` and ``created_at`` are required. The
    remaining fields are optional and default to empty values or ``None``.
    """

    id: int
    role: str
    text: str
    created_at: str
    sources: list[SourceInfo] = Field(default_factory=list)
    assembled_prompt: str = ""
    intent_code: str = ""
    intent_name: str = ""
    meta: dict | None = None
    escalation_reason: str | None = None
|
|
|
|
|
|
class ThreadInfo(BaseModel):
    """List-view summary of a thread.

    Everything is required except ``first_message_preview``, which defaults
    to an empty string.
    """

    id: int
    name: str
    created_at: str
    updated_at: str
    messages_count: int
    first_message_preview: str = ""
|
|
|
|
|
|
class ThreadListResponse(BaseModel):
    """Collection of ``ThreadInfo`` summaries together with a count.

    Both fields are required. The model does not check that ``total``
    matches ``len(threads)``.
    """

    threads: list[ThreadInfo]
    total: int
|
|
|
|
|
|
class ThreadStateInfo(BaseModel):
    """Snapshot of a thread's dialogue state.

    Every field has a default, so the model can be built with no arguments.
    Dict-valued fields use ``default_factory`` so instances never share a
    mutable default.
    """

    current_intent_code: str | None = None
    current_step: int = 0
    current_step_code: str | None = None
    slots: dict = Field(default_factory=dict)
    handoff_count: int = 0
    soft_insertion_count: int = 0
    suspended_intent: str | None = None
    resumable_step_code: str | None = None
    resumable_slots: dict = Field(default_factory=dict)
    pending_guard: dict | None = None
|
|
|
|
|
|
class BounceInfo(BaseModel):
    """A single bounce from one target to another, with an optional preface.

    ``from`` is a reserved word in Python, so the attribute is named
    ``from_`` and exposed under the alias ``"from"``.
    """

    from_: str = Field(alias="from")
    to: str
    preface: str = ""

    # Allow construction by attribute name (``from_=...``) as well as by
    # the ``"from"`` alias.
    model_config = {"populate_by_name": True}
|
|
|
|
|
|
class ValidationEventInfo(BaseModel):
    """Record of a validation event involving a current and a requested step.

    ``current_step``, ``requested_step`` and ``reason`` are required. The
    guard name, missing-slot list and guard description are optional.
    """

    current_step: str
    requested_step: str
    reason: str
    guard_name: str | None = None
    missing_slots: list[str] = Field(default_factory=list)
    guard_description: str = ""
|
|
|
|
|
|
class ThreadDetailResponse(BaseModel):
    """Full view of one thread: header fields, messages and optional state.

    ``messages`` defaults to an empty list and ``thread_state`` to ``None``.
    """

    id: int
    name: str
    created_at: str
    updated_at: str
    messages: list[MessageInfo] = Field(default_factory=list)
    thread_state: ThreadStateInfo | None = None
|
|
|
|
|
|
class ChatResponse(BaseModel):
    """Response payload for one chat turn.

    Required fields are ``thread_id``, ``thread_name``, ``message_id``,
    ``answer``, ``sources`` and ``model_used``; every other field has a
    default. Fields stay in their declared order because that order is the
    serialization order.
    """

    thread_id: int
    thread_name: str
    message_id: int
    intent_code: str = ""
    intent_name: str = ""
    router_intent_code: str = ""
    config_version: int = 0
    router_version: int | None = None
    answer: str
    sources: list[SourceInfo]
    model_used: str
    assembled_prompt: str = ""
    # A default-constructed state is used when the caller supplies none.
    thread_state: ThreadStateInfo = Field(default_factory=ThreadStateInfo)
    bounces: list[BounceInfo] = Field(default_factory=list)
    validation_events: list[ValidationEventInfo] = Field(default_factory=list)
    parse_error: str | None = None
    routing_loop_triggered: bool = False
    resumed_from_suspended: bool = False
    message_meta: dict | None = None
    escalation_reason: str | None = None
    operator_summary: dict | None = None
    router_assembled_prompt: str = ""
    rag_subscription: dict | None = None  # {"subscribed_count": int, "found_count": int} — Sprint 7
|
|
|
|
|
|
class ThreadDeleteResponse(BaseModel):
    """Result of a thread deletion.

    ``ok`` defaults to ``True``; ``deleted_messages`` must be supplied.
    """

    ok: bool = True
    deleted_messages: int
|
|
|
|
|
|
class AgentConfigInfo(BaseModel):
    """One versioned agent configuration.

    ``id``, ``version``, ``system_prompt``, ``is_active`` and ``created_at``
    are required. The intent linkage, the display name and the rule and
    exit-condition texts are optional.
    """

    id: int
    intent_id: int | None = None
    intent_code: str = ""
    intent_name: str = ""
    version: int
    name: str | None = None
    system_prompt: str
    rules_text: str = ""
    exit_conditions_text: str = ""
    is_active: bool
    created_at: str
|
|
|
|
|
|
class AgentConfigListResponse(BaseModel):
    """Collection of ``AgentConfigInfo`` records together with a count.

    Both fields are required. The model does not check that ``total``
    matches ``len(configs)``.
    """

    configs: list[AgentConfigInfo]
    total: int
|
|
|
|
|
|
class AgentConfigDeleteResponse(BaseModel):
    """Acknowledgement of an agent-config deletion; ``ok`` defaults to ``True``."""

    ok: bool = True
|
|
|
|
|
|
class IntentInfo(BaseModel):
    """Description of one intent plus a reference to its active config.

    ``description`` defaults to ``""``. ``active_config_id`` and
    ``active_config_version`` default to ``None``.
    """

    id: int
    code: str
    name: str
    description: str = ""
    is_enabled: bool
    order_index: int
    active_config_id: int | None = None
    active_config_version: int | None = None
|
|
|
|
|
|
class IntentListResponse(BaseModel):
    """Collection of ``IntentInfo`` records together with a count.

    Both fields are required. The model does not check that ``total``
    matches ``len(intents)``.
    """

    intents: list[IntentInfo]
    total: int
|
|
|
|
|
|
class IntentStepInfo(BaseModel):
    """One step belonging to an intent.

    ``intent_code`` and ``system_prompt`` default to ``""``; ``allowed_next``
    and ``guards`` default to fresh empty containers. All other fields are
    required.
    """

    id: int
    intent_id: int
    intent_code: str = ""
    code: str
    name: str
    order_index: int
    system_prompt: str = ""
    # NOTE(review): presumably the codes of steps reachable from this one -
    # confirm against the step-transition logic.
    allowed_next: list[str] = Field(default_factory=list)
    guards: dict = Field(default_factory=dict)
    updated_at: str
|
|
|
|
|
|
class IntentStepListResponse(BaseModel):
    """Steps of a single intent, identified by ``intent_code``, plus a count.

    All three fields are required.
    """

    intent_code: str
    steps: list[IntentStepInfo]
    total: int
|
|
|
|
|
|
class IntentStepGraphInfo(BaseModel):
    """Summary of one versioned step graph for an intent.

    All fields are required; none has a default.
    """

    id: int
    intent_code: str
    version: int
    name: str
    is_active: bool
    steps_count: int
    created_at: str
|
|
|
|
|
|
class IntentStepGraphListResponse(BaseModel):
    """Step graphs of one intent together with the id of the active graph."""

    intent_code: str
    graphs: list[IntentStepGraphInfo]
    # Nullable but declared without a default.
    # NOTE(review): under pydantic v2 this means the caller must always pass
    # it, even as None - confirm the pydantic version in use.
    active_graph_id: int | None
    total: int
|
|
|
|
|
|
# ---------- Regression runs (Sprint 8a) ----------
|
|
|
|
class EvalRunInfo(BaseModel):
    """Header record for one evaluation (regression) run.

    No field has a default. The nullable fields (``router_config_id``,
    ``router_config_version``, ``error_text``, ``finished_at``) are typed
    ``... | None`` but are still declared without a default value.
    """

    id: int
    suite: str
    router_config_id: int | None
    router_config_version: int | None
    min_count: int
    status: str
    # Counters for the run. The model does not check that
    # ``passed + failed`` equals ``total``.
    total: int
    passed: int
    failed: int
    cache_hits: int
    error_text: str | None
    started_at: str
    finished_at: str | None
|
|
|
|
|
|
class EvalRunCaseInfo(BaseModel):
    """One evaluated case: input text, expected and predicted intent, weight.

    ``is_pass`` defaults to ``True``, so a case built without that field is
    reported as passing.
    """

    text: str
    expected_intent: str
    predicted_intent: str
    count_weight: int
    is_pass: bool = True
|
|
|
|
|
|
class EvalRunDiffInfo(BaseModel):
    """Difference between a run and a previous run, as lists of cases.

    ``new_fails`` and ``new_passes`` are required lists.
    """

    # NOTE(review): presumably None when there is no earlier run to compare
    # against - confirm against the code that builds the diff.
    prev_run_id: int | None
    new_fails: list[EvalRunCaseInfo]
    new_passes: list[EvalRunCaseInfo]
|
|
|
|
|
|
class EvalRunDetailResponse(BaseModel):
    """Detailed view of one run: its header, its cases and its diff.

    All three fields are required.
    """

    run: EvalRunInfo
    cases: list[EvalRunCaseInfo]
    diff: EvalRunDiffInfo
|
|
|
|
|
|
class EvalRunListResponse(BaseModel):
    """Collection of ``EvalRunInfo`` headers together with a count.

    Both fields are required. The model does not check that ``total``
    matches ``len(runs)``.
    """

    runs: list[EvalRunInfo]
    total: int
|