feat(sprint6a): блоки A2, B, C — exit_conditions, handoff_count, suspended/resume

Блок A2: вынос условий выхода из основного промпта в отдельное поле agent_configs.exit_conditions_text. compose_full_system_prompt склеивает system_prompt + rules_text + exit_conditions_text перед отправкой в модель. Одноразовая миграция данных при старте: пытаемся выделить блок «Условия выхода» из хвоста существующих system_prompt-ов и перенести в новое поле (поддерживаются три формы заголовка: «## Условия выхода», «**Условия выхода**», просто «Условия выхода:»). В UI «Настройки» — третья textarea с подсказкой ⓘ на отдельной кнопке.

Блок B: защита от петель маршрутизации (v2 §4.3). В thread_state добавлена колонка handoff_count, инкрементируется на каждом hard-handoff: либо когда роутер переключает не-sm-ветку (state reset), либо когда sm-ветка сама выдаёт [INTENT_CHANGE: …] (bouncing). При превышении HANDOFF_CAP=3 (то есть на четвёртом переключении) диалог автоматически уводится в escalate_human с шаблонным ответом «Уточню детали с администратором клиники, свяжемся с вами в течение ближайшего часа», LLM не вызывается, handoff_count сбрасывается. В Песочнице видны счётчик «переключений ветки в диалоге» и красная плашка при срабатывании защиты. Также исправлен баг: для не-sm-веток snapshot.current_intent_code теперь финализируется на served_code, иначе на следующей реплике prev_intent_code терялся и handoff_count не считался.

Блок C: suspended_intent / resumable_step_code / resumable_slots_json в thread_state (v2 §4.4). При hard-handoff из sm-ветки через [INTENT_CHANGE] текущий сценарий запоминается (если suspended ещё не занят). Когда роутер на следующих репликах возвращает intent = suspended_intent — RESUME: восстанавливаем current_intent_code, current_step_code, slots; suspended_* очищается, handoff_count=0. Возврат имеет приоритет над sticky-логикой. В Песочнице — синяя плашка «📌 отложен сценарий X (шаг Y)» во время detour'а и зелёная «↩️ возврат к отложенному сценарию» в момент resume.
Роутер-driven handoff не теряет suspended_*: отложенный сценарий сбрасывается только при resume и при authoritative-переходах вроде эскалации (в том числе при срабатывании routing-loop guard, который уводит диалог в escalate_human). Прогон вручную: detour из new_booking/qualify в price_question и обратно восстанавливает name=Алексей, reason=болит ухо на исходном шаге.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 12:46:10 +05:00
parent 9eef2dab3a
commit 932b488bcb
16 changed files with 547 additions and 106 deletions
@@ -16,7 +16,13 @@ logger = logging.getLogger(__name__)

 HISTORY_LIMIT = 20
 FALLBACK_INTENT_CODE = "general_info"
+ESCALATE_INTENT_CODE = "escalate_human"
 MAX_BOUNCES = 1
+HANDOFF_CAP = 3  # столько hard-handoff'ов разрешено за диалог; четвёртое — авто-эскалация
+ROUTING_LOOP_REPLY = (
+    "Уточню детали с администратором клиники, свяжемся с вами "
+    "в течение ближайшего часа."
+)


 def _auto_thread_name(first_user_text: str) -> str:
@@ -138,17 +144,46 @@ async def send_message(
    router_code = routing["code"]
    router_version = routing.get("version")

-    # 2. Снимок состояния. Важное правило (sticky state machine, мини-G из Спринта 6b):
-    # если тред уже идёт по state-machine-ветке и роутер предлагает другую —
-    # НЕ сбрасываем state. Передадим LLM подсказку «роутер думает так», и пусть
-    # она сама решает: выдать `[INTENT_CHANGE: ...]` или удержать сценарий.
-    # Это нужно, чтобы фраза-повод («болит ухо») внутри записи не сбрасывала слоты.
+    # 2. Снимок состояния. Логика выбора effective_code:
+    #   2.1. Если есть suspended_intent и роутер вернулся в него — RESUME: восстанавливаем
+    #        прерванный сценарий, очищаем suspended_*, handoff_count=0.
+    #   2.2. Иначе если диалог идёт по sm-ветке и роутер предлагает другую — sticky:
+    #        НЕ сбрасываем state, передаём LLM [ПОДСКАЗКА РОУТЕРА].
+    #   2.3. Иначе если prev — не-sm и роутер ведёт в другую ветку — hard-handoff.
    snapshot = await thread_state_service.load_snapshot(session, thread.id)
    prev_intent_code = snapshot["current_intent_code"]
+    handoff_count = snapshot.get("handoff_count", 0)
+    suspended_intent = snapshot.get("suspended_intent")
+    resumable_step_code = snapshot.get("resumable_step_code")
+    resumable_slots = snapshot.get("resumable_slots", {}) or {}
    router_hint: str | None = None
    effective_code = router_code
+    routing_loop_triggered = False
+    resumed_from_suspended = False

-    if prev_intent_code and prev_intent_code != router_code:
+    if suspended_intent and suspended_intent == router_code and prev_intent_code != suspended_intent:
+        logger.info(
+            "Resume from suspended in thread %d: %s (step=%s, %d slots)",
+            thread.id, suspended_intent, resumable_step_code, len(resumable_slots),
+        )
+        snapshot = {
+            "current_intent_code": suspended_intent,
+            "current_step": 0,
+            "current_step_code": resumable_step_code,
+            "slots": dict(resumable_slots),
+            "handoff_count": 0,
+            "suspended_intent": None,
+            "resumable_step_code": None,
+            "resumable_slots": {},
+        }
+        prev_intent_code = suspended_intent
+        handoff_count = 0
+        suspended_intent = None
+        resumable_step_code = None
+        resumable_slots = {}
+        effective_code = snapshot["current_intent_code"]
+        resumed_from_suspended = True
+    elif prev_intent_code and prev_intent_code != router_code:
        if intent_step_service.has_state_machine(prev_intent_code):
            logger.info(
                "Router suggested %s but thread %d is in sm %s — sticky, hint only",
@@ -164,17 +199,51 @@ async def send_message(
            )
            effective_code = prev_intent_code
        else:
+            # Реальный hard-handoff: prev — не sm-ветка, роутер ведёт.
            logger.info(
                "Router switched intent for thread %d: %s → %s (state reset)",
                thread.id, prev_intent_code, router_code,
            )
+            handoff_count += 1
            snapshot = {
                "current_intent_code": router_code,
                "current_step": 0,
                "current_step_code": None,
                "slots": {},
+                "handoff_count": handoff_count,
+                # suspended_* не трогаем — там может лежать прерванная sm-ветка,
+                # к которой пациент ещё захочет вернуться.
+                "suspended_intent": suspended_intent,
+                "resumable_step_code": resumable_step_code,
+                "resumable_slots": resumable_slots,
            }

+    # 2b. Защита от петли (v2 §4.3): если за диалог накопилось много handoff'ов и
+    # сейчас ещё одно переключение — забираем диалог в escalate_human с заглушкой,
+    # без вызова LLM. После авто-эскалации сбрасываем handoff_count и suspended_*
+    # (диалог переходит к оператору, прерванный сценарий не продолжаем).
+    if handoff_count > HANDOFF_CAP and effective_code != ESCALATE_INTENT_CODE:
+        logger.warning(
+            "Routing loop guard tripped for thread %d (handoff_count=%d), forcing %s",
+            thread.id, handoff_count, ESCALATE_INTENT_CODE,
+        )
+        effective_code = ESCALATE_INTENT_CODE
+        snapshot = {
+            "current_intent_code": ESCALATE_INTENT_CODE,
+            "current_step": 0,
+            "current_step_code": None,
+            "slots": {},
+            "handoff_count": 0,
+            "suspended_intent": None,
+            "resumable_step_code": None,
+            "resumable_slots": {},
+        }
+        handoff_count = 0
+        suspended_intent = None
+        resumable_step_code = None
+        resumable_slots = {}
+        routing_loop_triggered = True
+
    # 3. Разрешаем ветку (с fallback) и шаг.
    served_code, intent, active_cfg = await _resolve_intent_with_fallback(session, effective_code)
    if served_code != effective_code:
@@ -183,8 +252,18 @@ async def send_message(
            "current_step": 0,
            "current_step_code": None,
            "slots": {},
+            "handoff_count": handoff_count,
+            "suspended_intent": suspended_intent,
+            "resumable_step_code": resumable_step_code,
+            "resumable_slots": resumable_slots,
        }
        router_hint = None
+    # Финализируем snapshot.current_intent_code на served_code: для не-sm-веток
+    # (general_info / price_question / ...) state_update от LLM не приходит, и без
+    # этого snapshot["current_intent_code"] осталось бы None для нового треда —
+    # тогда на следующей реплике prev_intent_code не определится и handoff_count
+    # не инкрементится.
+    snapshot["current_intent_code"] = served_code

    retrieved = vectorstore.query(query_text=text, top_k=top_k)
    sources = _retrieved_to_sources(retrieved)
@@ -196,91 +275,164 @@ async def send_message(
    parse_error: str | None = None
    is_state_machine = False

-    for attempt in range(MAX_BOUNCES + 1):
-        current_step = await _resolve_current_step(
-            session, intent.id, served_code, snapshot.get("current_step_code"),
+    # Если уже сработала защита от петли — не зовём LLM, формируем заглушку.
+    if routing_loop_triggered:
+        visible_text = ROUTING_LOOP_REPLY
+        last_assembled_prompt = (
+            "[ROUTING LOOP GUARD]\n"
+            f"handoff_count превысил {HANDOFF_CAP}, диалог автоматически уведён в "
+            f"{ESCALATE_INTENT_CODE}. LLM не вызывался."
        )
-        is_state_machine = current_step is not None
-        if current_step is not None and snapshot.get("current_step_code") != current_step.code:
-            snapshot["current_step_code"] = current_step.code
-
-        base_prompt = config_service.compose_full_system_prompt(active_cfg)
-        step_prompt = f"\n\n{current_step.system_prompt}" if current_step else ""
-        state_context = _format_state_context(snapshot, current_step, router_hint)
-        system_prompt = base_prompt + step_prompt + state_context
-
-        llm_result = await llm.chat(
-            question=text,
-            sources=retrieved,
-            history=history,
-            system_prompt=system_prompt,
-            temperature=temperature,
-            max_tokens=max_tokens,
-        )
-        last_assembled_prompt = llm_result["assembled_prompt"]
-        parsed = parse_branch_response(llm_result["text"])
-        visible_text = parsed["visible_text"] or llm_result["text"]
-        # STATE_JSON-блок ждём только от state-machine-веток. У остальных (general_info,
-        # price_question и т.п.) «no STATE_JSON» — ожидаемое состояние, не ошибка.
-        parse_error = parsed["parse_error"] if is_state_machine else None
-
-        if parsed["intent_change"] and attempt < MAX_BOUNCES:
-            new_code = parsed["intent_change"]
-            bounce_log.append({
-                "from": served_code,
-                "to": new_code,
-                "preface": parsed["visible_text"],
-            })
-            logger.info("Intent bounce in thread %d: %s → %s", thread.id, served_code, new_code)
-            served_code, intent, active_cfg = await _resolve_intent_with_fallback(session, new_code)
-            snapshot = {
-                "current_intent_code": served_code,
-                "current_step": 0,
-                "current_step_code": None,
-                "slots": {},
-            }
-            router_hint = None  # новая ветка — подсказка больше неактуальна
-            continue
-
-        if parsed["state_update"] is not None and current_step is not None:
-            requested = parsed["state_update"]["state_after"]
-            allowed = intent_step_service.parse_allowed_next(current_step)
-            ok, reason = validate_transition(
-                current_step=current_step.code,
-                requested_step=requested,
-                allowed_next=allowed,
+    else:
+        for attempt in range(MAX_BOUNCES + 1):
+            current_step = await _resolve_current_step(
+                session, intent.id, served_code, snapshot.get("current_step_code"),
            )
-            slots_updated = parsed["state_update"]["slots_updated"]
-            merged_slots = {**snapshot.get("slots", {}), **slots_updated}
-            if ok:
-                snapshot = {
-                    "current_intent_code": served_code,
-                    "current_step": snapshot["current_step"] + (1 if requested != current_step.code else 0),
-                    "current_step_code": requested,
-                    "slots": merged_slots,
-                }
-            else:
-                logger.warning(
-                    "Illegal state_after in thread %d (%s): %s", thread.id, served_code, reason,
-                )
-                validation_events.append({
-                    "current_step": current_step.code,
-                    "requested_step": requested,
-                    "reason": reason,
+            is_state_machine = current_step is not None
+            if current_step is not None and snapshot.get("current_step_code") != current_step.code:
+                snapshot["current_step_code"] = current_step.code
+
+            base_prompt = config_service.compose_full_system_prompt(active_cfg)
+            step_prompt = f"\n\n{current_step.system_prompt}" if current_step else ""
+            state_context = _format_state_context(snapshot, current_step, router_hint)
+            system_prompt = base_prompt + step_prompt + state_context
+
+            llm_result = await llm.chat(
+                question=text,
+                sources=retrieved,
+                history=history,
+                system_prompt=system_prompt,
+                temperature=temperature,
+                max_tokens=max_tokens,
+            )
+            last_assembled_prompt = llm_result["assembled_prompt"]
+            parsed = parse_branch_response(llm_result["text"])
+            visible_text = parsed["visible_text"] or llm_result["text"]
+            # STATE_JSON-блок ждём только от state-machine-веток. У остальных
+            # «no STATE_JSON» — ожидаемое состояние, не ошибка.
+            parse_error = parsed["parse_error"] if is_state_machine else None
+
+            if parsed["intent_change"] and attempt < MAX_BOUNCES:
+                new_code = parsed["intent_change"]
+                bounce_log.append({
+                    "from": served_code,
+                    "to": new_code,
+                    "preface": parsed["visible_text"],
                })
-                # Слоты всё равно мёржим (информация полезная), шаг не двигаем.
+                logger.info(
+                    "Intent bounce in thread %d: %s → %s", thread.id, served_code, new_code,
+                )
+                # Если уходим из sm-ветки и suspended_* ещё свободно — запоминаем,
+                # чтобы вернуться к прерванному сценарию, когда роутер увидит,
+                # что пациент возвращается к теме (см. блок 2.1 в начале send_message).
+                if (
+                    is_state_machine
+                    and current_step is not None
+                    and not suspended_intent
+                    and new_code != served_code
+                ):
+                    suspended_intent = served_code
+                    resumable_step_code = current_step.code
+                    resumable_slots = dict(snapshot.get("slots", {}))
+                    logger.info(
+                        "Suspending sm scenario for thread %d: %s (step=%s, %d slots)",
+                        thread.id, suspended_intent, resumable_step_code, len(resumable_slots),
+                    )
+                handoff_count += 1
+                # Защита от петли работает и здесь — на bouncing'е.
+                if handoff_count > HANDOFF_CAP and new_code != ESCALATE_INTENT_CODE:
+                    logger.warning(
+                        "Routing loop guard tripped on bounce in thread %d (handoff_count=%d)",
+                        thread.id, handoff_count,
+                    )
+                    served_code, intent, active_cfg = await _resolve_intent_with_fallback(
+                        session, ESCALATE_INTENT_CODE,
+                    )
+                    snapshot = {
+                        "current_intent_code": served_code,
+                        "current_step": 0,
+                        "current_step_code": None,
+                        "slots": {},
+                        "handoff_count": 0,
+                        "suspended_intent": None,
+                        "resumable_step_code": None,
+                        "resumable_slots": {},
+                    }
+                    handoff_count = 0
+                    suspended_intent = None
+                    resumable_step_code = None
+                    resumable_slots = {}
+                    visible_text = ROUTING_LOOP_REPLY
+                    last_assembled_prompt = (
+                        "[ROUTING LOOP GUARD]\n"
+                        f"handoff_count превысил {HANDOFF_CAP} на bouncing'е, "
+                        f"диалог автоматически уведён в {ESCALATE_INTENT_CODE}."
+                    )
+                    routing_loop_triggered = True
+                    parse_error = None
+                    is_state_machine = False
+                    parsed = {"visible_text": visible_text, "intent_change": None, "state_update": None, "parse_error": None}
+                    break
+
+                served_code, intent, active_cfg = await _resolve_intent_with_fallback(session, new_code)
                snapshot = {
                    "current_intent_code": served_code,
-                    "current_step": snapshot["current_step"],
-                    "current_step_code": current_step.code,
-                    "slots": merged_slots,
+                    "current_step": 0,
+                    "current_step_code": None,
+                    "slots": {},
+                    "handoff_count": handoff_count,
+                    "suspended_intent": suspended_intent,
+                    "resumable_step_code": resumable_step_code,
+                    "resumable_slots": resumable_slots,
                }
-        elif parsed["state_update"] is None and current_step is not None and parse_error:
-            logger.warning(
-                "State machine branch %s returned no STATE_JSON: %s", served_code, parse_error,
-            )
+                router_hint = None  # новая ветка — подсказка больше неактуальна
+                continue

-        break
+            if parsed["state_update"] is not None and current_step is not None:
+                requested = parsed["state_update"]["state_after"]
+                allowed = intent_step_service.parse_allowed_next(current_step)
+                ok, reason = validate_transition(
+                    current_step=current_step.code,
+                    requested_step=requested,
+                    allowed_next=allowed,
+                )
+                slots_updated = parsed["state_update"]["slots_updated"]
+                merged_slots = {**snapshot.get("slots", {}), **slots_updated}
+                base_state = {
+                    "current_intent_code": served_code,
+                    "slots": merged_slots,
+                    "handoff_count": handoff_count,
+                    "suspended_intent": suspended_intent,
+                    "resumable_step_code": resumable_step_code,
+                    "resumable_slots": resumable_slots,
+                }
+                if ok:
+                    snapshot = {
+                        **base_state,
+                        "current_step": snapshot["current_step"] + (1 if requested != current_step.code else 0),
+                        "current_step_code": requested,
+                    }
+                else:
+                    logger.warning(
+                        "Illegal state_after in thread %d (%s): %s", thread.id, served_code, reason,
+                    )
+                    validation_events.append({
+                        "current_step": current_step.code,
+                        "requested_step": requested,
+                        "reason": reason,
+                    })
+                    # Слоты всё равно мёржим (информация полезная), шаг не двигаем.
+                    snapshot = {
+                        **base_state,
+                        "current_step": snapshot["current_step"],
+                        "current_step_code": current_step.code,
+                    }
+            elif parsed["state_update"] is None and current_step is not None and parse_error:
+                logger.warning(
+                    "State machine branch %s returned no STATE_JSON: %s", served_code, parse_error,
+                )
+
+            break

    # 4. Сохраняем: thread_state пишется ПОСЛЕ всей логики, коммит — единой транзакцией.
    await thread_state_service.upsert(
@@ -289,6 +441,10 @@ async def send_message(
        step=snapshot["current_step"],
        step_code=snapshot.get("current_step_code"),
        slots=snapshot["slots"],
+        handoff_count=snapshot.get("handoff_count", handoff_count),
+        suspended_intent=snapshot.get("suspended_intent"),
+        resumable_step_code=snapshot.get("resumable_step_code"),
+        resumable_slots=snapshot.get("resumable_slots"),
    )

    user_msg.intent_id = intent.id
@@ -312,12 +468,15 @@ async def send_message(
    await session.refresh(thread)

    logger.info(
-        "Chat: thread=%d, router=%s, served=%s (v%d), step=%s, slots=%d keys, bounces=%d, validation_events=%d",
+        "Chat: thread=%d, router=%s, served=%s (v%d), step=%s, slots=%d keys, "
+        "bounces=%d, validation=%d, handoff=%d, routing_loop=%s",
        thread.id, router_code, served_code, active_cfg.version,
        snapshot.get("current_step_code") or "-",
        len(snapshot["slots"]),
        len(bounce_log),
        len(validation_events),
+        snapshot.get("handoff_count", 0),
+        routing_loop_triggered,
    )

    return {
@@ -338,10 +497,16 @@ async def send_message(
            "current_step": snapshot["current_step"],
            "current_step_code": snapshot.get("current_step_code"),
            "slots": snapshot["slots"],
+            "handoff_count": snapshot.get("handoff_count", handoff_count),
+            "suspended_intent": snapshot.get("suspended_intent"),
+            "resumable_step_code": snapshot.get("resumable_step_code"),
+            "resumable_slots": snapshot.get("resumable_slots", {}),
        },
        "bounces": bounce_log,
        "validation_events": validation_events,
        "parse_error": parse_error,
+        "routing_loop_triggered": routing_loop_triggered,
+        "resumed_from_suspended": resumed_from_suspended,
    }


@@ -4,6 +4,7 @@
 Активна одна версия в пределах ветки, не глобально.
 """
 import logging
+import re

 from sqlalchemy import func, select, update
 from sqlalchemy.ext.asyncio import AsyncSession
@@ -124,6 +125,7 @@ async def create_config(
    rules_text: str,
    name: str | None = None,
    activate: bool = False,
+    exit_conditions_text: str | None = None,
 ) -> AgentConfig:
    """Создать новую версию в рамках ветки. При activate=True — сразу активна в этой ветке."""
    next_version = (await session.execute(
@@ -143,6 +145,7 @@ async def create_config(
        name=(name or "").strip() or None,
        system_prompt=system_prompt,
        rules_text=rules_text or "",
+        exit_conditions_text=(exit_conditions_text or None),
        is_active=activate,
    )
    session.add(cfg)
@@ -178,8 +181,74 @@ async def delete_config(session: AsyncSession, config_id: int) -> tuple[bool, st


 def compose_full_system_prompt(cfg: AgentConfig) -> str:
+    """Assemble the system prompt sent to the model: base prompt + rules + exit conditions.
+
+    Each of the three config fields is stripped; empty ones are skipped, the rest are joined
+    with blank lines, with rules and exit conditions each placed under its own "##" heading.
+    """
     base = (cfg.system_prompt or "").strip()
     rules = (cfg.rules_text or "").strip()
-    if not rules:
-        return base
-    return f"{base}\n\nДополнительные правила:\n{rules}"
+    exits = (cfg.exit_conditions_text or "").strip()
+    parts = [base] if base else []
+    if rules:
+        parts.append(f"## Дополнительные правила\n\n{rules}")
+    if exits:
+        parts.append(f"## Условия выхода (exit conditions)\n\n{exits}")
+    return "\n\n".join(parts)
+
+
+# Regexes for the one-off migration. The header pattern matches a line that STARTS with
+# (a) a markdown header `## Условия выхода` (one to three `#` characters),
+# (b) bold `**Условия выхода**`,
+# (c) plain «Условия выхода:» / «Условия выхода»; the rest of that line is consumed too.
+# The block runs until the next markdown header (levels 1–3) or the end of the text.
+_EXITS_HEADER_RE = re.compile(
+    r"(?im)^[ \t]*(?:#{1,3}[ \t]*|\*\*\s*)?Условия\s+выхода\b.*?$"
+)
+_NEXT_TOP_HEADER_RE = re.compile(r"(?m)^[ \t]*#{1,3}[ \t]+\S")
+
+
+def _split_exit_conditions(system_prompt: str) -> tuple[str, str | None]:
+    """Try to cut the «Условия выхода» (exit conditions) block out of a system prompt.

+    Returns (new_system_prompt, exit_conditions_text); the prompt is returned unchanged with None when it is empty, no header matches, or the block body is blank.
+    NOTE(review): search() takes the FIRST header match (not the last), and text on the header line itself ends up in neither output — confirm this is intended.
+    """
+    if not system_prompt:
+        return system_prompt, None
+    m = _EXITS_HEADER_RE.search(system_prompt)
+    if m is None:
+        return system_prompt, None

+    after_header = m.end()
+    # Look for the next markdown header after the block header: cut the block there, otherwise at the end of the text.
+    nxt = _NEXT_TOP_HEADER_RE.search(system_prompt, after_header)
+    end_of_block = nxt.start() if nxt else len(system_prompt)

+    exits_body = system_prompt[after_header:end_of_block].strip()
+    if not exits_body:
+        return system_prompt, None

+    new_prompt = (system_prompt[:m.start()] + system_prompt[end_of_block:]).strip()
+    return new_prompt, exits_body
+
+
+async def migrate_exit_conditions_to_field(session: AsyncSession) -> None:
+    """One-off data migration: move the «Условия выхода» block from system_prompt into its own field.

+    Loads every config whose exit_conditions_text is NULL and tries to split the block out of
+    system_prompt; configs where nothing is found stay as they are. Commits only if something moved.
+    """
+    stmt = select(AgentConfig).where(AgentConfig.exit_conditions_text.is_(None))
+    cfgs = list((await session.execute(stmt)).scalars().all())
+    moved = 0
+    for cfg in cfgs:
+        new_prompt, exits = _split_exit_conditions(cfg.system_prompt)
+        if exits is None:
+            continue
+        cfg.system_prompt = new_prompt
+        cfg.exit_conditions_text = exits
+        moved += 1
+    if moved:
+        await session.commit()
+        logger.info("Migrated exit_conditions out of system_prompt for %d config(s)", moved)
@@ -30,7 +30,7 @@ def _parse_slots(raw: str) -> dict:


 async def load_snapshot(session: AsyncSession, thread_id: int) -> dict:
-    """Удобный снимок состояния для чтения (intent, step_code, step, slots)."""
+    """Снимок состояния диалога: текущая ветка/шаг/слоты + handoff_count + suspended_*."""
    state = await get_state(session, thread_id)
    if state is None:
        return {
@@ -38,12 +38,28 @@ async def load_snapshot(session: AsyncSession, thread_id: int) -> dict:
            "current_step": 0,
            "current_step_code": None,
            "slots": {},
+            "handoff_count": 0,
+            "suspended_intent": None,
+            "resumable_step_code": None,
+            "resumable_slots": {},
        }
+    resumable_slots = {}
+    if state.resumable_slots_json:
+        try:
+            value = json.loads(state.resumable_slots_json)
+            if isinstance(value, dict):
+                resumable_slots = value
+        except json.JSONDecodeError:
+            logger.warning("Bad resumable_slots_json for thread_state, ignoring")
    return {
        "current_intent_code": state.current_intent_code,
        "current_step": state.current_step,
        "current_step_code": state.current_step_code,
        "slots": _parse_slots(state.slots_json),
+        "handoff_count": state.handoff_count,
+        "suspended_intent": state.suspended_intent,
+        "resumable_step_code": state.resumable_step_code,
+        "resumable_slots": resumable_slots,
    }


@@ -55,11 +71,20 @@ async def upsert(
    step: int,
    slots: dict,
    step_code: str | None = None,
+    handoff_count: int = 0,
+    suspended_intent: str | None = None,
+    resumable_step_code: str | None = None,
+    resumable_slots: dict | None = None,
 ) -> ThreadState:
    """Создать или обновить состояние треда. Коммит — на совести вызывающего."""
    state = await get_state(session, thread_id)
    now = datetime.now(timezone.utc)
    slots_raw = json.dumps(slots or {}, ensure_ascii=False)
+    resumable_raw = (
+        json.dumps(resumable_slots, ensure_ascii=False)
+        if resumable_slots is not None and len(resumable_slots) > 0
+        else None
+    )
    if state is None:
        state = ThreadState(
            thread_id=thread_id,
@@ -67,6 +92,10 @@ async def upsert(
            current_step=step,
            current_step_code=step_code,
            slots_json=slots_raw,
+            handoff_count=handoff_count,
+            suspended_intent=suspended_intent,
+            resumable_step_code=resumable_step_code,
+            resumable_slots_json=resumable_raw,
            updated_at=now,
        )
        session.add(state)
@@ -75,6 +104,10 @@ async def upsert(
        state.current_step = step
        state.current_step_code = step_code
        state.slots_json = slots_raw
+        state.handoff_count = handoff_count
+        state.suspended_intent = suspended_intent
+        state.resumable_step_code = resumable_step_code
+        state.resumable_slots_json = resumable_raw
        state.updated_at = now
    return state

@@ -90,4 +123,5 @@ async def reset(
    return await upsert(
        session, thread_id,
        intent_code=new_intent_code, step=0, step_code=new_step_code, slots={},
+        handoff_count=0,
    )