diff --git a/db/models/message.py b/db/models/message.py
index eddec78..d74cd9c 100644
--- a/db/models/message.py
+++ b/db/models/message.py
@@ -29,6 +29,9 @@ class Message(Base):
     intent_id: Mapped[int | None] = mapped_column(
         ForeignKey("intents.id", ondelete="SET NULL"), nullable=True, index=True
     )
+    # JSON со снимком обработки реплики: решение роутера, шаг, список событий.
+    # Используется в Песочнице для отображения подробных пилюль (со Спринта 6b).
+    meta_json: Mapped[str | None] = mapped_column(Text, nullable=True)
     created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, nullable=False)
 
     thread: Mapped["Thread"] = relationship(back_populates="messages")
diff --git a/db/models/thread_state.py b/db/models/thread_state.py
index 315f922..b431c3d 100644
--- a/db/models/thread_state.py
+++ b/db/models/thread_state.py
@@ -27,6 +27,8 @@ class ThreadState(Base):
     current_step_code: Mapped[str | None] = mapped_column(String(50), nullable=True)
     slots_json: Mapped[str] = mapped_column(Text, nullable=False, default="{}")
     handoff_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    # Счётчик «боковых вопросов» подряд — блок D Спринта 6b (v2 §4.2).
+    soft_insertion_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
     # Состояние прерванного сценария — блок C Спринта 6a (v2 §4.4).
     suspended_intent: Mapped[str | None] = mapped_column(String(50), nullable=True)
     resumable_step_code: Mapped[str | None] = mapped_column(String(50), nullable=True)
diff --git a/migrations/versions/f2d50c8b91a7_add_soft_insertion_count.py b/migrations/versions/f2d50c8b91a7_add_soft_insertion_count.py
new file mode 100644
index 0000000..e918a2f
--- /dev/null
+++ b/migrations/versions/f2d50c8b91a7_add_soft_insertion_count.py
@@ -0,0 +1,39 @@
+"""add soft_insertion_count to thread_state (Sprint 6b, block D)
+
+Revision ID: f2d50c8b91a7
+Revises: e1a4f7c83b29
+Create Date: 2026-04-25 13:30:00.000000
+
+Counter of consecutive "side questions" inside one state-machine branch (soft
+insertion vs. hard handoff, v2 §4.2). It grows when the model answers a side
+question without changing the step (the `soft_insertion: true` mark in
+STATE_JSON) and is reset when the scenario advances or the branch changes.
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+revision: str = 'f2d50c8b91a7'
+down_revision: Union[str, None] = 'e1a4f7c83b29'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+_TABLE = 'thread_state'
+_COLUMN = 'soft_insertion_count'
+
+
+def upgrade() -> None:
+    """Add the NOT NULL integer counter; server_default='0' covers existing rows."""
+    counter = sa.Column(_COLUMN, sa.Integer(), nullable=False, server_default='0')
+    # NOTE(review): recreate='always' rebuilds the whole table even for a plain
+    # ADD COLUMN — confirm this matches the project's migration convention.
+    with op.batch_alter_table(_TABLE, recreate='always') as batch_op:
+        batch_op.add_column(counter)
+
+
+def downgrade() -> None:
+    """Remove the counter column again (inverse of upgrade)."""
+    with op.batch_alter_table(_TABLE, recreate='always') as batch_op:
+        batch_op.drop_column(_COLUMN)
diff --git a/migrations/versions/g3a71d4fc285_add_message_meta_json.py b/migrations/versions/g3a71d4fc285_add_message_meta_json.py
new file mode 100644
index 0000000..88ac19b
--- /dev/null
+++ b/migrations/versions/g3a71d4fc285_add_message_meta_json.py
@@ -0,0 +1,39 @@
+"""add meta_json to messages (Sprint 6b — extended per-message metadata)
+
+Revision ID: g3a71d4fc285
+Revises: f2d50c8b91a7
+Create Date: 2026-04-25 19:00:00.000000
+
+JSON metadata of a single assistant reply: the router decision, the state
+machine step and the list of events (sticky / hard_handoff / soft_insertion /
+resumed / routing_loop / validation_blocked). The Sandbox uses it to render
+detailed pills next to the branch badge.
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+revision: str = 'g3a71d4fc285'
+down_revision: Union[str, None] = 'f2d50c8b91a7'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+_TABLE = 'messages'
+_COLUMN = 'meta_json'
+
+
+def upgrade() -> None:
+    """Add the nullable Text column; rows that already exist keep NULL."""
+    meta_column = sa.Column(_COLUMN, sa.Text(), nullable=True)
+    # NOTE(review): recreate='always' rebuilds the whole table even for a plain
+    # ADD COLUMN — confirm this matches the project's migration convention.
+    with op.batch_alter_table(_TABLE, recreate='always') as batch_op:
+        batch_op.add_column(meta_column)
+
+
+def downgrade() -> None:
+    """Remove the metadata column again (inverse of upgrade)."""
+    with op.batch_alter_table(_TABLE, recreate='always') as batch_op:
+        batch_op.drop_column(_COLUMN)
diff --git a/models/responses.py b/models/responses.py
index d928cbe..09e0be6 100644
--- a/models/responses.py
+++ b/models/responses.py
@@ -88,6 +88,7 @@ class MessageInfo(BaseModel):
     assembled_prompt: str = ""
     intent_code: str = ""
     intent_name: str = ""
+    meta: dict | None = None
 
 
 class ThreadInfo(BaseModel):
@@ -110,6 +111,7 @@ class ThreadStateInfo(BaseModel):
     current_step_code: str | None = None
     slots: dict = Field(default_factory=dict)
     handoff_count: int = 0
+    soft_insertion_count: int = 0
     suspended_intent: str | None = None
     resumable_step_code: str | None = None
     resumable_slots: dict = Field(default_factory=dict)
@@ -157,6 +159,7 @@ class ChatResponse(BaseModel):
     parse_error: str | None = None
     routing_loop_triggered: bool = False
     resumed_from_suspended: bool = False
+    message_meta: dict | None = None
 
 
 class ThreadDeleteResponse(BaseModel):
diff --git a/prompts/intents/new_booking.md b/prompts/intents/new_booking.md
index 080ecef..dd55408 100644
--- a/prompts/intents/new_booking.md
+++ b/prompts/intents/new_booking.md
@@ -16,15 +16,30 @@
 3. Ровно одна служебная строка, начинающаяся с `STATE_JSON:` и валидным JSON-объектом:
 
 ```
-STATE_JSON: {"state_after": "<код_следующего_шага>", "slots_updated": {"slot1": "value1", ...}}
+STATE_JSON: {"state_after": "<код_следующего_шага>", "slots_updated": {"slot1": "value1"}, "soft_insertion": false}
 ```
 
 - `state_after` — код шага, на котором пациент окажется ПОСЛЕ твоей реплики. Должен быть из списка допустимых переходов текущего шага (тебе это передаётся в блоке `[ТЕКУЩЕЕ СОСТОЯНИЕ]`).
 - `slots_updated` — только те слоты, которые узнал из этой реплики. Старые не перечисляй.
+- `soft_insertion` — `true`, если ты ответил на короткий боковой вопрос пациента (см. ниже), не двигая сценарий вперёд. Иначе — `false` или поле опускается.
 - Значения — строки или примитивы. Неизвестное не придумывай.
 
 Служебная строка `STATE_JSON:` вырезается парсером, пациент её не видит.
 
+## Боковые вопросы (soft-insertion)
+
+Пациент посреди записи может спросить что-то «параллельное», не относящееся к текущему шагу: цена приёма, адрес клиники, часы работы, длительность приёма, какие документы взять. Это не повод уходить в другую ветку — отвечай сам, на одну-две фразы, опираясь на выдержки из базы знаний (если поданы), и тут же мягко возвращай пациента к вопросу текущего шага.
+
+В таком ответе:
+
+- `state_after` оставь равным текущему шагу (мы остаёмся на месте).
+- `slots_updated` — пустой объект (новые сценарные слоты не собрали).
+- Поставь `soft_insertion: true`.
+
+Пример: на шаге `qualify` пациент спросил «а сколько стоит приём?» — ответь коротко по цене, верни внимание на повод и специалиста, в STATE_JSON: `{"state_after": "qualify", "slots_updated": {}, "soft_insertion": true}`.
+
+Если в системном сообщении присутствует блок `[ВОЗВРАТ К СЦЕНАРИЮ]` — это значит, пациент уже подряд несколько раз отклонялся в боковые вопросы. На этой реплике уверенно верни его к вопросу шага одной фразой и не давай длинных пояснений по сторонней теме.
+
 ## Условия выхода (exit conditions)
 
 Важно: обычные бытовые жалобы пациента («болит горло», «болит ухо», «насморк», «плохо слышу», «болит зуб») — это **повод записи**, а не смена темы. Такие реплики внутри сценария не уводят в другие ветки — они фиксируются в слот `reason` и сопровождаются коротким выражением сочувствия на шаге `qualify`.
diff --git a/routers/chat.py b/routers/chat.py
index 177609e..7493508 100644
--- a/routers/chat.py
+++ b/routers/chat.py
@@ -75,4 +75,5 @@ async def chat(req: ChatRequest, session: AsyncSession = Depends(get_session)):
         parse_error=result.get("parse_error"),
         routing_loop_triggered=result.get("routing_loop_triggered", False),
         resumed_from_suspended=result.get("resumed_from_suspended", False),
+        message_meta=result.get("message_meta"),
     )
diff --git a/routers/threads.py b/routers/threads.py
index ac10fbe..dc59f67 100644
--- a/routers/threads.py
+++ b/routers/threads.py
@@ -51,6 +51,7 @@ async def get_thread(thread_id: int, session: AsyncSession = Depends(get_session
                 assembled_prompt=m["assembled_prompt"],
                 intent_code=m.get("intent_code", ""),
                 intent_name=m.get("intent_name", ""),
+                meta=m.get("meta"),
             )
             for m in data["messages"]
         ],
@@ -60,6 +61,7 @@ async def get_thread(thread_id: int, session: AsyncSession = Depends(get_session
             current_step_code=state.get("current_step_code"),
             slots=state.get("slots", {}),
             handoff_count=state.get("handoff_count", 0),
+            soft_insertion_count=state.get("soft_insertion_count", 0),
             suspended_intent=state.get("suspended_intent"),
             resumable_step_code=state.get("resumable_step_code"),
             resumable_slots=state.get("resumable_slots", {}),
diff --git a/services/chat_service.py b/services/chat_service.py
index 0a6e7cf..5894205 100644
--- a/services/chat_service.py
+++ b/services/chat_service.py
@@ -18,11 +18,18 @@ HISTORY_LIMIT = 20
 FALLBACK_INTENT_CODE = "general_info"
 ESCALATE_INTENT_CODE = "escalate_human"
 MAX_BOUNCES = 1
-HANDOFF_CAP = 3  # столько hard-handoff'ов разрешено за диалог; четвёртое — авто-эскалация
+HANDOFF_CAP = 3  # столько hard-handoff'ов разрешено за диалог; четвёртое — авто-перевод
+SOFT_INSERTION_CAP = 3  # столько «боковых вопросов» подряд терпим, потом возвращаем к шагу
 ROUTING_LOOP_REPLY = (
     "Уточню детали с администратором клиники, свяжемся с вами "
     "в течение ближайшего часа."
 )
+SOFT_INSERTION_NUDGE = (
+    "[ВОЗВРАТ К СЦЕНАРИЮ]\n"
+    "Пациент уже несколько реплик подряд задаёт боковые вопросы, не двигая сценарий. "
+    "На этой реплике уверенно верни его к вопросу текущего шага одной короткой фразой; "
+    "не давай развернутого ответа на стороннюю тему."
+)
 
 
 def _auto_thread_name(first_user_text: str) -> str:
@@ -52,6 +59,7 @@ def _format_state_context(
     snapshot: dict,
     current_step: IntentStep | None,
     router_hint: str | None = None,
+    soft_nudge: bool = False,
 ) -> str:
     """Блок с текущим состоянием треда для дописывания в системный промпт."""
     slots = snapshot.get("slots", {}) or {}
@@ -68,6 +76,9 @@ def _format_state_context(
         lines.append("")
         lines.append("[ПОДСКАЗКА РОУТЕРА]")
         lines.append(router_hint)
+    if soft_nudge:
+        lines.append("")
+        lines.append(SOFT_INSERTION_NUDGE)
     return "\n" + "\n".join(lines)
 
 
@@ -153,6 +164,7 @@ async def send_message(
     snapshot = await thread_state_service.load_snapshot(session, thread.id)
     prev_intent_code = snapshot["current_intent_code"]
     handoff_count = snapshot.get("handoff_count", 0)
+    soft_insertion_count = snapshot.get("soft_insertion_count", 0)
     suspended_intent = snapshot.get("suspended_intent")
     resumable_step_code = snapshot.get("resumable_step_code")
     resumable_slots = snapshot.get("resumable_slots", {}) or {}
@@ -172,12 +184,14 @@ async def send_message(
             "current_step_code": resumable_step_code,
             "slots": dict(resumable_slots),
             "handoff_count": 0,
+            "soft_insertion_count": 0,
             "suspended_intent": None,
             "resumable_step_code": None,
             "resumable_slots": {},
         }
         prev_intent_code = suspended_intent
         handoff_count = 0
+        soft_insertion_count = 0
         suspended_intent = None
         resumable_step_code = None
         resumable_slots = {}
@@ -205,12 +219,14 @@ async def send_message(
                 thread.id, prev_intent_code, router_code,
             )
             handoff_count += 1
+            soft_insertion_count = 0
             snapshot = {
                 "current_intent_code": router_code,
                 "current_step": 0,
                 "current_step_code": None,
                 "slots": {},
                 "handoff_count": handoff_count,
+                "soft_insertion_count": 0,
                 # suspended_* не трогаем — там может лежать прерванная sm-ветка,
                 # к которой пациент ещё захочет вернуться.
                 "suspended_intent": suspended_intent,
@@ -234,11 +250,13 @@ async def send_message(
             "current_step_code": None,
             "slots": {},
             "handoff_count": 0,
+            "soft_insertion_count": 0,
             "suspended_intent": None,
             "resumable_step_code": None,
             "resumable_slots": {},
         }
         handoff_count = 0
+        soft_insertion_count = 0
         suspended_intent = None
         resumable_step_code = None
         resumable_slots = {}
@@ -253,10 +271,12 @@ async def send_message(
             "current_step_code": None,
             "slots": {},
             "handoff_count": handoff_count,
+            "soft_insertion_count": 0,
             "suspended_intent": suspended_intent,
             "resumable_step_code": resumable_step_code,
             "resumable_slots": resumable_slots,
         }
+        soft_insertion_count = 0
         router_hint = None
     # Финализируем snapshot.current_intent_code на served_code: для не-sm-веток
     # (general_info / price_question / ...) state_update от LLM не приходит, и без
@@ -274,6 +294,7 @@ async def send_message(
     visible_text = ""
     parse_error: str | None = None
     is_state_machine = False
+    parsed: dict | None = None  # инициализируем заранее: routing_loop guard может пропустить for-цикл
 
     # Если уже сработала защита от петли — не зовём LLM, формируем заглушку.
     if routing_loop_triggered:
@@ -294,7 +315,8 @@ async def send_message(
 
             base_prompt = config_service.compose_full_system_prompt(active_cfg)
             step_prompt = f"\n\n{current_step.system_prompt}" if current_step else ""
-            state_context = _format_state_context(snapshot, current_step, router_hint)
+            soft_nudge = is_state_machine and soft_insertion_count >= SOFT_INSERTION_CAP
+            state_context = _format_state_context(snapshot, current_step, router_hint, soft_nudge)
             system_prompt = base_prompt + step_prompt + state_context
 
             llm_result = await llm.chat(
@@ -354,11 +376,13 @@ async def send_message(
                         "current_step_code": None,
                         "slots": {},
                         "handoff_count": 0,
+                        "soft_insertion_count": 0,
                         "suspended_intent": None,
                         "resumable_step_code": None,
                         "resumable_slots": {},
                     }
                     handoff_count = 0
+                    soft_insertion_count = 0
                     suspended_intent = None
                     resumable_step_code = None
                     resumable_slots = {}
@@ -375,12 +399,14 @@ async def send_message(
                     break
 
                 served_code, intent, active_cfg = await _resolve_intent_with_fallback(session, new_code)
+                soft_insertion_count = 0  # новая ветка — счётчик с нуля
                 snapshot = {
                     "current_intent_code": served_code,
                     "current_step": 0,
                     "current_step_code": None,
                     "slots": {},
                     "handoff_count": handoff_count,
+                    "soft_insertion_count": 0,
                     "suspended_intent": suspended_intent,
                     "resumable_step_code": resumable_step_code,
                     "resumable_slots": resumable_slots,
@@ -390,6 +416,7 @@ async def send_message(
 
             if parsed["state_update"] is not None and current_step is not None:
                 requested = parsed["state_update"]["state_after"]
+                soft_insertion_flag = bool(parsed["state_update"].get("soft_insertion", False))
                 allowed = intent_step_service.parse_allowed_next(current_step)
                 ok, reason = validate_transition(
                     current_step=current_step.code,
@@ -398,10 +425,19 @@ async def send_message(
                 )
                 slots_updated = parsed["state_update"]["slots_updated"]
                 merged_slots = {**snapshot.get("slots", {}), **slots_updated}
+                # Решаем, как изменился soft_insertion_count.
+                # Soft-insertion засчитываем только если ветка явно отметила его и
+                # одновременно осталась на том же шаге без новых сценарных слотов.
+                stayed_on_step = ok and requested == current_step.code
+                if soft_insertion_flag and stayed_on_step and not slots_updated:
+                    soft_insertion_count += 1
+                else:
+                    soft_insertion_count = 0
                 base_state = {
                     "current_intent_code": served_code,
                     "slots": merged_slots,
                     "handoff_count": handoff_count,
+                    "soft_insertion_count": soft_insertion_count,
                     "suspended_intent": suspended_intent,
                     "resumable_step_code": resumable_step_code,
                     "resumable_slots": resumable_slots,
@@ -442,6 +478,7 @@ async def send_message(
         step_code=snapshot.get("current_step_code"),
         slots=snapshot["slots"],
         handoff_count=snapshot.get("handoff_count", handoff_count),
+        soft_insertion_count=snapshot.get("soft_insertion_count", soft_insertion_count),
         suspended_intent=snapshot.get("suspended_intent"),
         resumable_step_code=snapshot.get("resumable_step_code"),
         resumable_slots=snapshot.get("resumable_slots"),
@@ -451,6 +488,29 @@ async def send_message(
     if thread.agent_config_id is None:
         thread.agent_config_id = active_cfg.id
 
+    # Собираем мета-снимок реплики: что увидит UI рядом с бейджем ветки.
+    events: list[str] = []
+    if routing_loop_triggered:
+        events.append("routing_loop")
+    if resumed_from_suspended:
+        events.append("resumed")
+    if bounce_log:
+        events.append("hard_handoff")
+    if router_hint and not routing_loop_triggered and not bounce_log:
+        events.append("sticky")
+    if validation_events:
+        events.append("validation_blocked")
+    # soft_insertion: ветка явно пометила ответ боковым (см. парсер state_update).
+    last_state_update = parsed.get("state_update") if isinstance(parsed, dict) else None
+    if last_state_update and last_state_update.get("soft_insertion"):
+        events.append("soft_insertion")
+    meta = {
+        "router_intent_code": router_code,
+        "served_intent_code": served_code,
+        "step_code": snapshot.get("current_step_code"),
+        "events": events,
+    }
+
     assistant_msg = Message(
         thread_id=thread.id,
         role="assistant",
@@ -458,6 +518,7 @@ async def send_message(
         sources_json=json.dumps(sources, ensure_ascii=False),
         assembled_prompt=last_assembled_prompt,
         intent_id=intent.id,
+        meta_json=json.dumps(meta, ensure_ascii=False),
     )
     session.add(assistant_msg)
 
@@ -498,6 +559,7 @@ async def send_message(
             "current_step_code": snapshot.get("current_step_code"),
             "slots": snapshot["slots"],
             "handoff_count": snapshot.get("handoff_count", handoff_count),
+            "soft_insertion_count": snapshot.get("soft_insertion_count", soft_insertion_count),
             "suspended_intent": snapshot.get("suspended_intent"),
             "resumable_step_code": snapshot.get("resumable_step_code"),
             "resumable_slots": snapshot.get("resumable_slots", {}),
@@ -507,6 +569,7 @@ async def send_message(
         "parse_error": parse_error,
         "routing_loop_triggered": routing_loop_triggered,
         "resumed_from_suspended": resumed_from_suspended,
+        "message_meta": meta,
     }
 
 
@@ -575,6 +638,12 @@ async def get_thread_detail(session: AsyncSession, thread_id: int) -> dict | Non
                 sources = json.loads(m.sources_json)
             except json.JSONDecodeError:
                 logger.warning("Bad sources_json for message %d", m.id)
+        meta = None
+        if m.meta_json:
+            try:
+                meta = json.loads(m.meta_json)
+            except json.JSONDecodeError:
+                logger.warning("Bad meta_json for message %d", m.id)
         messages.append({
             "id": m.id,
             "role": m.role,
@@ -584,6 +653,7 @@ async def get_thread_detail(session: AsyncSession, thread_id: int) -> dict | Non
             "assembled_prompt": m.assembled_prompt or "",
             "intent_code": intent_code or "",
             "intent_name": intent_name or "",
+            "meta": meta,
         })
 
     state = await thread_state_service.load_snapshot(session, thread_id)
diff --git a/services/state_machine.py b/services/state_machine.py
index 2c31f46..0a4adf3 100644
--- a/services/state_machine.py
+++ b/services/state_machine.py
@@ -79,6 +79,7 @@ def parse_branch_response(text: str) -> dict:
 
     state_after = data.get("state_after")
     slots_updated = data.get("slots_updated", {})
+    soft_insertion = data.get("soft_insertion") is True  # strict: bool("false") is True, so only a real JSON true counts
     if not isinstance(state_after, str) or not state_after:
         return {
             "visible_text": text[:state_match.start()].rstrip(),
@@ -92,7 +93,11 @@ def parse_branch_response(text: str) -> dict:
     return {
         "visible_text": text[:state_match.start()].rstrip(),
         "intent_change": None,
-        "state_update": {"state_after": state_after, "slots_updated": slots_updated},
+        "state_update": {
+            "state_after": state_after,
+            "slots_updated": slots_updated,
+            "soft_insertion": soft_insertion,
+        },
         "parse_error": None,
     }
 
diff --git a/services/thread_state_service.py b/services/thread_state_service.py
index 1269f5b..acaefd2 100644
--- a/services/thread_state_service.py
+++ b/services/thread_state_service.py
@@ -39,6 +39,7 @@ async def load_snapshot(session: AsyncSession, thread_id: int) -> dict:
             "current_step_code": None,
             "slots": {},
             "handoff_count": 0,
+            "soft_insertion_count": 0,
             "suspended_intent": None,
             "resumable_step_code": None,
             "resumable_slots": {},
@@ -57,6 +58,7 @@ async def load_snapshot(session: AsyncSession, thread_id: int) -> dict:
         "current_step_code": state.current_step_code,
         "slots": _parse_slots(state.slots_json),
         "handoff_count": state.handoff_count,
+        "soft_insertion_count": state.soft_insertion_count,
         "suspended_intent": state.suspended_intent,
         "resumable_step_code": state.resumable_step_code,
         "resumable_slots": resumable_slots,
@@ -72,6 +74,7 @@ async def upsert(
     slots: dict,
     step_code: str | None = None,
     handoff_count: int = 0,
+    soft_insertion_count: int = 0,
     suspended_intent: str | None = None,
     resumable_step_code: str | None = None,
     resumable_slots: dict | None = None,
@@ -93,6 +96,7 @@ async def upsert(
             current_step_code=step_code,
             slots_json=slots_raw,
             handoff_count=handoff_count,
+            soft_insertion_count=soft_insertion_count,
             suspended_intent=suspended_intent,
             resumable_step_code=resumable_step_code,
             resumable_slots_json=resumable_raw,
@@ -105,6 +109,7 @@ async def upsert(
         state.current_step_code = step_code
         state.slots_json = slots_raw
         state.handoff_count = handoff_count
+        state.soft_insertion_count = soft_insertion_count
         state.suspended_intent = suspended_intent
         state.resumable_step_code = resumable_step_code
         state.resumable_slots_json = resumable_raw
diff --git a/static/sandbox.html b/static/sandbox.html
index 3ca0520..f730e86 100644
--- a/static/sandbox.html
+++ b/static/sandbox.html
@@ -236,6 +236,47 @@
     font-family: var(--mono);
     margin-right: 6px;
   }
+  .msg-step {
+    display: inline-block;
+    background: #eef2ff;
+    color: #3730a3;
+    padding: 1px 7px;
+    border-radius: 10px;
+    font-size: 10px;
+    font-weight: 500;
+    font-family: var(--mono);
+    margin-right: 6px;
+  }
+  .msg-router {
+    display: inline-block;
+    color: var(--muted);
+    font-size: 10px;
+    margin-right: 6px;
+  }
+  .msg-router code {
+    background: #fafbfd;
+    border: 1px solid var(--border);
+    color: var(--muted);
+    padding: 0 4px;
+    border-radius: 4px;
+    font-family: var(--mono);
+    font-size: 10px;
+  }
+  .msg-event {
+    display: inline-block;
+    padding: 1px 7px;
+    border-radius: 10px;
+    font-size: 10px;
+    font-weight: 500;
+    margin-right: 4px;
+    cursor: help;
+  }
+  .msg-event.sticky        { background: #dbeafe; color: #1e40af; }
+  .msg-event.hard_handoff  { background: #ffedd5; color: #9a3412; }
+  .msg-event.soft_insertion{ background: #fef3c7; color: #78350f; }
+  .msg-event.resumed       { background: #dcfce7; color: #14532d; }
+  .msg-event.routing_loop  { background: #fee2e2; color: #7f1d1d; }
+  .msg-event.validation_blocked { background: #fee2e2; color: #7f1d1d; }
   .msg.assistant p { margin: 0 0 8px 0; }
   .msg.assistant p:last-child { margin-bottom: 0; }
   .msg.assistant ul, .msg.assistant ol { margin: 6px 0; padding-left: 22px; }
@@ -653,6 +694,32 @@ function startNewThread() {
   refreshThreads();
 }
 
+const EVENT_LABELS = {  // event code (as listed in meta.events) -> pill text and tooltip; the code doubles as the .msg-event.<code> CSS class
+  sticky: { text: "удержались", title: "роутер предлагал другую ветку, ветка осталась в сценарии" },
+  hard_handoff: { text: "переключение", title: "ветка сама выдала [INTENT_CHANGE] и передала диалог другой" },
+  soft_insertion: { text: "боковой вопрос", title: "ответ вне шага: модель ответила на побочный вопрос, не двигая сценарий" },
+  resumed: { text: "возврат", title: "восстановили отложенный сценарий со всеми слотами" },
+  routing_loop: { text: "защита от петли", title: "сработала защита: автоматический перевод на оператора" },
+  validation_blocked: { text: "прыжок отклонён", title: "валидатор не разрешил переход в указанный шаг" },
+};
+
+// Badge strip of an assistant message: intent, step, router (only if it differs from the served branch), event pills.
+function renderAssistantBadges(intentCode, intentName, meta) {
+  const parts = [];
+  if (intentCode) parts.push(`<span class="msg-intent" title="${esc(intentName || intentCode)}">${esc(intentCode)}</span>`);
+  if (!meta) return parts.join("");
+  if (meta.step_code) parts.push(`<span class="msg-step" title="шаг state machine">${esc(meta.step_code)}</span>`);
+  const routerCode = meta.router_intent_code;
+  if (routerCode && routerCode !== meta.served_intent_code) {
+    parts.push(`<span class="msg-router">роутер: <code>${esc(routerCode)}</code></span>`);
+  }
+  (meta.events || []).forEach(eventCode => {
+    const label = EVENT_LABELS[eventCode];  // codes without a label entry render nothing
+    if (label) parts.push(`<span class="msg-event ${esc(eventCode)}" title="${esc(label.title)}">${esc(label.text)}</span>`);
+  });
+  return parts.join("");
+}
+
 function renderMessages(messages) {
   const box = $("chat-messages");
   if (!messages.length) {
@@ -662,18 +729,20 @@ function renderMessages(messages) {
   box.innerHTML = messages.map(m => {
     const isUser = m.role === "user";
     const body = isUser ? esc(m.text) : renderMd(m.text);
-    const intentBadge = m.intent_code ? `<span class="msg-intent" title="${esc(m.intent_name || m.intent_code)}">${esc(m.intent_code)}</span>` : "";
+    const badges = isUser
+      ? ""
+      : renderAssistantBadges(m.intent_code, m.intent_name, m.meta);
     return `
       <div class="msg ${isUser ? "user" : "assistant"}">
         <div class="msg-body">${body}</div>
-        <div class="msg-meta">${intentBadge}${esc(fmtDate(m.created_at))}</div>
+        <div class="msg-meta">${badges}${esc(fmtDate(m.created_at))}</div>
       </div>
     `;
   }).join("");
   box.scrollTop = box.scrollHeight;
 }
 
-function appendMessage(role, text, iso, intentCode, intentName) {
+function appendMessage(role, text, iso, intentCode, intentName, meta) {
   const box = $("chat-messages");
   const empty = box.querySelector(".chat-empty");
   if (empty) empty.remove();
@@ -681,8 +750,8 @@ function appendMessage(role, text, iso, intentCode, intentName) {
   const isUser = role === "user";
   div.className = "msg " + (isUser ? "user" : "assistant");
   const body = isUser ? esc(text) : renderMd(text);
-  const intentBadge = intentCode ? `<span class="msg-intent" title="${esc(intentName || intentCode)}">${esc(intentCode)}</span>` : "";
-  div.innerHTML = `<div class="msg-body">${body}</div><div class="msg-meta">${intentBadge}${esc(fmtDate(iso || new Date().toISOString()))}</div>`;
+  const badges = isUser ? "" : renderAssistantBadges(intentCode, intentName, meta);
+  div.innerHTML = `<div class="msg-body">${body}</div><div class="msg-meta">${badges}${esc(fmtDate(iso || new Date().toISOString()))}</div>`;
   box.appendChild(div);
   box.scrollTop = box.scrollHeight;
   return div;
@@ -696,10 +765,17 @@ function renderState(state, bounces, validationEvents, parseError, routingLoopTr
     return;
   }
   const handoff = Number(state.handoff_count || 0);
+  const softCount = Number(state.soft_insertion_count || 0);
+  const SOFT_CAP = 3;
   const handoffHtml = `
     <div style="margin-top:6px;font-size:11px;color:var(--muted);">
-      переключений ветки в диалоге: <b style="color:var(--fg);">${handoff}</b>
+      переключений ветки в диалоге: <b style="color:var(--fg);">${handoff}</b>${state.current_step_code ? ` · боковых вопросов подряд: <b style="color:var(--fg);">${softCount}</b>` : ''}
     </div>`;
+  // softCount is the post-reply counter; the backend adds the nudge when a turn STARTS with count >= cap, i.e. on the next reply.
+  const softNudgeHtml = (state.current_step_code && softCount >= SOFT_CAP)
+    ? `<div style="margin-top:8px;padding:6px 8px;border-radius:4px;background:#fef3c7;color:#78350f;font-size:11px;">
+         📣 пациент несколько раз подряд уходит в боковые вопросы — на следующей реплике ветка получит инструкцию вернуть его к шагу.
+       </div>` : "";
   const loopHtml = routingLoopTriggered
     ? `<div style="margin-top:8px;padding:6px 8px;border-radius:4px;background:#fee2e2;color:#7f1d1d;font-size:11px;">
          🛑 защита от петли сработала: диалог уведён в <code>escalate_human</code>.
@@ -742,7 +818,7 @@ function renderState(state, bounces, validationEvents, parseError, routingLoopTr
           <b>${esc(state.current_intent_code)}</b>
           <span style="color:var(--muted);font-size:11px;margin-left:4px;">— без пошагового сценария</span>
         </div>
-        ${handoffHtml}${loopHtml}${suspendedHtml}${resumedHtml}${bounceHtml}${validationHtml}${parseErrorHtml}
+        ${handoffHtml}${softNudgeHtml}${loopHtml}${suspendedHtml}${resumedHtml}${bounceHtml}${validationHtml}${parseErrorHtml}
       </div>
     `;
     return;
@@ -753,7 +829,7 @@ function renderState(state, bounces, validationEvents, parseError, routingLoopTr
     <div style="font-size:12px;">
       <div><b>${esc(state.current_intent_code)}</b> · шаг <code>${esc(state.current_step_code)}</code></div>
       <div class="prompt-box" style="margin-top:6px;max-height:200px;">${esc(slotsJson)}</div>
-      ${handoffHtml}${loopHtml}${suspendedHtml}${resumedHtml}${bounceHtml}${validationHtml}${parseErrorHtml}
+      ${handoffHtml}${softNudgeHtml}${loopHtml}${suspendedHtml}${resumedHtml}${bounceHtml}${validationHtml}${parseErrorHtml}
     </div>
   `;
 }
@@ -860,7 +936,7 @@ async function sendMessage() {
     });
     activeThreadId = r.thread_id;
     pending.remove();
-    appendMessage("assistant", r.answer, null, r.intent_code, r.intent_name);
+    appendMessage("assistant", r.answer, null, r.intent_code, r.intent_name, r.message_meta);
     $("chat-title").className = "chat-title";
     $("chat-title").textContent = r.thread_name;
     renderDebug(r.sources, r.assembled_prompt, r.intent_code, r.intent_name, r.config_version, r.router_version, r.router_intent_code, r.bounces, r.thread_state && r.thread_state.current_step_code);