TestingWebApp/flask_app/app/services/draft_validator.py

"""Парсер JSON от LLM и валидатор draft (порт частей `documentGenService.js`)."""
from __future__ import annotations

import json as _json
import re
from typing import Any

from .llm_client import LlmError


# Markdown code fence that opens at the start of a line, optionally tagged
# ``json`` (any letter case); group 1 captures the fenced payload.
_FENCE_RE = re.compile(
    r'^```(?:json)?\s*([\s\S]*?)```$',
    re.MULTILINE | re.IGNORECASE,
)


def parse_json_from_llm_text(text: str) -> Any:
    """Decode the JSON payload contained in a raw LLM reply.

    The reply may be bare JSON or JSON wrapped in a Markdown code fence.
    The fence may be preceded and/or followed by free-form prose; only the
    content of the first fence found is decoded.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded JSON value (any JSON type).

    Raises:
        LlmError: ``code='llm_empty'`` when ``text`` is not a string or is
            blank; ``code='llm_json_parse'`` when the payload is not valid
            JSON.
    """
    if not isinstance(text, str) or not text.strip():
        raise LlmError('Пустой ответ модели.', code='llm_empty')
    t = text.strip()
    # ``search`` (not ``match``): models often put an introductory sentence
    # before the fence, and ``match`` would only accept a fence at offset 0.
    if m := _FENCE_RE.search(t):
        t = m.group(1).strip()
    try:
        return _json.loads(t)
    except _json.JSONDecodeError as exc:
        # Keep the decoder's position info available as ``__cause__``.
        raise LlmError(
            'Ответ модели не является корректным JSON.',
            code='llm_json_parse',
        ) from exc


def validate_and_normalize_draft(o: Any) -> dict:
    """Validate a decoded LLM draft and return it in canonical form.

    The result is ``{'title', 'description', 'questions'}``: the title is a
    non-empty stripped string, the description is a stripped string or
    ``None``, and each question is ``{'text', 'hasMultipleAnswers',
    'options'}`` with options shaped as ``{'text', 'isCorrect'}``.

    Enforced limits: 1-40 questions, 2-12 options per question, at least one
    correct option, and at most one correct option unless the question is
    flagged ``hasMultipleAnswers``.  A blank option text is replaced with a
    numbered placeholder instead of being rejected.

    Raises:
        LlmError: with ``code='llm_shape'`` on the first violation found.
    """
    if not isinstance(o, dict):
        raise LlmError('JSON не содержит объекта с данными.', code='llm_shape')

    title = str(o.get('title') or '').strip()
    if not title:
        raise LlmError('В ответе нет поля title.', code='llm_shape')

    # A missing, falsy or whitespace-only description collapses to None.
    description = None
    raw_description = o.get('description')
    if raw_description:
        stripped = str(raw_description).strip()
        if stripped:
            description = stripped

    incoming = o.get('questions')
    if not (isinstance(incoming, list) and incoming):
        raise LlmError('В ответе нет вопросов (questions).', code='llm_shape')
    if len(incoming) > 40:
        raise LlmError('Слишком много вопросов в ответе (макс. 40).', code='llm_shape')

    questions = []
    # Numbers are 1-based because they only appear in user-facing messages.
    for number, item in enumerate(incoming, start=1):
        if not isinstance(item, dict):
            raise LlmError(f'Вопрос {number}: неверный формат.', code='llm_shape')

        text = str(item.get('text') or '').strip()
        if not text:
            raise LlmError(f'Вопрос {number}: пустой текст.', code='llm_shape')

        multiple = bool(item.get('hasMultipleAnswers'))

        raw_options = item.get('options')
        if not (isinstance(raw_options, list) and len(raw_options) >= 2):
            raise LlmError(f'Вопрос {number}: нужны минимум 2 варианта ответа.', code='llm_shape')
        if len(raw_options) > 12:
            raise LlmError(f'Вопрос {number}: слишком много вариантов (макс. 12).', code='llm_shape')

        options = []
        for position, candidate in enumerate(raw_options, start=1):
            if not isinstance(candidate, dict):
                raise LlmError(f'Вопрос {number}, вариант {position}: неверный формат.', code='llm_shape')
            label = str(candidate.get('text') or '').strip()
            if not label:
                label = f'Вариант {position}'
            options.append({'text': label, 'isCorrect': bool(candidate.get('isCorrect'))})

        correct_total = sum(option['isCorrect'] for option in options)
        if correct_total == 0:
            raise LlmError(
                f'Вопрос {number}: отметьте минимум один правильный вариант.',
                code='llm_shape',
            )
        if correct_total > 1 and not multiple:
            raise LlmError(
                f'Вопрос {number}: с одним правильным ответом должен быть один вариант '
                f'isCorrect, либо укажите hasMultipleAnswers: true.',
                code='llm_shape',
            )

        questions.append({'text': text, 'hasMultipleAnswers': multiple, 'options': options})

    return {'title': title, 'description': description, 'questions': questions}


def assert_draft_matches_shape(o: dict, shape: list[dict]) -> None:
    """Check that a draft has exactly the layout described by ``shape``.

    Args:
        o: Draft object; expected to hold a ``questions`` list.
        shape: One spec per expected question.  Each spec must provide
            ``optionsCount`` and ``hasMultipleAnswers``; ``minCorrect``
            defaults to 1 and ``maxCorrect`` defaults to ``optionsCount``.

    Raises:
        LlmError: with ``code='llm_shape'`` when ``questions`` is missing,
            the number of questions or options differs from the spec, the
            ``hasMultipleAnswers`` flag differs, an option is not an object,
            or the number of correct options is outside
            ``[minCorrect, maxCorrect]``.
    """
    qs = o.get('questions') if isinstance(o, dict) else None
    if not isinstance(qs, list):
        raise LlmError('В ответе нет questions.', code='llm_shape')
    if len(qs) != len(shape):
        raise LlmError(
            f'Ожидалось вопросов: {len(shape)}, в ответе: {len(qs)}.',
            code='llm_shape',
        )
    for i, (q, sh) in enumerate(zip(qs, shape)):
        # A non-dict question yields opts=None and is rejected just below,
        # so ``q`` is known to be a dict for the rest of the iteration.
        opts = q.get('options') if isinstance(q, dict) else None
        if not isinstance(opts, list):
            raise LlmError(f'Вопрос {i + 1}: нет options.', code='llm_shape')
        if len(opts) != sh['optionsCount']:
            raise LlmError(
                f'Вопрос {i + 1}: ожидалось вариантов {sh["optionsCount"]}, в ответе: {len(opts)}.',
                code='llm_shape',
            )
        if bool(q.get('hasMultipleAnswers')) != sh['hasMultipleAnswers']:
            raise LlmError(
                f'Вопрос {i + 1}: hasMultipleAnswers должен быть {sh["hasMultipleAnswers"]}.',
                code='llm_shape',
            )
        # Reject malformed options explicitly; otherwise ``op.get`` below
        # would surface as AttributeError instead of the LlmError that
        # callers handle.
        for j, op in enumerate(opts):
            if not isinstance(op, dict):
                raise LlmError(
                    f'Вопрос {i + 1}, вариант {j + 1}: неверный формат.',
                    code='llm_shape',
                )
        min_c = int(sh.get('minCorrect', 1))
        max_c = int(sh.get('maxCorrect', sh['optionsCount']))
        correct_n = sum(1 for op in opts if op.get('isCorrect'))
        if correct_n < min_c or correct_n > max_c:
            raise LlmError(
                f'Вопрос {i + 1}: правильных ответов должно быть от {min_c} до {max_c}, в ответе: {correct_n}.',
                code='llm_shape',
            )


def normalize_draft_to_shape(draft: dict, shape: list[dict]) -> dict:
    """Coerce ``draft`` into the layout prescribed by ``shape``.

    One output question is produced per spec in ``shape``.  Missing or
    malformed questions/options are replaced with numbered placeholders,
    surplus ones are dropped, and ``hasMultipleAnswers`` is taken from the
    spec.  Correct flags are then adjusted:

    * single-answer: exactly one option stays correct (the first one marked,
      or the first option when none is marked);
    * multi-answer: unmarked options are switched on from the top until
      ``minCorrect`` is reached, then the marked list is cut to
      ``maxCorrect``.

    Returns:
        ``{'title', 'description', 'questions'}``; a blank title becomes
        ``'Тест'`` and the description is passed through unchanged.
    """
    source_questions = list((draft or {}).get('questions') or [])
    normalized = []

    for number, spec in enumerate(shape, start=1):
        candidate = source_questions[number - 1] if number <= len(source_questions) else None
        question = candidate if isinstance(candidate, dict) else {}

        text = str(question.get('text') or '').strip()
        if not text:
            text = f'Вопрос {number}'

        multiple = bool(spec.get('hasMultipleAnswers'))
        # Bounds are parsed unconditionally (as before) even though only the
        # multi-answer branch consults them.
        lower = int(spec.get('minCorrect', 1))
        option_total = spec['optionsCount']
        upper = int(spec.get('maxCorrect', option_total))

        raw_options = question.get('options')
        if not isinstance(raw_options, list):
            raw_options = []

        options = []
        for slot in range(option_total):
            raw = raw_options[slot] if slot < len(raw_options) else None
            if isinstance(raw, dict):
                label = str(raw.get('text') or '').strip() or f'Вариант {slot + 1}'
                options.append({'text': label, 'isCorrect': bool(raw.get('isCorrect'))})
            else:
                # Placeholder for a missing or non-object option.
                options.append({'text': f'Вариант {slot + 1}', 'isCorrect': False})

        # Indices of correct options; entries added below are appended, so
        # originally marked options take precedence when trimming.
        marked = [pos for pos, option in enumerate(options) if option['isCorrect']]

        if multiple:
            for pos, option in enumerate(options):
                if len(marked) >= lower:
                    break
                if not option['isCorrect']:
                    option['isCorrect'] = True
                    marked.append(pos)
            if len(marked) > upper:
                survivors = set(marked[:upper])
                for pos, option in enumerate(options):
                    option['isCorrect'] = pos in survivors
        else:
            chosen = marked[0] if marked else 0
            for pos, option in enumerate(options):
                option['isCorrect'] = pos == chosen

        normalized.append({'text': text, 'hasMultipleAnswers': multiple, 'options': options})

    title = str((draft or {}).get('title') or '').strip()
    return {
        'title': title or 'Тест',
        'description': (draft or {}).get('description'),
        'questions': normalized,
    }