You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
167 lines
7.5 KiB
167 lines
7.5 KiB
"""Парсер JSON от LLM и валидатор draft (порт частей `documentGenService.js`).""" |
|
from __future__ import annotations |
|
|
|
import json as _json |
|
import re |
|
from typing import Any |
|
|
|
from .llm_client import LlmError |
|
|
|
|
|
# Markdown code fence, optionally tagged ``json``, opening at the start of a
# line and closing at the end of a line. Group 1 is the fenced payload.
_FENCE_RE = re.compile(r'^```(?:json)?\s*([\s\S]*?)```$', re.MULTILINE)


def parse_json_from_llm_text(text: str) -> Any:
    """Parse a JSON value out of raw LLM output.

    Surrounding whitespace is stripped. If the text contains a Markdown code
    fence (```` ``` ```` or ```` ```json ````), the contents of the first fence
    are parsed; otherwise the whole stripped text is parsed.

    Args:
        text: Raw model response.

    Returns:
        The decoded JSON value (any JSON type, not necessarily an object).

    Raises:
        LlmError: with ``code='llm_empty'`` when ``text`` is not a non-blank
            string, or ``code='llm_json_parse'`` when the payload is not
            valid JSON.
    """
    if not isinstance(text, str) or not text.strip():
        raise LlmError('Пустой ответ модели.', code='llm_empty')

    payload = text.strip()
    # ``search`` rather than ``match``: the pattern is compiled with MULTILINE
    # so that ``^`` can anchor on any line, which only takes effect when the
    # scan is allowed to start past position 0. This lets a fenced block be
    # extracted even when the model puts prose before or after it.
    if m := _FENCE_RE.search(payload):
        payload = m.group(1).strip()

    try:
        return _json.loads(payload)
    except _json.JSONDecodeError as exc:
        # Chain the decoder error so the position of the failure is not lost.
        raise LlmError(
            'Ответ модели не является корректным JSON.', code='llm_json_parse'
        ) from exc
|
|
|
|
|
def validate_and_normalize_draft(
    o: Any,
    *,
    max_questions: int = 40,
    max_options: int = 12,
) -> dict:
    """Validate a decoded draft and return it in normalized form.

    Args:
        o: Decoded JSON value; must be a dict with ``title`` and ``questions``.
        max_questions: Upper bound on the number of questions (default 40).
        max_options: Upper bound on options per question (default 12).

    Returns:
        A dict with keys ``title`` (non-empty stripped string),
        ``description`` (stripped string, or ``None`` when missing/blank) and
        ``questions`` (list of dicts with ``text``, ``hasMultipleAnswers`` and
        ``options``; each option has ``text`` and a boolean ``isCorrect``).

    Raises:
        LlmError: with ``code='llm_shape'`` on any structural problem: not a
            dict, missing title, no questions, too many questions/options,
            fewer than two options, malformed entries, no correct option, or
            several correct options on a single-answer question.
    """
    if not isinstance(o, dict):
        raise LlmError('JSON не содержит объекта с данными.', code='llm_shape')

    title = str(o.get('title') or '').strip()
    if not title:
        raise LlmError('В ответе нет поля title.', code='llm_shape')

    # Falsy or whitespace-only descriptions collapse to None.
    desc = o.get('description')
    description = (str(desc).strip() or None) if desc else None

    raw_qs = o.get('questions')
    if not isinstance(raw_qs, list) or not raw_qs:
        raise LlmError('В ответе нет вопросов (questions).', code='llm_shape')
    if len(raw_qs) > max_questions:
        raise LlmError(
            f'Слишком много вопросов в ответе (макс. {max_questions}).',
            code='llm_shape',
        )

    questions = [
        _normalize_draft_question(q, number, max_options)
        for number, q in enumerate(raw_qs, start=1)
    ]
    return {'title': title, 'description': description, 'questions': questions}


def _normalize_draft_question(q: Any, number: int, max_options: int) -> dict:
    """Validate one raw question (1-based ``number``) and return its normalized dict."""
    if not isinstance(q, dict):
        raise LlmError(f'Вопрос {number}: неверный формат.', code='llm_shape')

    text = str(q.get('text') or '').strip()
    if not text:
        raise LlmError(f'Вопрос {number}: пустой текст.', code='llm_shape')

    has_multi = bool(q.get('hasMultipleAnswers'))

    raw_opts = q.get('options')
    if not isinstance(raw_opts, list) or len(raw_opts) < 2:
        raise LlmError(
            f'Вопрос {number}: нужны минимум 2 варианта ответа.', code='llm_shape'
        )
    if len(raw_opts) > max_options:
        raise LlmError(
            f'Вопрос {number}: слишком много вариантов (макс. {max_options}).',
            code='llm_shape',
        )

    options = []
    for j, op in enumerate(raw_opts, start=1):
        if not isinstance(op, dict):
            raise LlmError(
                f'Вопрос {number}, вариант {j}: неверный формат.', code='llm_shape'
            )
        options.append(
            {
                # A blank option text is replaced by a numbered placeholder.
                'text': str(op.get('text') or '').strip() or f'Вариант {j}',
                'isCorrect': bool(op.get('isCorrect')),
            }
        )

    correct_n = sum(1 for opt in options if opt['isCorrect'])
    if correct_n == 0:
        raise LlmError(
            f'Вопрос {number}: отметьте минимум один правильный вариант.',
            code='llm_shape',
        )
    if not has_multi and correct_n > 1:
        raise LlmError(
            f'Вопрос {number}: с одним правильным ответом должен быть один вариант '
            f'isCorrect, либо укажите hasMultipleAnswers: true.',
            code='llm_shape',
        )

    return {'text': text, 'hasMultipleAnswers': has_multi, 'options': options}
|
|
|
|
|
def assert_draft_matches_shape(o: dict, shape: list[dict]) -> None:
    """Check that a draft has exactly the question/option layout given by ``shape``.

    For each position the draft question must have ``optionsCount`` options,
    the same ``hasMultipleAnswers`` flag, and a number of correct options
    within ``minCorrect``..``maxCorrect`` (defaults: 1 and ``optionsCount``).

    Args:
        o: Draft to check; expected to be a dict with a ``questions`` list.
        shape: One dict per expected question with keys ``optionsCount`` and
            ``hasMultipleAnswers`` and optional ``minCorrect``/``maxCorrect``.

    Returns:
        None when the draft matches.

    Raises:
        LlmError: with ``code='llm_shape'`` on the first mismatch found.
    """
    qs = o.get('questions') if isinstance(o, dict) else None
    if not isinstance(qs, list):
        raise LlmError('В ответе нет questions.', code='llm_shape')
    if len(qs) != len(shape):
        raise LlmError(
            f'Ожидалось вопросов: {len(shape)}, в ответе: {len(qs)}.',
            code='llm_shape',
        )

    for number, (q, sh) in enumerate(zip(qs, shape), start=1):
        opts = q.get('options') if isinstance(q, dict) else None
        if not isinstance(opts, list):
            raise LlmError(f'Вопрос {number}: нет options.', code='llm_shape')

        expected_opts = sh['optionsCount']
        if len(opts) != expected_opts:
            raise LlmError(
                f'Вопрос {number}: ожидалось вариантов {expected_opts}, в ответе: {len(opts)}.',
                code='llm_shape',
            )

        expected_multi = sh['hasMultipleAnswers']
        if bool(q.get('hasMultipleAnswers')) != expected_multi:
            raise LlmError(
                f'Вопрос {number}: hasMultipleAnswers должен быть {expected_multi}.',
                code='llm_shape',
            )

        # Reject malformed options explicitly; otherwise the ``.get`` call in
        # the count below would fail with AttributeError instead of LlmError.
        for j, op in enumerate(opts, start=1):
            if not isinstance(op, dict):
                raise LlmError(
                    f'Вопрос {number}, вариант {j}: неверный формат.',
                    code='llm_shape',
                )

        min_c = int(sh.get('minCorrect', 1))
        max_c = int(sh.get('maxCorrect', expected_opts))
        correct_n = sum(1 for op in opts if op.get('isCorrect'))
        if not min_c <= correct_n <= max_c:
            raise LlmError(
                f'Вопрос {number}: правильных ответов должно быть от {min_c} до {max_c}, в ответе: {correct_n}.',
                code='llm_shape',
            )
|
|
|
|
|
def normalize_draft_to_shape(draft: dict, shape: list[dict]) -> dict:
    """Coerce ``draft`` so that it fits ``shape`` exactly.

    The result has one question per ``shape`` entry, each with exactly
    ``optionsCount`` options. Missing or malformed questions and options are
    replaced by numbered placeholders. ``hasMultipleAnswers`` is taken from
    the shape, not from the draft. Correct flags are then adjusted:

    * single-answer: exactly one option stays correct (the first one marked
      correct, or the first option when none is marked);
    * multi-answer: unmarked options are marked in order until ``minCorrect``
      is reached (default 1), then the selection is cut to ``maxCorrect``
      (default ``optionsCount``), keeping the originally marked ones first.

    Args:
        draft: Source draft; anything that is not a dict is treated as empty.
        shape: One dict per question with ``optionsCount`` and optional
            ``hasMultipleAnswers``, ``minCorrect``, ``maxCorrect``.

    Returns:
        A new dict with ``title`` (falls back to ``'Тест'``), ``description``
        (stripped string or ``None``) and ``questions``.
    """
    source = draft if isinstance(draft, dict) else {}
    raw_questions = source.get('questions')
    qs = list(raw_questions) if isinstance(raw_questions, (list, tuple)) else []

    out_qs = []
    for i, sh in enumerate(shape):
        src = qs[i] if i < len(qs) and isinstance(qs[i], dict) else {}
        text = str(src.get('text') or '').strip() or f'Вопрос {i + 1}'

        n_opts = sh['optionsCount']
        has_multi = bool(sh.get('hasMultipleAnswers'))
        if has_multi:
            min_c = int(sh.get('minCorrect', 1))
            max_c = int(sh.get('maxCorrect', n_opts))
        else:
            # A single-answer question always has exactly one correct option.
            min_c = max_c = 1

        raw_opts = src.get('options')
        if not isinstance(raw_opts, list):
            raw_opts = []

        opts = []
        for j in range(n_opts):
            raw = raw_opts[j] if j < len(raw_opts) else None
            if isinstance(raw, dict):
                opts.append(
                    {
                        'text': str(raw.get('text') or '').strip() or f'Вариант {j + 1}',
                        'isCorrect': bool(raw.get('isCorrect')),
                    }
                )
            else:
                # Missing or malformed option: numbered placeholder.
                opts.append({'text': f'Вариант {j + 1}', 'isCorrect': False})

        # Indices that end up correct, originally marked ones first.
        chosen = [idx for idx, op in enumerate(opts) if op['isCorrect']]
        if not has_multi:
            chosen = chosen[:1] or [0]
        else:
            if len(chosen) < min_c:
                # Top up with the earliest unmarked options.
                for idx, op in enumerate(opts):
                    if len(chosen) >= min_c:
                        break
                    if not op['isCorrect']:
                        chosen.append(idx)
            if len(chosen) > max_c:
                chosen = chosen[:max_c]

        keep = set(chosen)
        for idx, op in enumerate(opts):
            op['isCorrect'] = idx in keep

        out_qs.append({'text': text, 'hasMultipleAnswers': has_multi, 'options': opts})

    # Description gets the same treatment as in validate_and_normalize_draft:
    # a stripped string, or None when missing or blank.
    desc = source.get('description')
    description = (str(desc).strip() or None) if desc else None

    return {
        'title': str(source.get('title') or '').strip() or 'Тест',
        'description': description,
        'questions': out_qs,
    }
|
|
|