diff --git a/apiApp/config.py b/apiApp/config.py index 3d1bce8..b4291f3 100644 --- a/apiApp/config.py +++ b/apiApp/config.py @@ -40,4 +40,7 @@ WEBHOOK_API_KEY = os.getenv("WEBHOOK_API_KEY", "webhook_secret_key") # Auto-restore recognition on startup ENABLE_AUTO_RESTORE = os.getenv("ENABLE_AUTO_RESTORE", "true").lower() == "true" AUTO_RESTORE_LIMIT = int(os.getenv("AUTO_RESTORE_LIMIT", "100")) # Максимум файлов для восстановления -AUTO_RESTORE_DELAY = int(os.getenv("AUTO_RESTORE_DELAY", "5")) # Задержка перед запуском (секунды) \ No newline at end of file +AUTO_RESTORE_DELAY = int(os.getenv("AUTO_RESTORE_DELAY", "5")) # Задержка перед запуском (секунды) + +# Recognition retry policy (FileAudioAPI side) +MAX_RECOGNITION_ATTEMPTS = int(os.getenv("MAX_RECOGNITION_ATTEMPTS", "3")) \ No newline at end of file diff --git a/apiApp/database/Audio.py b/apiApp/database/Audio.py index aef21c0..582b0e8 100644 --- a/apiApp/database/Audio.py +++ b/apiApp/database/Audio.py @@ -15,6 +15,10 @@ class Audio(Base): duration = Column(Float) file_size = Column(Integer) sourse = Column(Text, default="internal") + recognition_status = Column(Text, default="pending", index=True) # pending, processing, completed, failed + recognition_attempts = Column(Integer, default=0) + recognition_last_error = Column(Text, nullable=True) + recognition_last_attempt_at = Column(DateTime, nullable=True) ai_conclusion = relationship("AiConclusion", back_populates="audio", cascade="all, delete-orphan") @@ -26,5 +30,9 @@ class Audio(Base): "file_path": self.file_path, "duration": self.duration, "file_size": self.file_size, - "sourse": self.sourse + "sourse": self.sourse, + "recognition_status": self.recognition_status, + "recognition_attempts": self.recognition_attempts, + "recognition_last_error": self.recognition_last_error, + "recognition_last_attempt_at": self.recognition_last_attempt_at.isoformat() if self.recognition_last_attempt_at else None, } \ No newline at end of file diff --git a/apiApp/routers/ai_conclusion_router.py b/apiApp/routers/ai_conclusion_router.py index d8b3296..21058ed 100644 --- a/apiApp/routers/ai_conclusion_router.py +++ b/apiApp/routers/ai_conclusion_router.py @@ -62,6 +62,11 @@ class AiConclusionResponse(BaseModel): error: Optional[str] = None +class RecognitionFailedRequest(BaseModel): + filename: str + error: str + + class ConclusionByFilenameResponse(BaseModel): """Заключение по имени файла""" filename: str @@ -154,6 +159,11 @@ async def save_ai_conclusion(request: AiConclusionRequest, db: Session = Depends db.commit() logger.info(f"✅ Заключение сохранено для {request.filename}") + # Обновляем статус распознавания у Audio + audio.recognition_status = "completed" + audio.recognition_last_error = None + db.commit() + # Для внешних файлов — отправляем результат клиенту из FileAudioAPI if (audio.sourse or "").lower() == "external" and request.callback_url: _send_callback(request.callback_url, audio, conclusion_data) @@ -207,3 +217,25 @@ async def save_ai_conclusion(request: AiConclusionRequest, db: Session = Depends status_code=500, detail=str(e) ) + + +@ai_conclusion_router.post("/conclusion/failed", response_model=AiConclusionResponse) +async def mark_recognition_failed(request: RecognitionFailedRequest, db: Session = Depends(get_db)): + """ + Помечает распознавание как failed для файла (чтобы auto-restore не пытался бесконечно). + Используется GigaAM_API при невозможности получить результат. + """ + audio = db.query(Audio).filter(Audio.filename == request.filename).first() + if not audio: + raise HTTPException(status_code=404, detail=f"Файл не найден: {request.filename}") + + audio.recognition_status = "failed" + audio.recognition_last_error = request.error + db.commit() + + return AiConclusionResponse( + success=True, + message="Recognition marked as failed", + audio_id=str(audio.id), + filename=audio.filename + ) diff --git a/apiApp/routers/audio_management_router.py b/apiApp/routers/audio_management_router.py index 43c8a65..b3655e5 100644 --- a/apiApp/routers/audio_management_router.py +++ b/apiApp/routers/audio_management_router.py @@ -13,7 +13,7 @@ from datetime import datetime from apiApp.database import get_db from apiApp.database.Audio import Audio from apiApp.database.AiConclusion import AiConclusion -from apiApp.config import AUDIOFILES_PATH +from apiApp.config import AUDIOFILES_PATH, MAX_RECOGNITION_ATTEMPTS logger = logging.getLogger(__name__) audio_management_router = APIRouter() @@ -32,10 +32,14 @@ def query_audio_without_conclusion(db, limit=None): AiConclusion.audio_id == Audio.id ) + # Берём только те, которые еще можно/нужно распознавать query = db.query(Audio).filter( ~subquery ).filter( Audio.sourse == "internal" + ).filter( + (Audio.recognition_status.in_(["pending", "processing"])) | + ((Audio.recognition_status == "failed") & (Audio.recognition_attempts < MAX_RECOGNITION_ATTEMPTS)) ).order_by(Audio.index_date.asc()) if limit: @@ -112,6 +116,10 @@ async def register_audio_file( audio.filename = filename audio.file_size = file_size audio.index_date = datetime.utcnow() + audio.recognition_status = "pending" + audio.recognition_attempts = 0 + audio.recognition_last_error = None + audio.recognition_last_attempt_at = None db.add(audio) db.commit() @@ -150,6 +158,11 @@ def process_audio_file(audio_id: str, db: Session): return logger.info(f"🎵 Запуск распознавания для {audio.filename}") + audio.recognition_status = "processing" + audio.recognition_attempts = (audio.recognition_attempts or 0) + 1 + audio.recognition_last_attempt_at = datetime.utcnow() + audio.recognition_last_error = None + db.commit() # Проверяем что файл существует на диске from apiApp.config import AUDIOFILES_PATH @@ -158,7 +171,9 @@ def process_audio_file(audio_id: str, db: Session): if not os.path.exists(file_path): logger.error(f"❌ Файл не найден на диске в FileAudioAPI: {file_path}") - # Помечаем audio как проблемный + audio.recognition_status = "failed" + audio.recognition_last_error = f"File not found on disk: {file_path}" + db.commit() return file_size = os.path.getsize(file_path) @@ -187,11 +202,20 @@ def process_audio_file(audio_id: str, db: Session): error_detail = response.text logger.error(f"❌ Ошибка запуска распознавания для {audio.filename}: {response.status_code}") logger.error(f" Detail: {error_detail}") + audio.recognition_status = "failed" + audio.recognition_last_error = f"GigaAM start failed: {response.status_code} {error_detail}" + db.commit() except requests.exceptions.Timeout: logger.error(f"❌ Таймаут при отправке задачи для {audio.filename}") + audio.recognition_status = "failed" + audio.recognition_last_error = "Timeout when starting recognition in GigaAM" + db.commit() except requests.exceptions.ConnectionError as e: logger.error(f"❌ Ошибка подключения к GigaAM API для {audio.filename}: {e}") + audio.recognition_status = "failed" + audio.recognition_last_error = f"Connection error when starting recognition in GigaAM: {e}" + db.commit() except Exception as e: logger.error(f"❌ Ошибка при обработке {audio_id}: {e}")