API для работы с файлами, индексация файлов и результатов распознавания
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
from apiApp.routers.audio import router as audio_router
|
||||
from apiApp.routers.recognition import router as recognition_router
|
||||
|
||||
# Names re-exported by this module: the audio router and the recognition router imported above.
__all__ = ["audio_router", "recognition_router"]
|
||||
@@ -0,0 +1,155 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File as FastAPIFile, status
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import List
|
||||
import os
|
||||
import uuid
|
||||
import aiofiles
|
||||
|
||||
from apiApp.database import get_db
|
||||
from apiApp.schemas import (
|
||||
AudioCreate,
|
||||
AudioResponse,
|
||||
AudioListResponse,
|
||||
MessageResponse
|
||||
)
|
||||
from apiApp.services import AudioCRUD
|
||||
from apiApp.config import UPLOAD_FOLDER, ALLOWED_AUDIO_EXTENSIONS, MAX_UPLOAD_SIZE
|
||||
|
||||
# Router on which the audio endpoints defined below (upload, list, get, download, delete) are registered.
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/upload", response_model=AudioResponse, status_code=status.HTTP_201_CREATED)
async def upload_audio_file(
    file: UploadFile = FastAPIFile(...),
    db: Session = Depends(get_db)
):
    """
    Upload an audio file, store it on disk and create its database record.

    Args:
        file: The uploaded file; its extension must be listed in
            ALLOWED_AUDIO_EXTENSIONS.
        db: Database session supplied by ``get_db``.

    Returns:
        Whatever ``AudioCRUD.create`` returns for the new record
        (serialized through ``AudioResponse``).

    Raises:
        HTTPException: 422 when the extension is not allowed (including a
            missing filename), 413 when the payload exceeds MAX_UPLOAD_SIZE,
            500 when the file cannot be written or the database record
            cannot be created.
    """
    # The client may omit the filename; treat that as an empty name so the
    # request is rejected with 422 instead of crashing in os.path.splitext.
    original_name = file.filename or ""

    # Validate the file extension
    file_ext = os.path.splitext(original_name)[1].lower()
    if file_ext not in ALLOWED_AUDIO_EXTENSIONS:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"File extension not allowed. Allowed: {', '.join(ALLOWED_AUDIO_EXTENSIONS)}"
        )

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without loading an arbitrarily large body into memory.
    content = await file.read(int(MAX_UPLOAD_SIZE) + 1)

    # Validate the file size
    if len(content) > MAX_UPLOAD_SIZE:
        raise HTTPException(
            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
            detail=f"File too large. Maximum size: {MAX_UPLOAD_SIZE / (1024*1024)}MB"
        )

    # Store the file under a random name so uploads never collide
    file_path = UPLOAD_FOLDER / f"{uuid.uuid4()}{file_ext}"

    try:
        async with aiofiles.open(file_path, 'wb') as f:
            await f.write(content)
    except Exception as e:
        # Do not leave a partially written file behind. Cleanup is
        # best-effort: the write error is the one reported to the client.
        if os.path.exists(file_path):
            try:
                os.remove(file_path)
            except OSError:
                pass
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error saving file: {str(e)}"
        ) from e

    # Create the database record
    try:
        audio_data = AudioCreate(filename=original_name)
        audio = AudioCRUD.create(
            db=db,
            audio_data=audio_data,
            file_path=str(file_path),
            file_size=len(content)
        )
        return audio
    except Exception as e:
        # Remove the stored file when the database record could not be created
        if os.path.exists(file_path):
            os.remove(file_path)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error creating database record: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
@router.get("/audio/list", response_model=AudioListResponse)
async def list_audio_files(
    skip: int = 0,
    limit: int = 100,
    db: Session = Depends(get_db)
):
    """
    Return one page of audio records together with the total record count.

    Args:
        skip: Number of records to skip from the start; must be >= 0.
        limit: Maximum number of records to return; must be >= 0.
        db: Database session supplied by ``get_db``.

    Returns:
        AudioListResponse with ``audios`` holding the requested page and
        ``count`` holding the number of records returned by
        ``AudioCRUD.get_all``.

    Raises:
        HTTPException: 422 when ``skip`` or ``limit`` is negative.
    """
    # Negative values would be interpreted as "from the end" by list slicing
    # and yield a page that has nothing to do with the request.
    if skip < 0 or limit < 0:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="skip and limit must be non-negative"
        )

    # NOTE(review): all records are loaded and paginated in memory; push
    # skip/limit into the query if AudioCRUD offers a way to do so.
    audios = AudioCRUD.get_all(db)
    return AudioListResponse(
        audios=audios[skip:skip + limit],
        count=len(audios)
    )
|
||||
|
||||
|
||||
@router.get("/audio/{audio_id}", response_model=AudioResponse)
async def get_audio(
    audio_id: uuid.UUID,
    db: Session = Depends(get_db)
):
    """
    Return the audio record with the given ID.

    Raises:
        HTTPException: 404 when ``AudioCRUD.get_by_id`` finds no record.
    """
    record = AudioCRUD.get_by_id(db, audio_id)
    if record:
        return record

    # Lookup came back empty
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Audio not found"
    )
|
||||
|
||||
|
||||
@router.get("/audio/file/{audio_id}")
async def download_audio_file(
    audio_id: uuid.UUID,
    db: Session = Depends(get_db)
):
    """
    Download the stored file of an audio record.

    Args:
        audio_id: ID of the audio record.
        db: Database session supplied by ``get_db``.

    Returns:
        FileResponse streaming the file at ``audio.file_path``, offered to
        the client under the record's ``filename``.

    Raises:
        HTTPException: 404 when the record does not exist, or when it has no
            file path or the file is missing on disk.
    """
    import mimetypes

    audio = AudioCRUD.get_by_id(db, audio_id)
    if not audio:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Audio not found"
        )

    if not audio.file_path or not os.path.exists(audio.file_path):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Audio file not found on disk"
        )

    # Uploads are stored with their original extension, so derive the
    # Content-Type from it instead of labelling every file as MP3.
    # 'audio/mpeg' stays as the fallback for extensions mimetypes cannot map.
    guessed_type, _ = mimetypes.guess_type(audio.file_path)

    return FileResponse(
        path=audio.file_path,
        filename=audio.filename,
        media_type=guessed_type or 'audio/mpeg'
    )
|
||||
|
||||
|
||||
@router.delete("/audio/delete/{audio_id}", response_model=AudioResponse)
async def delete_audio(
    audio_id: uuid.UUID,
    db: Session = Depends(get_db)
):
    """
    Delete an audio record and return what ``AudioCRUD.delete`` returned.

    Raises:
        HTTPException: 404 when ``AudioCRUD.delete`` returns a falsy value.
    """
    deleted = AudioCRUD.delete(db, audio_id)
    if deleted:
        return deleted

    # Nothing was deleted for this ID
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Audio not found"
    )
|
||||
@@ -0,0 +1,264 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks, status
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Dict, Any
|
||||
import uuid
|
||||
import os
|
||||
import asyncio
|
||||
|
||||
from apiApp.database import get_db, Audio
|
||||
from apiApp.schemas import (
|
||||
RecognitionStartResponse,
|
||||
RecognitionStatus,
|
||||
ErrorResponse
|
||||
)
|
||||
from apiApp.services import AudioCRUD, AiConclusionCRUD
|
||||
from apiApp.config import UPLOAD_FOLDER
|
||||
|
||||
# Router on which the recognition endpoints defined below are registered.
router = APIRouter()
|
||||
|
||||
# Global in-memory store of recognition task statuses, keyed by task_id (in production prefer Redis)
|
||||
recognition_tasks: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
|
||||
async def process_recognition(audio_id: uuid.UUID, file_path: str, task_id: str):
    """
    Background task that recognizes one audio file and tracks its progress.

    The task state is stored in the module-level ``recognition_tasks`` dict
    under ``task_id`` with the keys ``audio_id``, ``status``, ``result`` and
    ``error``. The status starts as 'processing' and ends as 'completed' or
    'error'.

    Args:
        audio_id: ID of the audio record being recognized.
        file_path: Path of the audio file on disk.
        task_id: Key under which the task state is stored.

    Returns:
        None. The outcome is reported through ``recognition_tasks`` and, on
        success, persisted via ``AiConclusionCRUD`` and ``AudioCRUD``.
        Exceptions are not propagated; they are recorded as the task error.
    """
    # Keep a direct reference to the state dict so the error handler below
    # never depends on a second registry lookup.
    task_state = {
        'audio_id': audio_id,
        'status': 'processing',
        'result': None,
        'error': None
    }
    recognition_tasks[task_id] = task_state

    try:
        # Make sure the file is still there
        if not os.path.exists(file_path):
            task_state['status'] = 'error'
            task_state['error'] = 'File not found on disk'
            return

        # The real recognition logic belongs here, e.g. a call to an
        # external API or a local model:
        # result = await your_recognize_function(file_path)

        # Simulated processing time (remove in real code)
        await asyncio.sleep(2)

        # Example result (replace with the real one)
        result = {
            'text': 'Распознанный текст из аудио',
            'confidence': 0.95,
            'duration': 120.5,
            'segments': [
                {
                    'start': 0.0,
                    'end': 5.2,
                    'text': 'Привет, чем могу помочь?',
                    'speaker': 'SPEAKER_00'
                },
                {
                    'start': 5.5,
                    'end': 10.8,
                    'text': 'Мне нужна информация о услугах',
                    'speaker': 'SPEAKER_01'
                }
            ]
        }

        # Payload stored as the AI conclusion; built once and shared by the
        # create and update paths.
        conclusion_payload = {
            "transcription": result.get('segments', []),
            "ai_transcription": [result.get('text', '')],
            "conclusion": {
                "confidence": result.get('confidence', 0.0),
                "duration": result.get('duration', 0.0)
            }
        }

        # A background task runs outside the request, so it opens its own
        # database session.
        from apiApp.database import SessionLocal
        db = SessionLocal()

        try:
            # Create or update the AI conclusion
            conclusion = AiConclusionCRUD.get_by_audio_id(db, audio_id)
            if conclusion:
                AiConclusionCRUD.update(
                    db=db,
                    conclusion_id=conclusion.id,
                    conclusion_data=conclusion_payload,
                    end_date=True
                )
            else:
                AiConclusionCRUD.create(
                    db=db,
                    audio_id=audio_id,
                    conclusion=conclusion_payload
                )

            # Update the audio record
            AudioCRUD.update_recognition_result(db, audio_id, result)
        finally:
            db.close()

        # Report success only after the result has been persisted, so a
        # poller never sees 'completed' for a task that then fails to save.
        task_state['result'] = result
        task_state['status'] = 'completed'

    except Exception as e:
        task_state['status'] = 'error'
        task_state['error'] = str(e)
|
||||
|
||||
|
||||
@router.post("/recognize/{audio_id}", response_model=RecognitionStartResponse, status_code=status.HTTP_202_ACCEPTED)
async def start_recognition(
    audio_id: uuid.UUID,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """
    Start recognition of an audio file as a background task.

    Args:
        audio_id: ID of the audio record to recognize.
        background_tasks: FastAPI background task queue for this request.
        db: Database session supplied by ``get_db``.

    Returns:
        RecognitionStartResponse with status "success" and the new task_id,
        or status "info" and the existing task_id when a task for this audio
        is already in the 'processing' state.

    Raises:
        HTTPException: 404 when the audio record does not exist, or when it
            has no file path or the file is missing on disk.
    """
    # Make sure the audio record exists
    audio = AudioCRUD.get_by_id(db, audio_id)
    if not audio:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Audio not found in database"
        )

    # Reuse a task that is already running for this audio
    for task_id, task in recognition_tasks.items():
        if task['audio_id'] == audio_id and task['status'] == 'processing':
            return RecognitionStartResponse(
                status="info",
                message="Recognition already in progress",
                task_id=task_id,
                audio_id=audio_id
            )

    # Make sure the file is present
    if not audio.file_path or not os.path.exists(audio.file_path):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Audio file not found on disk"
        )

    # Create the task_id
    task_id = str(uuid.uuid4())

    # Register the task before scheduling it. The background job only runs
    # after the response is sent; without this entry a second request in
    # that window would start a duplicate task, and the task_id returned
    # here would not be found by the status endpoints yet.
    recognition_tasks[task_id] = {
        'audio_id': audio_id,
        'status': 'processing',
        'result': None,
        'error': None
    }

    # Schedule the background task
    background_tasks.add_task(process_recognition, audio_id, audio.file_path, task_id)

    return RecognitionStartResponse(
        status="success",
        message="Recognition started",
        task_id=task_id,
        audio_id=audio_id
    )
|
||||
|
||||
|
||||
@router.get("/recognize/{audio_id}", response_model=RecognitionStatus)
async def get_recognition_status(
    audio_id: uuid.UUID,
    db: Session = Depends(get_db)
):
    """
    Return the recognition status for an audio record.

    The most recently registered in-memory task for the audio is reported
    when one exists. Otherwise a stored conclusion with an ``end_date`` is
    reported as "completed", and anything else as "not_started".

    Args:
        audio_id: ID of the audio record.
        db: Database session supplied by ``get_db``.

    Returns:
        RecognitionStatus; ``task_id`` is an empty string when the status
        does not come from an in-memory task.

    Raises:
        HTTPException: 404 when the audio record does not exist.
    """
    # Make sure the audio record exists
    audio = AudioCRUD.get_by_id(db, audio_id)
    if not audio:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Audio not found"
        )

    # Look for a task of this audio, newest first: the dict keeps insertion
    # order, so an older failed or finished run must not hide a newer one.
    task_info = None
    for task_id, task in reversed(list(recognition_tasks.items())):
        if task['audio_id'] == audio_id:
            task_info = {
                'task_id': task_id,
                **task
            }
            break

    if not task_info:
        # No in-memory task: fall back to a stored result, if any
        conclusion = AiConclusionCRUD.get_by_audio_id(db, audio_id)
        if conclusion and conclusion.end_date:
            return RecognitionStatus(
                task_id="",
                audio_id=audio_id,
                status="completed",
                result=conclusion.conclusion
            )

        return RecognitionStatus(
            task_id="",
            audio_id=audio_id,
            status="not_started"
        )

    return RecognitionStatus(**task_info)
|
||||
|
||||
|
||||
@router.get("/recognize/task/{task_id}", response_model=RecognitionStatus)
async def get_recognition_task(task_id: str):
    """
    Return the state of the in-memory recognition task with the given ID.

    Raises:
        HTTPException: 404 when ``task_id`` is not in ``recognition_tasks``.
    """
    try:
        entry = recognition_tasks[task_id]
    except KeyError:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        ) from None

    # 'result' and 'error' are optional in the stored state
    return RecognitionStatus(
        task_id=task_id,
        audio_id=entry['audio_id'],
        status=entry['status'],
        result=entry.get('result'),
        error=entry.get('error')
    )
|
||||
|
||||
|
||||
@router.get("/recognize/{audio_id}/result")
async def get_recognition_result(
    audio_id: uuid.UUID,
    db: Session = Depends(get_db)
):
    """
    Return the stored recognition result of an audio record.

    Returns:
        A dict with status "success", the conclusion and its dates when a
        conclusion with an ``end_date`` exists; otherwise a dict with status
        "not_available".

    Raises:
        HTTPException: 404 when the audio record does not exist.
    """
    if not AudioCRUD.get_by_id(db, audio_id):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Audio not found"
        )

    stored = AiConclusionCRUD.get_by_audio_id(db, audio_id)

    # A conclusion counts as available only once it has an end date
    if stored and stored.end_date:
        return {
            "status": "success",
            "audio_id": str(audio_id),
            "result": stored.conclusion,
            "index_date": stored.index_date,
            "end_date": stored.end_date
        }

    return {
        "status": "not_available",
        "message": "Recognition result not available yet",
        "audio_id": str(audio_id)
    }
|
||||
Reference in New Issue
Block a user