Browse Source

add autoLoader

dev
PoturaevPetr 2 weeks ago
parent
commit
8b471c771a
  1. 9
      .env.test
  2. 3
      autoLoader/__init__.py
  3. 7
      autoLoader/config.py
  4. 22
      autoLoader/database/AiConclusion.py
  5. 28
      autoLoader/database/Audio.py
  6. 12
      autoLoader/database/ConclusionVersion.py
  7. 16
      autoLoader/database/Operator.py
  8. 31
      autoLoader/database/__init__.py
  9. 2
      autoLoader/loader/__init__.py
  10. 53
      autoLoader/loader/connector.py
  11. 58
      autoLoader/loader/loader.py
  12. 4
      run_loader.py

9
.env.test

@ -1,7 +1,7 @@
# Database
# DATABASE_URL=sqlite:///./speech_analytics.db
# Для PostgreSQL:
DATABASE_URL=postgresql://postgres_test:test_user@postgres_test:5432/speech_analytics
DATABASE_URL=postgresql://postgres_test:test_user@postgres_test:5432/audiofiles_db
# API Settings
API_V1_PREFIX=/api/v1
@ -15,3 +15,10 @@ APP_VERSION=1.0.0
HOST=0.0.0.0
PORT=5056
RELOAD=True
#SFTP
SFPT_HOSTNAME = 192.168.1.150
SFPT_USERNAME = monitor
SFPT_PASSWORD = Audio4analy6!6
FILESAPTH = audiofiles

3
autoLoader/__init__.py

@ -0,0 +1,3 @@
from autoLoader.database import *
from autoLoader.loader import Loader
loader = Loader()

7
autoLoader/config.py

@ -0,0 +1,7 @@
import os
SFTP_HOSTNAME = os.getenv("SFPT_HOSTNAME", "192.168.1.150")
SFTP_USERNAME = os.getenv("SFPT_USERNAME", "monitor")
SFTP_PASSWORD = os.getenv("SFPT_PASSWORD", "Audio4analy6!6")
FILESAPTH = os.getenv("FILESAPTH", "audiofiles")
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./speech_analytics.db")

22
autoLoader/database/AiConclusion.py

@ -0,0 +1,22 @@
from sqlalchemy import Column, UUID, ForeignKey, DateTime, JSON
from sqlalchemy.orm import relationship
from apiApp.database import Base
import uuid
from datetime import datetime
class AiConclusion(Base):
__tablename__ = "ai_conclusion"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
audio_id = Column(UUID(as_uuid=True), ForeignKey("audio.id"), nullable=False)
conclusion = Column(JSON, default=lambda: {
"transcription": [],
"ai_transcription": [],
"conclusion": {}
})
index_date = Column(DateTime, default=datetime.utcnow)
end_date = Column(DateTime)
audio_id = Column(UUID(as_uuid=True), ForeignKey("audio.id"))
version = relationship("ConclusionVersion", back_populates="ai_conclusion")

28
autoLoader/database/Audio.py

@ -0,0 +1,28 @@
from sqlalchemy import Column, String, DateTime, UUID, ForeignKey, Float, Integer
from sqlalchemy.orm import relationship
from apiApp.database import Base
import uuid
from datetime import datetime
class Audio(Base):
__tablename__ = "audio"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
filename = Column(String(255), nullable=False)
index_date = Column(DateTime, default=datetime.utcnow)
file_path = Column(String(500))
duration = Column(Float)
file_size = Column(Integer)
ai_conclusion = relationship("AiConclusion", back_populates="audio", cascade="all, delete-orphan")
def to_dict(self):
return {
"id": str(self.id),
"filename": self.filename,
"index_date": self.index_date.isoformat() if self.index_date else None,
"file_path": self.file_path,
"duration": self.duration,
"file_size": self.file_size
}

12
autoLoader/database/ConclusionVersion.py

@ -0,0 +1,12 @@
from sqlalchemy import Column, UUID, ForeignKey, Integer, Text
from apiApp.database import Base
import uuid
class ConclusionVersion(Base):
__tablename__ = "conclusion_version"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
conclusion_id = Column(UUID(as_uuid=True), ForeignKey("ai_conclusion.id"))
version = Column(Integer)
content = Column(Text)

16
autoLoader/database/Operator.py

@ -0,0 +1,16 @@
from sqlalchemy import Column, UUID, String, Integer
from sqlalchemy.orm import relationship
from apiApp.database import Base
import uuid
class Operator(Base):
__tablename__ = "operator"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
fio = Column(String(100))
num = Column(Integer)
calls = relationship("Call", back_populates="operator")

31
autoLoader/database/__init__.py

@ -0,0 +1,31 @@
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from autoLoader.config import DATABASE_URL
# Создание engine
engine = create_engine(
DATABASE_URL,
connect_args={"check_same_thread": False} if "sqlite" in DATABASE_URL else {}
)
# SessionLocal
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Base
Base = declarative_base()
# Зависимость для получения сессии БД
def get_db():
db = SessionLocal()
try:
yield db
finally:
db.close()
# from apiApp.database.Operator import Operator
from apiApp.database.Audio import Audio
from apiApp.database.AiConclusion import AiConclusion
from apiApp.database.ConclusionVersion import ConclusionVersion

2
autoLoader/loader/__init__.py

@ -0,0 +1,2 @@
from autoLoader.loader.connector import *
from autoLoader.loader.loader import *

53
autoLoader/loader/connector.py

@ -0,0 +1,53 @@
from autoLoader.config import SFTP_HOSTNAME, SFTP_USERNAME, SFTP_PASSWORD
import socket
def check_connection():
"""Проверка доступности сервера"""
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(10)
try:
result = sock.connect_ex((SFTP_HOSTNAME, 22))
if result == 0:
print("Порт 22 доступен")
else:
print(f"Порт 22 недоступен. Код ошибки: {result}")
except Exception as e:
print(f"Ошибка проверки соединения: {e}")
finally:
sock.close()
# Перед подключением вызовите проверку
# check_connection()
import paramiko
class ConnectorSFTP():
def __init__(self):
self.sftp = None
self.ssh = paramiko.SSHClient()
def connect(self, remote_path: str):
try:
self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
self.ssh.connect(
hostname=SFTP_HOSTNAME,
username=SFTP_USERNAME,
password=SFTP_PASSWORD,
)
self.sftp = self.ssh.open_sftp()
remote_path = remote_path.lstrip('/') # Удаляем начальный слэш, если есть
self.sftp.chdir(remote_path) # Переходим в директори
except paramiko.AuthenticationException:
print("Ошибка аутентификации. Проверьте имя пользователя и пароль.")
return None, None
except paramiko.SSHException as e:
print(f"Ошибка SSH: {e}")
return None, None
except FileNotFoundError:
print(f"Директория {remote_path} не найдена на сервере.")
return None, None
except IOError as e:
print(f"Ошибка ввода-вывода: {e}")
return None, None

58
autoLoader/loader/loader.py

@ -0,0 +1,58 @@
from autoLoader.config import FILESAPTH
from autoLoader.loader import ConnectorSFTP
import datetime, os
from autoLoader.database import *
local_path = os.path.join(os.getcwd(), FILESAPTH)
class Loader():
def __init__(self):
self.call_types = ['in']
pass
def filter_call(self, filename: str):
if filename.split("-")[0] in self.call_types:
return True
return False
def load(self):
date_now = datetime.datetime.now()# - datetime.timedelta(days=1)
remote_path = f"/{date_now.strftime('%Y/%m/%d')}"
connector = ConnectorSFTP()
connector.connect(remote_path=remote_path)
sftp_client = connector.sftp
ssh_client = connector.ssh
if sftp_client is None or ssh_client is None:
print("Не удалось подключиться к SFTP. Завершение работы.")
exit(1)
db = get_db()
try:
listdir = sftp_client.listdir()
os.makedirs(local_path, exist_ok = True)
diskdir = os.listdir(local_path)
for file in listdir:
if self.filter_call(filename=file):
remote_file = f"{file}".lstrip('/')
filepath = os.path.join(local_path, file)
try:
sftp_client.get(remote_file, filepath) # Скачиваем файл
audio = Audio()
audio.index_date = datetime.datetime.now()
audio.filename = file
db.session.add(audio)
db.session.commit()
print(f"Скачан файл: {remote_file}")
except Exception as e:
print(f"Ошибка при скачивании файла {remote_file}: {e}")
finally:
# Закрываем соединения
sftp_client.close()
ssh_client.close()

4
run_loader.py

@ -0,0 +1,4 @@
from autoLoader import loader
loader.load()
Loading…
Cancel
Save