Проект "Речевая аналитика для Клиники Оленевой"
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

165 lines
7.2 KiB

import os
import time
import shutil
#from apiApp import init_model
#from apiApp.ai import recognition
from ai import recognition
import paramiko
#from apiApp.database import Call
from ai.recognition import clean
import datetime
path = '/media/dev/TEXTFILES/audiofiles'
import gc
import torch
def flush():
    """Release unreferenced Python objects and cached CUDA memory.

    Runs the garbage collector, empties the CUDA caching allocator and
    resets the CUDA peak-memory statistics, in that order.
    """
    cleanup_steps = (
        gc.collect,
        torch.cuda.empty_cache,
        torch.cuda.reset_peak_memory_stats,
    )
    for step in cleanup_steps:
        step()
def is_file_fully_loaded(file_path, settle_seconds=0.5):
    """Report whether the file at *file_path* has stopped growing.

    The size is sampled twice, ``settle_seconds`` apart.  Sampling twice
    with no pause in between (the previous behaviour) practically always
    yields equal sizes, even while the file is still being written.

    Args:
        file_path: Path of the file to inspect.
        settle_seconds: Pause between the two size samples, in seconds.

    Returns:
        ``True`` when both samples are equal, ``False`` otherwise.

    Raises:
        FileNotFoundError: If the file does not exist (propagated from
            ``os.path.getsize``).
    """
    size_before = os.path.getsize(file_path)
    time.sleep(settle_seconds)
    return os.path.getsize(file_path) == size_before
def connect_to_sftp(hostname, username, password, remote_path):
    """Open an SFTP session on *hostname* and change into *remote_path*.

    Args:
        hostname: Address of the SSH/SFTP server.
        username: Login name.
        password: Login password.
        remote_path: Directory to ``chdir`` into; paths of three characters
            or fewer are left alone and no ``chdir`` is attempted.

    Returns:
        The paramiko SFTP client, or ``None`` when authentication, the SSH
        connection, the directory change or the underlying I/O fails.  The
        failure is printed and the SSH client is closed before returning,
        so a failed attempt does not leave a connection behind.
    """
    ssh = paramiko.SSHClient()
    # Adds unknown hosts to the known hosts.
    # NOTE(review): AutoAddPolicy accepts any host key without verification.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(hostname, username=username, password=password)
        sftp = ssh.open_sftp()
        if len(remote_path) > 3:
            # Drop the first character when the second one is a slash.
            if remote_path[1] == '/':
                remote_path = remote_path[1:]
            # Change into the requested directory; errors are handled below.
            sftp.chdir(remote_path)
        return sftp
    except paramiko.AuthenticationException:
        print("Ошибка аутентификации. Проверьте имя пользователя и пароль.")
    except paramiko.SSHException as e:
        print(f"Ошибка SSH: {e}")
    except FileNotFoundError:
        print(f"Директория {remote_path} не найдена на сервере.")
    except IOError as e:
        print(f"Ошибка ввода-вывода: {e}")
    # Reached only after a handled failure: release the connection.
    ssh.close()
    return None
from datetime import timedelta
def weekd(s):
    """Return the day offsets -1 through -7 as a new list.

    The argument ``s`` is accepted but not used: every call yields the same
    seven offsets.
    """
    # An earlier variant that derived the offsets from ``s`` (compared
    # against 3) was disabled in favour of this fixed window.
    return list(range(-1, -8, -1))
def _remote_day_path(day_offset, now=None):
    """Build the ``/YYYY/MM/DD/`` server directory for one scan offset.

    ``day_offset`` is a non-positive offset as produced by ``weekd`` (-1
    means one day back).  Before 09:00 local time the target is shifted one
    additional day into the past.
    """
    if now is None:
        now = datetime.datetime.now()
    days_back = -day_offset
    if now.hour < 9:
        days_back += 1
    # %d is already zero-padded, so no manual padding is needed.
    return (now - timedelta(days=days_back)).strftime("/%Y/%m/%d/")


def today_audios():
    """Poll the recording server forever, processing the recent day folders.

    Each cycle calls ``day_audios`` for every offset returned by ``weekd``
    and then pauses for 15 minutes.  ``day_audios`` creates the local
    directories and opens its own SFTP connection, so nothing is opened
    here.  An ``AttributeError`` (raised when the SFTP connection could not
    be established) is reported and followed by a 30-minute back-off.

    The surrounding ``while True`` performs the retry; the function does
    not call itself, so repeated failures cannot exhaust the call stack.
    """
    while True:
        try:
            for day_offset in weekd(datetime.datetime.today().weekday()):
                day_audios(_remote_day_path(day_offset))
            # NOTE(review): source indentation was lost; this assumes the
            # pause follows a full pass over all offsets - confirm.
            time.sleep(900)
        except AttributeError as e:
            print('SSH error', e)
            time.sleep(1800)
archive_path = '/media/dev/TEXTFILES/audiofiles'
def _close_sftp(sftp_client):
    """Close an SFTP session together with the SSH transport carrying it."""
    channel = sftp_client.get_channel()
    transport = channel.get_transport() if channel is not None else None
    sftp_client.close()
    if transport is not None:
        transport.close()


def _quarantine_audio(error_name, remote_path, file_name):
    """Copy a recording that failed recognition into its error folder.

    A directory named after the recording is created under
    ``error_audiofiles/<error_name><remote_path>`` and the downloaded file
    is copied into it; ``day_audios`` lists those directory names to skip
    the recording on later runs.
    """
    target_dir = (
        f'/media/dev/TEXTFILES/error_audiofiles/{error_name}{remote_path}{file_name}'
    )
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(archive_path + remote_path + file_name, target_dir)


def day_audios(remote_path):
    """Download and transcribe the unprocessed recordings of one day folder.

    Args:
        remote_path: Server directory of the day, e.g. ``'/2025/05/27/'``;
            it is also appended to the local base directories.

    Raises:
        AttributeError: When the SFTP connection cannot be established.
            The original code surfaced this as the error from calling
            ``listdir()`` on ``None``; the type is kept because
            ``today_audios`` catches exactly this exception to back off.
    """
    text_dir = '/media/dev/TEXTFILES' + remote_path
    oom_dir = f'/media/dev/TEXTFILES/error_audiofiles/OutOfMemoryError{remote_path}'
    index_dir = f'/media/dev/TEXTFILES/error_audiofiles/IndexError{remote_path}'
    audio_dir = '/media/dev/TEXTFILES/audiofiles' + remote_path
    for directory in (text_dir, oom_dir, index_dir, audio_dir):
        os.makedirs(directory, exist_ok=True)

    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or the environment.
    sftp_client = connect_to_sftp("192.168.1.150", "monitor", "Audio4analy6!6", remote_path)
    if sftp_client is None:
        raise AttributeError('SFTP connection failed: no client available')

    try:
        # Only names containing 'in-' and neither '_t' nor 'mp3' are processed.
        candidates = {
            name for name in sftp_client.listdir()
            if 'in-' in name and '_t' not in name and 'mp3' not in name
        }
        # Already handled: transcripts (name minus its last four characters)
        # plus the entries of both error folders.
        handled = {name[:-4] for name in os.listdir(text_dir)}
        handled.update(os.listdir(oom_dir))
        handled.update(os.listdir(index_dir))

        files = list(candidates - handled)
        if files:
            print(files)
        print(len(files))
        flush()

        for file_name in files:
            local_file = f"{archive_path}{remote_path}{file_name}"
            sftp_client.get(remote_path + '/' + file_name, local_file)
            print('1ok')
            while not is_file_fully_loaded(local_file):
                time.sleep(1)
            print('2ok')
            print('3ok')
            try:
                try:
                    recognition.to_txt_wdb(local_file)
                    print('4ok')
                except torch.OutOfMemoryError:
                    # Free memory, reload the model and retry once.
                    clean()
                    recognition.init_model()
                    recognition.to_txt_wdb(local_file)
            except torch.OutOfMemoryError:
                # The retry ran out of memory as well.
                _quarantine_audio('OutOfMemoryError', remote_path, file_name)
            except IndexError:
                # Covers the first attempt and the retry alike.
                _quarantine_audio('IndexError', remote_path, file_name)
    finally:
        # Release the connection even when a file fails unexpectedly.
        _close_sftp(sftp_client)
#day_audios('/2025/05/27/')