Browse Source

коммуникация с сервером без очищения от лишних сеансов

main
parent
commit
673d4dc572
  1. 143
      .gitignore
  2. 0
      ai/__init__.py
  3. 96
      ai/d.py
  4. 54
      ai/old_calls_rec.py
  5. 0
      ai/out-79655622900-707-20250124-090133-1737691293.183969.wav
  6. 10
      ai/preprocessing.py
  7. 41
      ai/rec_all_for.py
  8. 380
      ai/recognition.py
  9. 109
      ai/rnnt_model_config.yaml
  10. BIN
      ai/segments/c_1.wav
  11. BIN
      ai/segments/c_2.wav
  12. BIN
      ai/segments/c_3.wav
  13. 43
      apiApp/__init__.py
  14. 3
      apiApp/config/__init__.py
  15. 61
      apiApp/database/Call.py
  16. 14
      apiApp/database/Operator.py
  17. 28
      apiApp/database/User.py
  18. 3
      apiApp/database/__init__.py
  19. 11
      apiApp/helper/jinjaHelper.py
  20. 11
      apiApp/helper/utils.py
  21. 1
      apiApp/interfaces/__init__.py
  22. 36
      apiApp/interfaces/apiFunctions.py
  23. 0
      cleantemp.py
  24. 6
      config.ini
  25. 17
      keep_txts.py
  26. 11
      makefilefornatalia.py
  27. 0
      model_test.py
  28. 165
      rec_all_for_day.py
  29. 290
      requirements.txt
  30. 109
      rnnt_model_config.yaml
  31. 19
      run.py
  32. BIN
      temp/1 9223339795.wav
  33. BIN
      temp/1 Антонина.wav
  34. BIN
      temp/123.mp3
  35. BIN
      temp/123.mp3.wav
  36. BIN
      temp/123.ogg
  37. BIN
      temp/123.ogg.wav
  38. BIN
      temp/123.wav
  39. BIN
      temp/20 9226488628.wav
  40. BIN
      temp/3 Надежда.wav
  41. BIN
      temp/30 9824972560.wav
  42. BIN
      temp/4 3472000334.wav
  43. BIN
      temp/50 9026373947.wav
  44. BIN
      temp/53 9655511570.wav
  45. BIN
      temp/9 9028055111.wav
  46. BIN
      temp/9027955000 обычный звонок.mp3
  47. BIN
      temp/9519212271 обычный звонок.mp3
  48. BIN
      temp/in-2250662-2.wav
  49. BIN
      temp/in-2250662.wav
  50. BIN
      temp/in-3422000203-89028396207-20250202-173540-1738499740.227325.wav
  51. BIN
      temp/in-3422000203-89124901528-20250126-123049-1737876649.196438.wav
  52. BIN
      temp/in-3422000203-89127888996-20250202-104207-1738474927.225568.wav
  53. BIN
      temp/in-3422000203-89523314992-20250315-180757-1742044077.36312.mp3.wav
  54. BIN
      temp/in-3422000303-83422073789-20250126-141431-1737882871.196887.wav
  55. BIN
      temp/in-73422070303-79026434469-20250419-092305-1745036585.6448.wav
  56. BIN
      temp/in-73422070303-79028027705-20250409-110405-1744178645.172052.wav
  57. BIN
      temp/in-73422070303-79028371463-20241005-093328-1728102808.39081 (1).wav
  58. BIN
      temp/in-73422070303-79028371463-20241005-093328-1728102808.39081.wav
  59. BIN
      temp/in-73422070303-79082533189-20250315-185043-1742046643.36394.mp3.wav
  60. BIN
      temp/in-73422070303-79082551616-20250126-152907-1737887347.197152.wav
  61. BIN
      temp/in-73422070303-79082595553-20250126-113805-1737873485.195689.wav
  62. BIN
      temp/in-73422070303-79082604307-20250315-165118-1742039478.36145.mp3.wav
  63. BIN
      temp/in-73422070303-79082729571-20250126-134444-1737881084.196766.wav
  64. BIN
      temp/in-73422070303-79124892401-20250420-145918-1745143158.11268.wav
  65. BIN
      temp/in-73422070303-79128891125-20250419-113055-1745044255.6885.wav
  66. BIN
      temp/in-73422070303-79194723777-20250303-092105-1740975665.6553 (2).wav
  67. BIN
      temp/in-73422070303-79194726227-20250126-101026-1737868226.195319.wav
  68. BIN
      temp/in-73422070303-79223332262-20250126-132543-1737879943.196684.wav
  69. BIN
      temp/in-73422070303-79223520460-20250315-123211-1742023931.35278.mp3.wav
  70. BIN
      temp/in-73422070303-79226437328-20250126-162614-1737890774.197308.wav
  71. BIN
      temp/in-73422070303-79504502044-20250126-175418-1737896058.197497.wav
  72. BIN
      temp/in-73422070303-79504612761-20250126-102943-1737869383.195411.wav
  73. BIN
      temp/in-73422070303-79504695302-20250126-112626-1737872786.195645.wav
  74. BIN
      temp/in-73422070303-79615727549-20241005-093140-1728102700.39075.wav
  75. BIN
      temp/in-73422070303-79638592760-20250126-103119-1737869479.195418.wav
  76. BIN
      temp/in-73422070303-79824744004-20250315-092838-1742012918.32904.mp3.wav
  77. BIN
      temp/in-73422070303-79824814454-20250131-124732-1738309652.220219.wav
  78. BIN
      temp/in-73422070303-79920919113-20250419-173524-1745066124.9703.wav
  79. BIN
      temp/in-73422250662-79223066055-20250315-095240-1742014360.33018.mp3.wav
  80. BIN
      temp/in-73422250662-79223604110-20250315-120547-1742022347.35163.mp3.wav
  81. BIN
      temp/in-73422250662-79638829677-20250126-174834-1737895714.197474.wav
  82. BIN
      temp/in-s-73422298463-20250124-141929-1737710369.188991.wav
  83. BIN
      temp/in-s-79526578558-20250417-132418-1744878258.215700.wav
  84. BIN
      temp/in-р-79918084911-20241005-090209-1728100929.38889-2.wav
  85. BIN
      temp/out-+79082545599-703-20241102-113930-1730529570.191761.wav
  86. BIN
      temp/out-+79091124555-704-20250126-120326-1737875006.196314.wav
  87. BIN
      temp/out-+79125928169-709-20241102-170329-1730549009.192967.wav
  88. BIN
      temp/out-+79127829273-709-20241102-181029-1730553029.193102.wav
  89. BIN
      temp/out-+79526571375-709-20241102-181420-1730553260.193181.wav
  90. BIN
      temp/out-+79630209528-703-20241102-115506-1730530506.191829.wav
  91. BIN
      temp/out-0189526500261-801-20241102-133847-1730536727.192309.wav
  92. BIN
      temp/out-79082532655-705-20241102-090039-1730520038.191025.wav
  93. BIN
      temp/out-79226416611-705-20241102-101859-1730524739.191420.wav
  94. BIN
      temp/out-89048438339-703-20241102-090017-1730520017.191023.wav
  95. BIN
      temp/out-89082658735-708-20250126-115254-1737874374.195713.wav
  96. BIN
      temp/out-89125860472-708-20250126-104916-1737870556.195464.wav
  97. BIN
      temp/out-89519373293-362-20250126-110647-1737871607.195559.wav
  98. BIN
      temp/out-89523278927-504-20241102-132525-1730535925.192215.wav
  99. BIN
      test.wav
  100. 0
      tokenizer_all_sets/__init__.py
  101. Some files were not shown because too many files have changed in this diff Show More

143
.gitignore vendored

@ -0,0 +1,143 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
/gyno_videos/
/result_gyno/
/images/
/test/
/Ubuntu_CFG/
/distPirogovDesktop/
app_config.json
color_profile.json
users.json
data/2024-08-22_00-25-03-343367.png
data/2024-08-22_00-58-24-039690.png
data/2024-08-22_00-59-26.webm
data/2024-08-22_00-59-34-870157.png
data/2024-08-22_00-59-50-575029.png
data/2024-08-22_00-59-06-827412.png
data/2024-08-22_00-29-12-759857.png
/plugins/
.idea/

0
ai/__init__.py

96
ai/d.py

@ -0,0 +1,96 @@
import os
import time
import recognition
from cleantemp import clean
from datetime import datetime
import paramiko
path = '/home/dev/ExampleApp/audiofiles/'
#n = os.listdir(path)
def connect_to_sftp(hostname, username, password, remote_path):
    """Open an SFTP session on *hostname* and change into *remote_path*.

    Returns the ``paramiko.SFTPClient`` on success.  When authentication, the
    SSH layer, the directory change or another I/O step fails, a diagnostic
    is printed, the SSH connection is closed (so a failed attempt does not
    leave a session behind) and ``None`` is returned.
    """
    ssh = paramiko.SSHClient()
    # Unknown hosts are accepted and added to the known hosts automatically.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(hostname, username=username, password=password)
        sftp = ssh.open_sftp()
        # Move into the requested directory; failures are handled below.
        sftp.chdir(remote_path)
        return sftp
    except paramiko.AuthenticationException:
        print("Ошибка аутентификации. Проверьте имя пользователя и пароль.")
    except paramiko.SSHException as e:
        print(f"Ошибка SSH: {e}")
    except FileNotFoundError:
        print(f"{str(datetime.now())[:-7]} - директория {remote_path} ещё не создана. Повторная попытка через 30 минут.")
    except IOError as e:
        print(f"Ошибка ввода-вывода: {e}")
    # Only reached after one of the handlers above ran: release the connection.
    ssh.close()
    return None
# Initial remote directory; the polling loop further down reassigns it on
# every pass.
remote_path = '/2025/02/'
# NOTE(review): the string literal below is disabled code kept as a no-op
# expression.  It embeds the SFTP credentials in plain text - consider
# removing it and moving the credentials to configuration.
'''for i in range(24, 26):
sftp_client = connect_to_sftp("192.168.1.150", "monitor", "Audio4analy6!6", remote_path + str(i))
files = sftp_client.listdir()
temp=remote_path+str(i)+'/'
for file in files:
sftp_client.get(temp+file,f"/home/dev/ExampleApp/audiofiles{temp}{file}")'''
def is_file_fully_loaded(file_path, name):
    """Report whether remote file *name* in *file_path* has stopped growing.

    The file size is read twice, one second apart, over a single SFTP
    connection that is closed before returning.  Returns ``True`` when both
    sizes match, ``False`` when they differ or the file cannot be found.

    If the connection cannot be established ``connect_to_sftp`` returns
    ``None`` and the resulting ``AttributeError`` propagates to the caller,
    exactly as before.
    """
    print(name)
    sftp_client = connect_to_sftp("192.168.1.150", "monitor", "Audio4analy6!6", file_path)
    try:
        size = sftp_client.stat(name).st_size
        time.sleep(1)
        new_size = sftp_client.stat(name).st_size
    except FileNotFoundError:
        return False
    finally:
        if sftp_client is not None:
            # Close the SFTP channel and the SSH transport under it so the
            # check does not leave a session open on the server.
            transport = sftp_client.get_channel().get_transport()
            sftp_client.close()
            transport.close()
    return size == new_size
# Polling loop: look at today's directory on the SFTP server and write a
# transcript for every remote file that has no local text file yet.
while True:
time.sleep(1)
# Today's remote directory, built from the current date as "/YYYY/MM/DD/".
remote_path = f"/{str(datetime.now())[:10].replace('-', '/')}/"
# Make sure today's local output directory exists.
try: os.makedirs('/home/dev/ExampleApp/textfiles'+remote_path)
except FileExistsError: print('')
try:
# connect_to_sftp returns None on failure; the AttributeError raised by the
# following attribute access is what the handler at the bottom catches.
sftp_client = connect_to_sftp("192.168.1.150", "monitor", "Audio4analy6!6", remote_path)
files = sftp_client.listdir()
print('Файлов на сервере за сегодня -', len(files))
texts = os.listdir('/home/dev/ExampleApp/textfiles'+remote_path)
# Drop the last four characters of each name (the ".txt" suffix written below)
# so local names can be compared with remote file names.
texts=[i[:-4] for i in texts]
# Nothing new on the server: poll once a minute.
# NOTE(review): each poll opens another connection and none is closed in this
# loop; `texts` is not re-read here either - confirm this is intended.
while len(set(files)-set(texts))==0:
print(datetime.now())
time.sleep(60)
sftp_client = connect_to_sftp("192.168.1.150", "monitor", "Audio4analy6!6", remote_path)
files=sftp_client.listdir()
# Process every remote file without a transcript.
for i in list(set(files) - set(texts)):
# Wait until the remote size stops changing between two reads.
while not is_file_fully_loaded(remote_path,i):
time.sleep(5)
info = sftp_client.stat(i)
size = info.st_size
# Files under 1 KiB are treated as empty recordings (see the else branch).
if size>=1024:
try:
# Download to the temp directory, transcribe, then delete the local copy.
sftp_client.get(remote_path+i,'/home/dev/ExampleApp/temp/'+i)
recognition.to_txt_wfp('/home/dev/ExampleApp/temp/'+i,i,f'/home/dev/ExampleApp/textfiles{remote_path}')
os.remove('/home/dev/ExampleApp/temp/'+i)
except IndexError:
# An IndexError during recognition is recorded as an empty recording.
# NOTE(review): the downloaded temp file is not removed on this path.
fin = open(f'/home/dev/ExampleApp/textfiles/{remote_path}/{i}.txt', 'w')
fin.write('Аудиозапись пуста (IndexError).')
fin.close()
else:
fin=open(f'/home/dev/ExampleApp/textfiles/{remote_path}/{i}.txt','w')
fin.write('Аудиозапись пуста.')
fin.close()
# A failed connection (None client) ends up here: wait 30 minutes and retry.
except AttributeError: time.sleep(1800)

54
ai/old_calls_rec.py

@ -0,0 +1,54 @@
from rec_all_for import rec_all_from
import os
import paramiko
'''def connect_to_sftp(hostname, username, password, remote_path):
try:
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # Добавляет неизвестный хост в известные
ssh.connect(hostname, username=username, password=password)
sftp = ssh.open_sftp()
sftp.chdir(remote_path) # Переходим в указанную директорию. Обработка ошибок ниже.
return sftp
except paramiko.AuthenticationException:
print("Ошибка аутентификации. Проверьте имя пользователя и пароль.")
return None
except paramiko.SSHException as e:
print(f"Ошибка SSH: {e}")
return None
except FileNotFoundError:
print(f"Директория {remote_path} не найдена на сервере.")
return None
except IOError as e:
print(f"Ошибка ввода-вывода: {e}")
return None'''
'''remote_path = '/2025/02/'
for i in range(26, 27):
sftp_client = connect_to_sftp("192.168.1.150", "monitor", "Audio4analy6!6", remote_path + str(i))
files = sftp_client.listdir()
temp=remote_path+str(i)+'/'
for file in files:
sftp_client.get(temp+file,f"/home/dev/ExampleApp/audiofiles{temp}{file}")'''
# Day list for January; only referenced by the disabled block below.
days01= ['30']#[str(i) for i in range(24,32)]
#[24,25,26,27,28,29,30,31]
# NOTE(review): with the '0' prefix, range(1,16) yields '01'..'09' followed by
# '010'..'015'; the second list then adds '10'..'13'.  This does not match the
# comment on the next line - confirm which days are intended.
days03= ['0'+str(i) for i in range(1,16)]+[str(i) for i in range(10,14)]
#[01,02,03,04,05,06,07,08,09,10,11,12...21]
'''
#for day in days01:
# os.makedirs(f'/home/dev/ExampleApp/textfiles/2025/01/{day}')
rec_all_from(f'/2025/01/{str(day)}')
print(f'/2025/01/{str(day)}',len(os.listdir(f'/home/dev/ExampleApp/textfiles/2025/01/{day}')))
'''
# Transcribe every listed day of March 2025 and print how many text files the
# day's output directory contains afterwards.
for day in days03:
# os.makedirs(f'/home/dev/ExampleApp/textfiles/2025/02/{day}')
rec_all_from(f'/2025/03/{str(day)}')
# NOTE(review): the text directory is created only after rec_all_from has
# run - verify that rec_all_from does not need it to exist beforehand.
try:
os.makedirs('/home/dev/ExampleApp/textfiles' + f'/2025/03/{str(day)}')
except FileExistsError:
print('ok')
print(f'/2025/03/{str(day)}',len(os.listdir(f'/home/dev/ExampleApp/textfiles/2025/03/{day}')))

0
ai/out-79655622900-707-20250124-090133-1737691293.183969.wav

10
ai/preprocessing.py

@ -0,0 +1,10 @@
def preprocessing(file_path, channel):
    """Placeholder for splitting a recording into utterances.

    Generator function: for now it ignores both arguments and yields the
    integers 0 through 9, one per iteration of the consuming ``for`` loop.
    """
    # TODO: move the audio-splitting function here
    yield from range(10)

41
ai/rec_all_for.py

@ -0,0 +1,41 @@
import os
import time
import recognition
import paramiko
#from apiApp.ai.bot import remote_path
from cleantemp import clean
path = '/home/dev/ExampleApp/audiofiles'
'''
def is_file_fully_loaded(file_path):
size = os.path.getsize(file_path)
time.sleep(1)
new_size = os.path.getsize(file_path)
if size != new_size: return False
else: return True
'''
import torch
import soundfile as sf
import librosa
def rec_all_from(remote_path):
# Transcribe every file in the local audio directory for `remote_path`
# (a path fragment appended to the audiofiles/textfiles roots).
# Make sure the audio directory exists before listing it.
try: os.makedirs('/home/dev/ExampleApp/audiofiles'+remote_path)
except FileExistsError: print('ok')
for i in os.listdir('/home/dev/ExampleApp/audiofiles'+remote_path):
try:
print(i)
# Files of at least 4096 bytes are passed to recognition.to_txt; smaller ones
# get a stub text file saying the recording is empty.
if os.path.getsize(path + remote_path + '/' + i) >= 4096:
recognition.to_txt(path + remote_path + '/'+i,remote_path)
else:
fin = open('/home/dev/ExampleApp/textfiles'+remote_path+f'/{i}.txt', 'w')
fin.write('Аудиозапись пуста.')
fin.close()
# NOTE(review): indentation is lost in this view, so it is unclear whether
# clean() runs for every file or only on the small-file branch - confirm.
clean()
# Failures are appended to a log file and the loop moves on to the next file.
except torch.OutOfMemoryError:
with open("/home/dev/ExampleApp/govno/govno.txt", "a") as govno:
govno.write(remote_path+"/"+i+"\n")
except IndexError:
with open("/home/dev/ExampleApp/govno/govno.txt", "a") as govno:
govno.write(remote_path+"/"+i+" IndexError\n")

380
ai/recognition.py

@ -0,0 +1,380 @@
#TODO: Убрать комментарии
import wave
import torch, torchaudio
from nemo.collections.asr.models import EncDecRNNTBPEModel
from nemo.collections.asr.modules.audio_preprocessing import (AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor)
from nemo.collections.asr.parts.preprocessing.features import (FilterbankFeaturesTA as NeMoFilterbankFeaturesTA)
import locale
#from rec_all_for_day import flush
locale.getpreferredencoding = lambda: "UTF-8"
import mutagen
import os
import librosa
import soundfile as sf
import noisereduce as nr
import pydub
from pydub import AudioSegment, silence
# Module-level recognizer; populated lazily by init_model().
model = None
ckpt=None
# Silence threshold handed to pydub.silence.detect_silence by silence_c()
# and (raised by 15) by silence_o().
thresh=-90.0
torch.cuda.empty_cache()
# 14 GiB.  Not referenced anywhere else in the visible code.
max_memory = 14 * 1024**3 # 14 GiB
# Limit this process to 14/24 of the device memory.
# NOTE(review): called unconditionally at import time - presumably assumes a
# CUDA device with 24 GB is present; confirm behaviour on CPU-only hosts.
torch.cuda.set_per_process_memory_fraction(14/24)
import torch
import gc
def flush():
    """Release unreferenced Python objects and cached CUDA memory.

    Runs the garbage collector, empties the CUDA cache and resets the CUDA
    peak-memory statistics, in that order.
    """
    for release in (
        gc.collect,
        torch.cuda.empty_cache,
        torch.cuda.reset_peak_memory_stats,
    ):
        release()
# Extra cleanup for the multiprocessing case (disabled, kept as a string below)
'''if torch.multiprocessing.is_started():
torch.multiprocessing.reducer._python_exit()if torch.multiprocessing.is_started():
torch.multiprocessing.reducer._python_exit()'''
model = None
def init_model():
    """Load the RNN-T recognizer into the module-level ``model`` once.

    Does nothing when ``model`` is already set.  The model is built from the
    YAML config, the checkpoint is loaded on the CPU, and the model is put in
    eval mode and moved to CUDA when available (CPU otherwise).

    The global is assigned only after every step has succeeded, so a failure
    part-way through (for example a missing checkpoint) can no longer leave a
    weightless model behind that later calls would treat as "already loaded".
    A CUDA out-of-memory error is delegated to ``handle_oom_error``.
    """
    global model
    if model is not None:  # avoid loading twice
        return
    device = "cuda" if torch.cuda.is_available() else "cpu"
    try:
        with torch.no_grad():
            torch.cuda.empty_cache()
            loaded = EncDecRNNTBPEModel.from_config_file("/home/dev/GigaAM_API/rnnt_model_config.yaml")
            ckpt = torch.load("/home/dev/GigaAM_API/rnnt_model_weights.ckpt", map_location="cpu")
            loaded.load_state_dict(ckpt, strict=False)
            loaded.eval().to(device)
    except torch.cuda.OutOfMemoryError:
        handle_oom_error()
    else:
        model = loaded
def handle_oom_error():
    """Drop the global model after a CUDA out-of-memory error and raise.

    Empties the CUDA cache, resets the module-level ``model`` to ``None`` and
    runs the garbage collector.

    ``model`` is reset rather than deleted because other functions test
    ``model is None``.  The ``global`` declaration is required: without it the
    former ``del model`` made the name local and failed with
    ``UnboundLocalError`` before the intended error could be raised.

    Raises:
        RuntimeError: always, to report that model initialization failed.
    """
    global model
    torch.cuda.empty_cache()
    model = None
    gc.collect()
    raise RuntimeError("Out of Memory. Model initialization failed.")
init_model()
def clean(segments_dir='/home/dev/ExampleApp/temp/splitted/segments/'):
    """Delete every entry in the segment scratch directory.

    Args:
        segments_dir: directory to empty.  Defaults to the location used by
            ``segment``; the parameter exists so other directories can be
            cleaned as well.

    An empty directory is a no-op.  ``os.remove`` errors (for example when an
    entry is itself a directory) propagate to the caller.
    """
    for entry in os.listdir(segments_dir):
        os.remove(os.path.join(segments_dir, entry))
class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    """NeMo filterbank featurizer with a custom torchaudio mel extractor."""

    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        """Build the featurizer and replace its mel-spectrogram extractor.

        ``window_size`` and ``window_stride`` are removed from ``kwargs``
        before the parent constructor runs; the remaining keyword arguments
        are forwarded to it and also used to configure the extractor.
        """
        for dropped in ("window_size", "window_stride"):
            kwargs.pop(dropped, None)
        super().__init__(**kwargs)
        extractor = torchaudio.transforms.MelSpectrogram(
            sample_rate=self._sample_rate,
            win_length=self.win_length,
            hop_length=self.hop_length,
            n_mels=kwargs["nfilt"],
            window_fn=self.torch_windows[kwargs["window"]],
            mel_scale=mel_scale,
            norm=kwargs["mel_norm"],
            n_fft=kwargs["n_fft"],
            f_max=kwargs.get("highfreq", None),
            f_min=kwargs.get("lowfreq", 0),
            wkwargs=wkwargs,
        )
        self._mel_spec_extractor: torchaudio.transforms.MelSpectrogram = extractor
class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    """NeMo mel-spectrogram preprocessor that uses ``FilterbankFeaturesTA``."""

    def __init__(self, mel_scale: str = "htk", **kwargs):
        """Initialise the parent, then swap in the custom featurizer.

        The parent constructor receives ``kwargs`` unchanged; afterwards the
        ``features`` entry is renamed to ``nfilt`` and everything is passed
        to ``FilterbankFeaturesTA``.
        """
        super().__init__(**kwargs)
        kwargs["nfilt"] = kwargs.pop("features")
        self.featurizer = FilterbankFeaturesTA(mel_scale=mel_scale, **kwargs)
def segment(file_path):
    """Split a two-channel call into per-speaker segment files.

    The scratch directory is emptied, the recording is split into
    ``client.wav`` / ``operator.wav`` by ``lraudiosplit``, and each channel is
    cut at the ranges returned by ``silence_c`` / ``silence_o``.  When a
    channel yields at most one range, the whole channel is written as a
    single segment instead.

    Returns:
        ``(segments, dia)``: the list of written segment paths, and a dict
        mapping a position key to the speaker prefix (``'П: '`` for the
        client, ``'О: '`` for the operator).
    """
    clean()
    dia = {}
    lraudiosplit(file_path)
    temp_path = '/home/dev/ExampleApp/temp/splitted/'
    segments = []

    # --- client channel ---
    audio, sr = librosa.load(os.path.join(temp_path, 'client.wav'), mono=True)
    spans = silence_c()
    if len(spans) > 1:
        for span in spans:
            start, end = span[0], span[1]
            # Client keys are offset by one; presumably so they cannot
            # collide with operator keys at the same start - TODO confirm.
            seg_path = os.path.join(temp_path, 'segments', 'c_' + str(start + 1) + '.wav')
            sf.write(seg_path, audio[start:end], sr)
            segments.append(seg_path)
            dia[start + 1] = 'П: '
    else:
        seg_path = os.path.join(temp_path, 'segments', 'c' + '.wav')
        sf.write(seg_path, audio, sr)
        segments.append(seg_path)
        dia[0] = 'П: '

    # --- operator channel ---
    spans = silence_o()
    # Load the operator audio before branching.  It used to be loaded only in
    # the multi-segment branch, so the single-segment fallback wrote the
    # client's samples into the operator file.
    audio, sr = librosa.load(os.path.join(temp_path, 'operator.wav'), mono=True)
    if len(spans) > 1:
        for span in spans:
            start, end = span[0], span[1]
            seg_path = os.path.join(temp_path, 'segments', 'o_' + str(start) + '.wav')
            sf.write(seg_path, audio[start:end], sr)
            segments.append(seg_path)
            dia[start] = 'О: '
    else:
        seg_path = os.path.join(temp_path, 'segments', 'o' + '.wav')
        sf.write(seg_path, audio, sr)
        segments.append(seg_path)
        dia[1] = 'О: '
    return segments, dia
def segmentmono(file_path):
# Cut a single-channel recording (ogg, mp3 or wav, chosen by the last three
# characters of `file_path`) into segments at detected silences.
# Returns (segments, dia); `segments` is the string '123.wav' instead of a
# list when at most one silent range is found (to_message checks for it).
dia = {}
temp_path='/home/dev/ExampleApp/apiApp/ai'
# ogg/mp3 inputs are first exported to "<file_path>.wav" and re-read.
# NOTE(review): the export path is not joined with temp_path while the
# re-read path is - these only agree for some inputs; confirm.
if file_path[-3:]=='ogg':
sound = AudioSegment.from_ogg(os.path.join(temp_path, file_path))
sound.export(f"{file_path}.wav", format="wav")
sound = AudioSegment.from_file(os.path.join(temp_path, f'{file_path}.wav'))
elif file_path[-3:] == 'mp3':
sound = AudioSegment.from_mp3(os.path.join(temp_path, file_path))
sound.export(f"{file_path}.wav", format="wav")
sound = AudioSegment.from_file(os.path.join(temp_path, f'{file_path}.wav'))
elif file_path[-3:] == 'wav':
sound = AudioSegment.from_file(os.path.join(temp_path, file_path))
else:
# NOTE(review): this returns the TypeError class instead of raising it, so a
# caller that unpacks two values fails on the unpacking instead.
return TypeError
segments=[]#{}
# The samples are loaded from the original `file_path`, not the exported wav.
audio, sr = librosa.load(os.path.join(temp_path, file_path), mono=True)
silence = pydub.silence.detect_silence(sound, silence_thresh=-60.0)
j=1
if len(silence)>1:
# NOTE(review): `silence` comes straight from detect_silence and is used to
# slice the librosa sample array without the ms-to-samples conversion that
# silence_c()/silence_o() apply - confirm the units.
sf.write(os.path.join(temp_path, 'segments', 'c_' + str(str(j) + '.wav')), audio[:silence[1][0]], sr)
segments.append(os.path.join(temp_path, 'segments', 'c_' + str(str(j) + '.wav')))
dia[silence[0][1]] = 'П: '
# NOTE(review): on the last iteration silence[i + 1] is past the end of the
# list, which looks like it raises IndexError - confirm.
for i in range(len(silence)):
j+=1
sf.write(os.path.join(temp_path, 'segments', 'c_' + str(j) + '.wav'), audio[silence[i][1]:silence[i + 1][0]], sr)
segments.append(os.path.join(temp_path, 'segments', 'c_' + str(j) + '.wav'))
dia[silence[i][1]] = ''
# Sentinel meaning "transcribe the whole file"; compared in to_message.
else: segments='123.wav'
return segments,dia
def lraudiosplit(file_path):
    """Write the two channels of a recording to client.wav and operator.wav.

    The first channel goes to ``client.wav`` and the second to
    ``operator.wav``; when ``'out'`` occurs anywhere in *file_path* the
    assignment is swapped.

    Returns:
        The tuple ``(operator.wav path, client.wav path)``.
    """
    split_dir = '/home/dev/ExampleApp/temp/splitted/'
    client_wav = split_dir + 'client.wav'
    operator_wav = split_dir + 'operator.wav'
    channels, rate = librosa.load(file_path, mono=False)
    if 'out' in file_path:
        first_target, second_target = operator_wav, client_wav
    else:
        first_target, second_target = client_wav, operator_wav
    sf.write(first_target, channels[0], rate)
    sf.write(second_target, channels[1], rate)
    return operator_wav, client_wav
def silence_c():
# Compute cut ranges (in samples) for client.wav.
# Returns the ranges between detected silences when more than one silent
# range is found; otherwise returns the detected silent ranges themselves.
global thresh
saudio_dir = '/home/dev/ExampleApp/temp/splitted/'
cpath = saudio_dir + 'client.wav'
# `audio` keeps the samples as they were before noise reduction.
audio, sr = librosa.load(cpath, mono=False)
caudio,csr=librosa.load(cpath,mono=True)
# Overwrite client.wav with a noise-reduced version for silence detection.
client_reduced_noise = nr.reduce_noise(y=caudio, sr=csr, prop_decrease=1, time_constant_s=0.1)
sf.write(cpath, client_reduced_noise, csr)
sound = AudioSegment.from_file(os.path.join(saudio_dir, 'client.wav'))
ret = pydub.silence.detect_silence(sound, silence_thresh=thresh)
# Scale range bounds by sr / 1000 (milliseconds to sample indices).
for s in ret:
s[0] = int(round(s[0] * sr / 1000, 0))
s[1] = int(round(s[1] * sr / 1000, 0))
if len(ret)>1:
# Build the complementary ranges: start of file to the first silence, the
# gaps between consecutive silences, and the last silence to end of file.
ret2=[[0,ret[0][0]]]
for i in range(len(ret)-1):
ret2.append([ret[i][1],ret[i+1][0]])
ret2.append([ret[-1][1], int(round(len(sound) * sr / 1000,0))])
# Drop empty leading ranges.
while [0,0] in ret2:
ret2.pop(ret2.index([0,0]))
# Widen each range by half a second of samples on both sides.
# NOTE(review): len(sound) is presumably milliseconds (pydub) while i[1] is
# a sample index - this comparison appears to mix units; confirm.
for i in ret2:
if i[0]!=0: i[0]-=sr//2
if i[1]<=len(sound)-(sr+5): i[1]+=sr//2
# Write the pre-noise-reduction samples back to client.wav.
# NOTE(review): the rate is hard-coded to 22000 rather than `sr`, the call is
# duplicated, and without indentation it is unclear whether these writes sit
# inside the loop above - confirm.
sf.write(cpath, audio, 22000)
sf.write(cpath, audio, 22000)
# NOTE(review): the end bound was already converted above; scaling it again
# when the last range is inverted looks unintended - confirm.
if ret2[-1][0]>ret2[-1][1]:
ret2[-1][1]=int(round(ret2[-1][1] * sr / 1000, 0))
return ret2
else: return ret
def silence_o():
# Compute cut ranges (in samples) for operator.wav; same procedure as
# silence_c() with different tuning (time_constant_s=0.2, threshold raised
# by 15, trailing padding of a full `sr` samples, no removal of [0,0]).
global thresh
saudio_dir = '/home/dev/ExampleApp/temp/splitted/'
opath = saudio_dir + 'operator.wav'
# `audio` keeps the samples as they were before noise reduction.
audio, sr = librosa.load(opath, mono=False)
oaudio,osr=librosa.load(opath,mono=True)
# Overwrite operator.wav with a noise-reduced version for silence detection.
client_reduced_noise = nr.reduce_noise(y=oaudio, sr=osr,prop_decrease=1, time_constant_s=0.2)
sf.write(opath, client_reduced_noise, osr)
sound = AudioSegment.from_file(os.path.join(saudio_dir, 'operator.wav'))
ret = pydub.silence.detect_silence(sound, silence_thresh=thresh+15)
# Scale range bounds by sr / 1000 (milliseconds to sample indices).
for s in ret:
s[0] = int(round(s[0] * sr / 1000, 0))
s[1] = int(round(s[1] * sr / 1000, 0))
if len(ret)>1:
# Complementary ranges: file start to first silence, gaps between silences,
# last silence to end of file.
ret2=[[0,ret[0][0]]]
for i in range(len(ret)-1):
ret2.append([ret[i][1],ret[i+1][0]])
ret2.append([ret[-1][1], int(round(len(sound) * sr / 1000,0))])
# Widen each range: half a second before, a full second after.
# NOTE(review): len(sound) is presumably milliseconds while i[1] is a sample
# index - confirm the comparison.
for i in ret2:
if i[0]!=0: i[0]-=sr//2
if i[1]<=len(sound)-(sr+5): i[1]+=sr
# Write the pre-noise-reduction samples back (rate hard-coded to 22000).
# NOTE(review): nesting relative to the loop above is not visible here.
sf.write(opath, audio, 22000)
# NOTE(review): re-scaling an already converted bound looks unintended.
if ret2[-1][0]>ret2[-1][1]:
ret2[-1][1]=int(round(ret2[-1][1] * sr / 1000, 0))
return ret2
else: return ret
def run_recognition(file_path):
    """Transcribe a two-channel call and return its dialogue as a dict.

    A fresh model is loaded on every call, the recording is cut by
    ``segment`` and all segments are transcribed in one batch.  Segments with
    empty text are dropped.

    Returns:
        ``{"data": [...]}`` where each item has ``timestamp`` (the key from
        ``segment``), ``person`` (the three-character speaker prefix) and
        ``text``, ordered by timestamp.

    On a CUDA out-of-memory error the local model is released, memory is
    flushed and the call is retried.  The retry result is now returned;
    previously it was discarded and execution fell through to an undefined
    ``call_text``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = EncDecRNNTBPEModel.from_config_file("/home/dev/ExampleApp/apiApp/ai/rnnt_model_config.yaml")
    ckpt = torch.load("/home/dev/ExampleApp/apiApp/ai/rnnt_model_weights.ckpt", map_location="cpu")
    model.load_state_dict(ckpt, strict=False)
    model.eval()
    model = model.to(device)
    segments, dia = segment(file_path)
    try:
        call_text = model.transcribe(segments)[0]
    except torch.OutOfMemoryError:
        # Drop local references first so flush() can actually free them.
        del model, ckpt
        flush()
        init_model()
        return run_recognition(file_path)

    # Transcripts are in the insertion order of `dia`, so map each key to
    # its position once instead of searching the key list on every lookup.
    position_of = {key: pos for pos, key in enumerate(dia)}
    dia2 = {}
    for key in sorted(dia):
        text = call_text[position_of[key]]
        if text != '':
            dia2[key] = dia[key] + text

    resul = [
        {"timestamp": key, "person": dia2[key][:3], "text": dia2[key][3:]}
        for key in sorted(dia2)
    ]
    return {"data": resul}
def to_txt_wdb(file_path):
# Transcribe `file_path` into a text file and return
# {'file_name': ..., 'text': ...}.  Files under 1024 bytes get a stub text.
flush()
file_name = file_path.split('/')[-1]
global model
if os.path.getsize(file_path)>=1024:
text = ''
global device,model,ckpt
if model is None: init_model()
segments, dia = segment(file_path)
dia2 = {}
# Paths ending in '3' are treated as mp3 and exported to "<file_path>.wav".
# NOTE(review): segment() has already run on the original path at this
# point and the exported file is not used below - confirm it is needed.
if file_path[-1]=='3':
sound = AudioSegment.from_mp3(file_path)
sound.export(f"{file_path}.wav", format="wav")
# Output name: the path with the temp prefix (and any 'audiofiles/' part)
# removed, plus ".txt".
# NOTE(review): the result is a relative path, so the file lands in the
# current working directory; it is opened without a context manager.
filename = file_path.replace('/home/dev/ExampleApp/temp/', '')
fin = open(f'{filename}.txt'.replace('audiofiles/',''), 'w')
call_text = model.transcribe(segments)[0]
# Attach each non-empty transcript to its speaker prefix, keyed by the
# position key from segment(); j is the key's insertion index in `dia`.
for i in sorted(dia):
j = list(dia.keys()).index(i)
if call_text[j] != '':
dia2[i] = dia[i] + call_text[j]
time_positions = sorted(dia2)
# `resul` is built but not used afterwards in this function.
resul = []
for i in time_positions:
resul.append({"timestamp": i, "person": dia2[i][:3], "text": dia2[i][3:]})
# fin=open(f'/home/dev/ExampleApp/textfiles{remote_path}/{filename}.txt','w')
# One line per utterance, ordered by position key.
for i in time_positions:
text+=f"{dia2[i]}\n"
fin.write(text)
fin.close()
return {'file_name' : file_name,
'text' : text}
elif os.path.getsize(file_path)<1024:
filename = file_path
fin = open(f'{filename}.txt'.replace('audiofiles/',''), 'w')
fin.write("Аудиозапись пуста.")
fin.close()
return {'file_name' : file_name,
'text' : "Аудиозапись пуста."}
# NOTE(review): the two size checks above cover every size, so this branch
# looks unreachable.
else: return 'Аудиозапись сжата.'
def stereo_or_mono(file_path):
    """Dispatch a recording to the mono or the two-channel transcriber.

    The type is taken from the last three characters of *file_path*:

    * ``ogg`` - always handled by ``to_message``.
    * ``wav`` - channel count read from the WAV header.
    * ``mp3`` - channel count read via mutagen; the file is also exported to
      ``"<file_path>.wav"``.
    * anything else - the "unsupported file type" message is returned.

    One channel goes to ``to_message``, two to ``call_to_message``; any other
    channel count returns ``None``.
    """
    extension = file_path[-3:]
    if extension == 'ogg':
        return to_message(file_path)

    if extension == 'wav':
        with wave.open(file_path, "rb") as wav_file:
            channel_count = wav_file.getnchannels()
    elif extension == 'mp3':
        temp_path = '/home/dev/ExampleApp/apiApp/ai'
        audio = mutagen.File(file_path)
        sound = AudioSegment.from_mp3(os.path.join(temp_path, file_path))
        sound.export(f"{file_path}.wav", format="wav")
        channel_count = audio.info.channels
    else:
        return 'Неподдерживаемый тип файла'

    if channel_count == 1:
        return to_message(file_path)
    if channel_count == 2:
        return call_to_message(file_path)
    return None
def call_to_message(file_path):
# Transcribe a two-channel call and format it as a message string with
# bold "Оператор"/"Пациент" headings.
print(file_path)
global model, ckpt
if model is None: init_model()
segments, dia = segment(file_path)
dia2 = {}
try:
if model is None: init_model()
call_text = model.transcribe(segments)[0]
# On out-of-memory, drop the model, reload it and retry once.
except torch.OutOfMemoryError:
model = None
init_model()
call_text = model.transcribe(segments)[0]
# Every segment transcribed to an empty string: report failure.
if call_text.count('')==len(call_text):
return 'Текст не был распознан, попробуйте другой файл'
# First half of the transcripts matches the second half (each compared
# without its final element): return the first half as plain text.
# NOTE(review): presumably this detects a recording whose two channels carry
# the same audio - confirm.
elif call_text[:int(len(call_text)/2)-1]==call_text[int(len(call_text)/2):len(call_text)-1]:
# The list's string form is turned into text by stripping the brackets and
# quote separators.
result = str(call_text[:int(len(call_text)/2)])
result = result.replace("['",'')
result = result.replace("']", '')
result = result.replace("', '", ' ')
return result
else:
# Attach each non-empty transcript to its speaker prefix; j is the key's
# insertion index in `dia`.
for i in sorted(dia):
j = list(dia.keys()).index(i)
if call_text[j] != '':
dia2[i] = dia[i] + call_text[j]
time_positions = sorted(dia2)
# `resul` is built but not used afterwards in this function.
resul = []
for i in time_positions:
resul.append({"timestamp": i, "person": dia2[i][:3], "text": dia2[i][3:]})
result = ''
for i in time_positions:
result+=f"{dia2[i]}\n"
# Replace the short prefixes with bold headings.
# NOTE(review): the replacement applies to the whole text, so a transcript
# that itself contains 'О: ' or 'П: ' would be rewritten too.
result = result.replace('О: ', '\n<b>Оператор:</b> \n')
result = result.replace('П: ', '\n<b>Пациент:</b> \n')
return result
def to_message(file_path):
    """Transcribe a single-channel recording into one string.

    A model is loaded for the call and the file is cut by ``segmentmono``.
    When that returns the ``'123.wav'`` sentinel, the whole file is
    transcribed and the first result returned.  Otherwise every non-empty
    segment transcript is appended, each followed by a space.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    recognizer = EncDecRNNTBPEModel.from_config_file("/home/dev/ExampleApp/apiApp/ai/rnnt_model_config.yaml")
    weights = torch.load("/home/dev/ExampleApp/apiApp/ai/rnnt_model_weights.ckpt", map_location="cpu")
    recognizer.load_state_dict(weights, strict=False)
    recognizer.eval()
    recognizer = recognizer.to(device)

    segments, _dia = segmentmono(file_path)
    if segments == '123.wav':
        return recognizer.transcribe([file_path])[0][0]

    call_text = recognizer.transcribe(segments)[0]
    return ''.join(piece + ' ' for piece in call_text if piece != '')

109
ai/rnnt_model_config.yaml

@ -0,0 +1,109 @@
model_class: enc_dec_rnnt_bpe
sample_rate: 16000
log_prediction: true
model_defaults:
enc_hidden: 768
pred_hidden: 320
join_hidden: 320
preprocessor:
_target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
sample_rate: 16000
n_fft: 400
n_window_size: 400
window_size: null
n_window_stride: 160
window_stride: null
features: 64
dither: 0.0
preemph: null
log: true
log_zero_guard_type: clamp
normalize: null
pad_to: 0
mel_norm: null
window: hann
log_zero_guard_value: 1e-9
tokenizer:
dir: tokenizer_all_sets/
type: bpe
validation_ds:
shuffle: False
manifest_filepath: null
encoder:
_target_: nemo.collections.asr.modules.ConformerEncoder
feat_in: 64
feat_out: -1
n_layers: 16
d_model: 768
subsampling: striding
subsampling_factor: 4
subsampling_conv_channels: 768
ff_expansion_factor: 4
self_attention_model: rel_pos
pos_emb_max_len: 5000
n_heads: 16
xscaling: false
untie_biases: true
conv_kernel_size: 31
dropout: 0.1
dropout_emb: 0.1
dropout_att: 0.1
decoder:
_target_: nemo.collections.asr.modules.RNNTDecoder
normalization_mode: null
random_state_sampling: false
blank_as_pad: true
vocab_size: 512
prednet:
pred_hidden: 320
pred_rnn_layers: 1
t_max: null
dropout: 0.0
joint:
_target_: nemo.collections.asr.modules.RNNTJoint
log_softmax: null
fuse_loss_wer: false
fused_batch_size: 1
jointnet:
joint_hidden: 320
activation: relu
dropout: 0.0
encoder_hidden: 768
optim:
name: adamw
lr: 5.0e-05
betas:
- 0.9
- 0.98
weight_decay: 0.01
sched:
name: CosineAnnealing
warmup_steps: 10000
warmup_ratio: null
min_lr: 1.0e-07
nemo_version: 1.12.0
decoding:
strategy: greedy_batch
preserve_alignments: false
greedy:
max_symbols: 3
beam:
beam_size: 5
score_norm: true
loss:
loss_name: default
mwer: false
rnnt_reduction: mean_batch
wer_coef: false
subtract_mean: true
warprnnt_numba_kwargs:
fastemit_lambda: 0.0
clamp: -1.0
rnnt_weight: 0.1
unique_hyp: true

BIN
ai/segments/c_1.wav

Binary file not shown.

BIN
ai/segments/c_2.wav

Binary file not shown.

BIN
ai/segments/c_3.wav

Binary file not shown.

43
apiApp/__init__.py

@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
from flask import Flask
import logging
import sys, os
from flask_compress import Compress
from apiApp.config import config
from flask_sqlalchemy import SQLAlchemy
# Silence all print() output by redirecting stdout to the null device.
# The opened handle is never closed and the original stdout is not saved.
def blockPrint():
sys.stdout = open(os.devnull, 'w')
# blockPrint()
__version__ = '0.1'
# Flask application object shared by the whole package.
apiApp = Flask(__name__, instance_relative_config=True)
# NOTE(review): the three keys below are hard-coded and committed to the
# repository - consider loading them from configuration or the environment.
apiApp.config['SECRET_KEY'] = '2qwtq2'
apiApp.config['INTERNAL_API_KEY'] = 'lGR9g09RW5cE07rKivRNIH9f'
apiApp.config['REMOTE_LOGIN_KEY'] = 'oRTiLTHeNTraZEsI'
# Database URI comes from the [DATABASE] section of the parsed config file.
apiApp.config['SQLALCHEMY_DATABASE_URI'] = config['DATABASE']['URI']
apiApp.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True
apiApp.config['SQLALCHEMY_POOL_SIZE'] = 10
apiApp.config['SQLALCHEMY_MAX_OVERFLOW'] = 20
db = SQLAlchemy(apiApp)
# Enable response compression for the app.
compress = Compress()
compress.init_app(apiApp)
logging.basicConfig(level = logging.INFO)
# These imports come after `apiApp` and `db` are created.
# NOTE(review): presumably because the imported modules import them back
# from this package - confirm.
from apiApp.interfaces import *
from apiApp.helper import jinjaHelper
#from apiApp.database import *
from apiApp.helper.utils import attach_monitor
attach_monitor(apiApp)
# Create the tables for all models registered on `db` at import time.
with apiApp.app_context():
db.create_all()

3
apiApp/config/__init__.py

@ -0,0 +1,3 @@
import configparser
# Module-level parser; apiApp/__init__.py imports it as `config`.
config = configparser.ConfigParser()
# 'config.ini' is a relative path, so it is resolved against the process
# working directory.  ConfigParser.read() skips files it cannot open, so a
# missing file only shows up later as a missing section or key.
config.read('config.ini')

61
apiApp/database/Call.py

@ -0,0 +1,61 @@
# NOTE(review): the whole Call model below is wrapped in a string literal, so
# this module currently defines no `Call` name at all.
# `apiApp/database/__init__.py` still does `from apiApp.database.Call import Call`
# and `Operator.calls` is a relationship to "Call"; both presumably fail until
# the model is restored -- confirm before re-enabling the database package.
# The disabled code also imports `create_app` from apiApp, which the
# `apiApp/__init__.py` shown in this commit does not define.
'''from apiApp import db
from sqlalchemy_serializer import SerializerMixin
import uuid
from apiApp import apiApp
class Call(db.Model, SerializerMixin):
__tablename__ = "call"
id = db.Column(db.Integer, primary_key=True)
filename = db.Column(db.String(100), unique=True)
appointment = db.Column(db.Boolean, default=False)
target = db.Column(db.Boolean, default=False)
operation = db.Column(db.Boolean, default=False)
call_text = db.Column(db.Text, unique=True)
ai_call_text = db.Column(db.Text)
ai_conclusion = db.Column(db.Text)
operator_id = db.Column(db.UUID, db.ForeignKey('operator.id'))
def add_call(filename, operator_id, call_text=None, ai_call_text=None,
ai_conclusion=None, appointment=False, target=False, operation=False):
try:
# Если контекст уже есть (Flask-запрос), работаем в нём
if apiApp and hasattr(apiApp, 'app_context'):
return Call._add_call_internal(
filename, operator_id, call_text, ai_call_text,
ai_conclusion, appointment, target, operation
)
# Если контекста нет (скрипт), создаём его
else:
from apiApp import create_app
app = create_app()
with app.app_context():
return Call._add_call_internal(
filename, operator_id, call_text, ai_call_text,
ai_conclusion, appointment, target, operation
)
except Exception as e:
return False, f"Ошибка: {str(e)}"
def _add_call_internal(filename, operator_id, call_text, ai_call_text,
ai_conclusion, appointment, target, operation):
existing_call = Call.query.filter_by(filename=filename, operator_id=operator_id).first()
if existing_call:
return False, "Такой звонок уже существует."
new_call = Call(
filename=filename,
operator_id=operator_id,
call_text=call_text,
ai_call_text=ai_call_text,
ai_conclusion=ai_conclusion,
appointment=appointment,
target=target,
operation=operation
)
db.session.add(new_call)
db.session.commit()
return True, new_call'''

14
apiApp/database/Operator.py

@ -0,0 +1,14 @@
from apiApp import db
from sqlalchemy_serializer import SerializerMixin
import uuid
# ORM model mapped to the "operator" table; SerializerMixin is mixed in from
# sqlalchemy_serializer.
class Operator(db.Model, SerializerMixin):
__tablename__ = "operator"
# Primary key: a UUID generated on the Python side by uuid.uuid4.
id = db.Column(db.UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# presumably the operator's full name -- TODO confirm
fio = db.Column(db.String(100))
# presumably the operator's number/extension -- TODO confirm
num = db.Column(db.Integer)
# Relationship to the "Call" model; the backref adds an `operator` attribute on Call.
# NOTE(review): the Call model is currently disabled in Call.py -- confirm this
# relationship can still be resolved.
calls = db.relationship("Call", backref="operator")

28
apiApp/database/User.py

@ -0,0 +1,28 @@
from apiApp import db
from sqlalchemy_serializer import SerializerMixin
import uuid
from werkzeug.security import check_password_hash
class User(db.Model):
    """Account record stored in the ``user`` table."""

    __tablename__ = "user"

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(50))
    full_name = db.Column(db.String(100))
    email = db.Column(db.String(100))
    password = db.Column(db.String(250))  # widened to 250 characters
    admin = db.Column(db.Boolean, default=False)

    def verify_password(self, password):
        """Return True when *password* matches the stored password hash."""
        return bool(check_password_hash(self.password, password))

    def is_active(self):
        """Every account is reported as active."""
        return True

    def is_authenticated(self):
        """Every account object is reported as authenticated."""
        return True

    def get_id(self):
        """Return the integer primary key of this account."""
        return self.id

3
apiApp/database/__init__.py

@ -0,0 +1,3 @@
from apiApp.database.Call import Call
from apiApp.database.Operator import Operator
from apiApp.database.User import User

11
apiApp/helper/jinjaHelper.py

@ -0,0 +1,11 @@
from apiApp import apiApp
import random, string
@apiApp.context_processor
def example():
    """Register a ``randomString`` helper in the template context."""

    def randomString(stringLength=10):
        """Return *stringLength* random lowercase ASCII letters."""
        alphabet = string.ascii_lowercase
        picked = [random.choice(alphabet) for _ in range(stringLength)]
        return ''.join(picked)

    return {'randomString': randomString}

11
apiApp/helper/utils.py

@ -0,0 +1,11 @@
from healthcheck import HealthCheck, EnvironmentDump
import sys
def app_data():
    """Return static metadata about the running application."""
    info = {}
    info["platform"] = "Pirogov.AI"
    info["python_version"] = sys.version
    return info
def attach_monitor(app):
    """Create a ``HealthCheck`` for *app* using the ``/status`` path."""
    status_path = '/status'
    HealthCheck(app, status_path)

1
apiApp/interfaces/__init__.py

@ -0,0 +1 @@
from apiApp.interfaces.apiFunctions import *

36
apiApp/interfaces/apiFunctions.py

@ -0,0 +1,36 @@
from apiApp import apiApp
from flask import render_template, make_response, send_from_directory, request
import os
from apiApp.config import config
import json
import requests
from ai.recognition import run_recognition, stereo_or_mono
import time
@apiApp.route('/recognize', methods=['POST'])
def recognize_call():
    """Save the first uploaded ``file`` part and return its recognised text.

    Returns a JSON object ``{'call_text': ...}`` on success, or a JSON error
    with HTTP 400 when the request carries no usable file.
    """
    r_files = request.files.getlist("file")
    if not r_files:
        # Previously this fell through to r_files[0] and produced a 500.
        return {'error': 'no file uploaded'}, 400
    upload = r_files[0]
    # Keep only the last path component so a crafted filename such as
    # "../../x" cannot escape the upload directory. basename is used (rather
    # than an ASCII-only sanitiser) so non-Latin file names survive unchanged.
    filename = os.path.basename(upload.filename or '')
    if filename in ('', '.', '..'):
        return {'error': 'invalid file name'}, 400
    saved_path = os.path.join(config['DIST']['PATH'], filename)
    upload.save(saved_path)
    call_text = run_recognition(saved_path)
    # NOTE(review): fixed 10 s pause kept from the original; its purpose is
    # not visible in this module -- confirm whether it is still needed.
    time.sleep(10)
    return {'call_text': call_text["data"]}
@apiApp.route('/recognize_bot', methods=['POST'])
def recognize_cal_bot():
    """Save the first uploaded ``file`` part and return ``stereo_or_mono``'s result.

    Returns a JSON object ``{'call_text': ...}`` on success, or a JSON error
    with HTTP 400 when the request carries no usable file.
    """
    r_files = request.files.getlist("file")
    if not r_files:
        # Previously this fell through to r_files[0] and produced a 500.
        return {'error': 'no file uploaded'}, 400
    upload = r_files[0]
    # Keep only the last path component so a crafted filename such as
    # "../../x" cannot escape the upload directory. basename is used (rather
    # than an ASCII-only sanitiser) so non-Latin file names survive unchanged.
    filename = os.path.basename(upload.filename or '')
    if filename in ('', '.', '..'):
        return {'error': 'invalid file name'}, 400
    saved_path = os.path.join(config['DIST']['PATH'], filename)
    upload.save(saved_path)
    call_text = stereo_or_mono(saved_path)
    #time.sleep(1)
    return {'call_text': call_text}

0
cleantemp.py

6
config.ini

@ -0,0 +1,6 @@
[WEB_APP]
PORT = 5001
[DIST]
PATH = /home/dev/GigaAM_API/temp
[DATABASE]
URI = postgresql://postgres:Gxq-8fN7A9@localhost:5432/call_analytics

17
keep_txts.py

@ -0,0 +1,17 @@
import datetime, time
from rec_all_for_day import today_audios
# Today's date as zero-padded string parts: [YYYY, MM, DD].
date = datetime.datetime.today().strftime('%Y-%m-%d').split('-')
remote_path = f'/{date[0]}/{date[1]}/{date[2]}/'


def permanent_recognition():
    """Hand control to ``today_audios`` from ``rec_all_for_day``."""
    now = datetime.datetime.today()
    # [hour, minute]; only the disabled delay below would make use of it.
    curr_time = [now.hour, now.minute]
    #if curr_time[0]<17 and curr_time[0]>7:
    #time.sleep((17-curr_time[0])*3600+(17-curr_time[1])*60)
    today_audios()


permanent_recognition()

11
makefilefornatalia.py

@ -0,0 +1,11 @@
import os
# Gather the transcripts of 01.06.2025 and of 31..26.05.2025 (in that order)
# into a single text file in the current working directory.
chunks = []

june_dir = '/media/dev/TEXTFILES/2025/06/01/'
for file in os.listdir(june_dir):
    with open(f'{june_dir}{file}', 'r') as fil:
        chunks.append(f"{file} (01.06.2025) - \n{fil.read()}\n\n")

may_root = '/media/dev/TEXTFILES/2025/05'
for day in ['31', '30', '29', '28', '27', '26']:
    day_dir = may_root + '/' + day
    for file in os.listdir(day_dir):
        with open(day_dir + '/' + file, 'r') as fil:
            chunks.append(f"{file} ({day}.05.2025) - \n{fil.read()}\n\n")

text = ''.join(chunks)
with open('calls_for_week_in_single_file.txt', 'w') as f:
    f.write(text)

0
model_test.py

165
rec_all_for_day.py

@ -0,0 +1,165 @@
import os
import time
import shutil
#from apiApp import init_model
#from apiApp.ai import recognition
from ai import recognition
import paramiko
#from apiApp.database import Call
from ai.recognition import clean
import datetime
path = '/media/dev/TEXTFILES/audiofiles'
import gc
import torch
# Memory housekeeping, called by day_audios before it starts processing files:
# run Python's garbage collector, ask torch to release its cached CUDA memory,
# and reset torch's CUDA peak-memory statistics.
def flush():
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
def is_file_fully_loaded(file_path, interval=0.5):
    """Report whether *file_path* has stopped growing.

    The file size is sampled twice, ``interval`` seconds apart, and the file
    is considered fully written when both samples agree. The original took
    both samples back-to-back, so it could practically never observe a
    difference; the pause makes the comparison meaningful.

    Args:
        file_path: Path of the file to probe.
        interval: Seconds to wait between the two probes; ``0`` or a negative
            value disables the wait.

    Returns:
        True if the size did not change between the probes, False otherwise.

    Raises:
        OSError: If the file cannot be stat'ed (e.g. it does not exist).
    """
    size = os.path.getsize(file_path)
    if interval > 0:
        time.sleep(interval)
    return os.path.getsize(file_path) == size
def connect_to_sftp(hostname, username, password, remote_path):
    """Open an SFTP session on *hostname* and change into *remote_path*.

    Args:
        hostname: Server to connect to.
        username: Login name.
        password: Login password.
        remote_path: Directory to ``chdir`` into after connecting.

    Returns:
        The paramiko SFTP client on success, or ``None`` after printing a
        message when authentication, the SSH layer, the directory lookup or
        any other I/O step fails. On failure the SSH connection opened here
        is closed again instead of being left dangling.
    """
    ssh = None
    try:
        ssh = paramiko.SSHClient()
        # Unknown hosts are added to the known hosts automatically.
        # NOTE(review): this accepts any host key -- confirm that is acceptable.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(hostname, username=username, password=password)
        sftp = ssh.open_sftp()
        # If the second character is a slash (e.g. a doubled leading slash),
        # drop the first character.
        if len(remote_path) > 3 and remote_path[1] == '/':
            remote_path = remote_path[1:]
        # NOTE(review): chdir is assumed to run for every path, not only for
        # paths longer than three characters -- confirm against the original.
        sftp.chdir(remote_path)
        return sftp
    except paramiko.AuthenticationException:
        print("Ошибка аутентификации. Проверьте имя пользователя и пароль.")
    except paramiko.SSHException as e:
        print(f"Ошибка SSH: {e}")
    except FileNotFoundError:
        print(f"Директория {remote_path} не найдена на сервере.")
    except IOError as e:
        print(f"Ошибка ввода-вывода: {e}")
    # Only reached after a handled failure: release the half-open connection.
    if ssh is not None:
        ssh.close()
    return None
from datetime import timedelta
def weekd(s):
    """Return the fixed list of day offsets ``[-1, -2, ..., -7]``.

    *s* is accepted but ignored; the weekday-dependent calculation that used
    to follow the ``return`` was unreachable.
    """
    return list(range(-1, -8, -1))
def today_audios():
    """Endlessly re-scan the recent daily folders and process new recordings.

    For every offset returned by ``weekd`` the matching ``/YYYY/MM/DD/``
    folder is handed to ``day_audios``. Before 09:00 local time every offset
    is shifted one extra day into the past. After a full sweep the loop
    sleeps 15 minutes; when ``day_audios`` raises ``AttributeError`` (which
    happens when no SFTP connection could be opened) it sleeps 30 minutes and
    starts over.

    Changes compared with the original:
      * the extra SFTP connection, remote listing and local directory scan
        were removed -- their result was never used, ``day_audios`` repeats
        all of it, and the connection was never closed;
      * the error path no longer calls ``today_audios()`` recursively; the
        surrounding ``while True`` already retries, so the call stack no
        longer grows with every failure.

    NOTE(review): the 15-minute pause is assumed to follow a complete sweep
    rather than every single day -- confirm against the original layout.
    """
    while True:
        try:
            for i in weekd(datetime.datetime.today().weekday()):
                now = datetime.datetime.now()
                # Offsets are negative, so -i is the number of days to go back.
                days_back = -i + 1 if now.hour < 9 else -i
                target = now - timedelta(days=days_back)
                # %m and %d are already zero-padded.
                remote_path = target.strftime("/%Y/%m/%d/")
                day_audios(remote_path)
            time.sleep(900)
        except AttributeError as e:
            print('SSH error', e)
            time.sleep(1800)
archive_path = '/media/dev/TEXTFILES/audiofiles'
def day_audios(remote_path):
    """Download and transcribe the not-yet-processed recordings of one day.

    Remote files whose name contains ``in-`` but neither ``_t`` nor ``mp3``
    are candidates. A candidate is skipped when the local text folder already
    holds an entry for it (entry name minus its last four characters) or when
    it is listed in one of the two error folders. Each remaining file is
    downloaded into the local archive and passed to
    ``recognition.to_txt_wdb``. On a CUDA out-of-memory error the model is
    re-initialised and the file retried once; a second out-of-memory error,
    or an ``IndexError``, copies the file into the matching error folder.

    The SFTP session and its SSH transport are now closed when the function
    ends, whether it finishes normally or raises; previously every call left
    a connection open.

    Args:
        remote_path: Day folder in the form ``/YYYY/MM/DD/``.

    Raises:
        AttributeError: When ``connect_to_sftp`` returned ``None``;
            ``today_audios`` catches this to back off and retry.

    NOTE(review): ``except IndexError`` is assumed to belong to the outer
    ``try`` (first recognition attempt) -- confirm against the original.
    NOTE(review): host and credentials are hard-coded in source.
    """
    text_dir = '/media/dev/TEXTFILES' + remote_path
    oom_dir = f'/media/dev/TEXTFILES/error_audiofiles/OutOfMemoryError{remote_path}'
    index_dir = f'/media/dev/TEXTFILES/error_audiofiles/IndexError{remote_path}'
    for directory in (text_dir, oom_dir, index_dir,
                      '/media/dev/TEXTFILES/audiofiles' + remote_path):
        os.makedirs(directory, exist_ok=True)

    sftp_client = connect_to_sftp("192.168.1.150", "monitor", "Audio4analy6!6", remote_path)
    try:
        # Raises AttributeError when the connection failed (sftp_client is None).
        candidates = [name for name in sftp_client.listdir()
                      if 'in-' in name and '_t' not in name and 'mp3' not in name]
        #print(len(files))
        txtfiles = ([entry[:-4] for entry in os.listdir(text_dir)]
                    + os.listdir(oom_dir)
                    + os.listdir(index_dir))
        files = list(set(candidates) - set(txtfiles))
        if len(files) > 0:
            print(files)
        print(len(files))
        flush()
        for i in files:
            local_path = f"{archive_path}{remote_path}{i}"
            sftp_client.get(remote_path + '/' + i, local_path)
            print('1ok')
            while not is_file_fully_loaded(local_path):
                time.sleep(1)
            print('2ok')
            try:
                print('3ok')
                recognition.to_txt_wdb(local_path)
                print('4ok')
                #Call.add_call(i,'-', call_text, '-', '-', '-', '-', '-')
            except torch.OutOfMemoryError:
                # Free the model, reload it and give the file one more try.
                clean()
                recognition.init_model()
                try:
                    recognition.to_txt_wdb(local_path)
                except torch.OutOfMemoryError:
                    # A directory named after the file is created and the file
                    # copied into it; the directory name is what later scans
                    # use to skip this recording.
                    quarantine = f'{oom_dir}{i}'
                    os.makedirs(quarantine, exist_ok=True)
                    shutil.copy(local_path, quarantine)
            except IndexError:
                quarantine = f'{index_dir}{i}'
                os.makedirs(quarantine, exist_ok=True)
                shutil.copy(local_path, quarantine)
    finally:
        if sftp_client is not None:
            # Fetch the transport first: closing the SFTP client only closes
            # its channel, not the SSH connection underneath.
            channel = sftp_client.get_channel()
            transport = channel.get_transport() if channel is not None else None
            sftp_client.close()
            if transport is not None:
                transport.close()
#day_audios('/2025/05/27/')

290
requirements.txt

@ -0,0 +1,290 @@
absl-py==2.1.0
aiofiles==24.1.0
aiogram==3.17.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.10
aiosignal==1.3.1
alabaster==0.7.16
alembic==1.14.0
aniso8601==9.0.1
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
appdirs==1.4.4
asciitree==0.3.3
asteroid-filterbanks==0.4.0
asttokens==3.0.0
async-timeout==5.0.1
attrdict==2.0.1
attrs==24.2.0
audioread==3.0.1
babel==2.16.0
bcrypt==4.2.1
beautifulsoup4==4.12.3
black==19.10b0
blinker==1.9.0
boto3==1.35.76
botocore==1.35.76
braceexpand==0.1.7
Brotli==1.1.0
cdifflib==1.2.6
certifi==2024.8.30
cffi==1.17.1
charset-normalizer==3.4.0
click==8.0.2
colorama==0.4.6
colorlog==6.9.0
comm==0.2.2
contourpy==1.3.0
cryptography==44.0.1
cycler==0.12.1
Cython==3.0.11
datasets==3.1.0
DAWG-Python==0.7.2
decorator==5.1.1
dill==0.3.8
Distance==0.1.3
docker-pycreds==0.4.0
docopt==0.6.2
docutils==0.21.2
editdistance==0.8.1
einops==0.8.0
exceptiongroup==1.2.2
executing==2.1.0
faiss-cpu==1.8.0.post1
fasteners==0.19
fasttext==0.9.3
filelock==3.16.1
Flask==2.2.5
Flask-Compress==1.17
Flask-Migrate==4.0.7
Flask-RESTful==0.3.10
Flask-SQLAlchemy==3.1.1
fonttools==4.55.2
frozenlist==1.5.0
fsspec==2024.9.0
ftfy==6.3.1
g2p-en==2.1.0
gdown==5.2.0
gitdb==4.0.11
GitPython==3.1.43
greenlet==3.1.1
grpcio==1.68.1
h5py==3.12.1
huggingface-hub==0.23.2
hydra-core==1.3.2
HyperPyYAML==1.2.2
idna==3.10
ijson==3.3.0
imageio==2.37.0
imageio-ffmpeg==0.6.0
imagesize==1.4.1
importlib_metadata==8.5.0
importlib_resources==6.4.5
inflect==7.4.0
iniconfig==2.0.0
ipython==8.18.1
ipywidgets==8.1.5
isort==5.13.2
itsdangerous==2.2.0
jedi==0.19.2
jieba==0.42.1
Jinja2==3.1.4
jiwer==2.5.2
jmespath==1.0.1
joblib==1.4.2
julius==0.2.7
jupyterlab_widgets==3.0.13
kaldi-python-io==1.2.2
kaldiio==2.18.0
kiwisolver==1.4.7
kornia==0.7.4
kornia_rs==0.1.7
latexcodec==3.0.0
lazy_loader==0.4
Levenshtein==0.22.0
librosa==0.10.2.post1
lightning==2.4.0
lightning-utilities==0.11.9
llvmlite==0.43.0
loguru==0.7.3
lxml==5.3.0
magic-filter==1.0.12
Mako==1.3.7
Markdown==3.7
markdown-it-py==3.0.0
markdown2==2.5.1
MarkupSafe==3.0.2
marshmallow==3.23.1
matplotlib==3.9.3
matplotlib-inline==0.1.7
mdurl==0.1.2
megatron-core==0.3.0
more-itertools==10.5.0
morph==0.1.5
mpmath==1.3.0
msgpack==1.1.0
multidict==6.1.0
multiprocess==0.70.16
mutagen==1.47.0
nemo-text-processing==0.1.8rc0
nemo-toolkit==1.21.0
networkx==3.2.1
nltk==3.9.1
noisereduce==3.0.3
numba==0.60.0
numcodecs==0.12.1
nvidia-cublas-cu12==12.4.5.8
nvidia-cuda-cupti-cu12==12.4.127
nvidia-cuda-nvrtc-cu12==12.4.127
nvidia-cuda-runtime-cu12==12.4.127
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.2.1.3
nvidia-curand-cu12==10.3.5.147
nvidia-cusolver-cu12==11.6.1.9
nvidia-cusparse-cu12==12.3.1.170
nvidia-nccl-cu12==2.21.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.4.127
omegaconf==2.3.0
onnx==1.17.0
OpenCC==1.1.6
optuna==4.1.0
packaging==24.2
pandas==2.2.3
pangu==4.0.6.1
parameterized==0.9.0
paramiko==3.5.1
parso==0.8.4
pathspec==0.12.1
pexpect==4.9.0
pillow==11.0.0
plac==1.4.3
platformdirs==4.3.6
pluggy==1.5.0
pooch==1.8.2
portalocker==3.0.0
primePy==1.3
proglog==0.1.10
progress==1.6
prompt_toolkit==3.0.48
propcache==0.2.1
protobuf==5.29.1
psutil==6.1.0
ptyprocess==0.7.0
pure_eval==0.2.3
pyannote.audio==3.2.0
pyannote.core==5.0.0
pyannote.database==5.1.0
pyannote.metrics==3.2.1
pyannote.pipeline==3.0.1
pyarrow==18.1.0
pybind11==2.13.6
pybtex==0.24.0
pybtex-docutils==1.0.3
pycparser==2.22
pydantic==2.10.5
pydantic_core==2.27.2
pydub==0.25.1
Pygments==2.18.0
pymorphy2==0.9.1
pymorphy2-dicts-ru==2.4.417127.4579844
PyNaCl==1.5.0
pynini==2.1.5
pyodbc==5.2.0
pyparsing==3.2.0
pypinyin==0.53.0
pypinyin-dict==0.8.0
pypugjs==5.11.0
PySocks==1.7.1
pytest==8.3.4
pytest-runner==6.0.1
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pytorch-lightning==2.0.7
pytorch-metric-learning==2.7.0
pytz==2024.2
PyYAML==6.0.2
rapidfuzz==2.13.7
regex==2024.11.6
requests==2.32.3
rich==13.9.4
rouge-score==0.1.2
ruamel.yaml==0.18.6
ruamel.yaml.clib==0.2.12
s3transfer==0.10.4
sacrebleu==2.4.3
sacremoses==0.1.1
safetensors==0.4.5
scikit-learn==1.5.2
scipy==1.13.1
semver==3.0.2
sentence-transformers==2.6.1
sentencepiece==0.2.0
sentry-sdk==2.19.2
setproctitle==1.3.4
shellingham==1.5.4
six==1.17.0
smmap==5.0.1
snowballstemmer==2.2.0
sortedcontainers==2.4.0
soundfile==0.12.1
soupsieve==2.6
numpy==1.23.5
sox==1.5.0
soxr==0.5.0.post1
speechbrain==1.0.2
Sphinx==7.4.7
sphinxcontrib-applehelp==2.0.0
sphinxcontrib-bibtex==2.6.3
sphinxcontrib-devhelp==2.0.0
sphinxcontrib-htmlhelp==2.1.0
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==2.0.0
sphinxcontrib-serializinghtml==2.0.0
SQLAlchemy==2.0.36
stack-data==0.6.3
sympy==1.13.1
tabulate==0.9.0
tensorboard==2.18.0
tensorboard-data-server==0.7.2
tensorboardX==2.6.2.2
tensorstore==0.1.45
termcolor==2.5.0
text-unidecode==1.3
textdistance==4.6.3
texterrors==0.5.1
threadpoolctl==3.5.0
tokenizers==0.13.3
toml==0.10.2
tomli==2.2.1
torch==2.5.1
torch-audiomentations==0.11.1
torch_pitch_shift==1.2.5
torchaudio==2.5.1
torchmetrics==1.6.0
tqdm==4.67.1
traitlets==5.14.3
transformers==4.33.3
triton==3.1.0
typed-ast==1.5.5
typeguard==4.4.1
typer==0.15.1
typing_extensions==4.12.2
tzdata==2024.2
urllib3==1.26.20
uWSGI==2.0.28
waitress==3.0.2
wandb==0.18.7
wcwidth==0.2.13
webdataset==0.1.62
Werkzeug==3.1.3
wget==3.2
widgetsnbextension==4.0.13
wrapt==1.17.0
xxhash==3.5.0
yargy==0.16.0
yarl==1.18.3
youtokentome==1.0.6
zarr==2.18.2
zipp==3.21.0
zstandard==0.23.0

109
rnnt_model_config.yaml

@ -0,0 +1,109 @@
model_class: enc_dec_rnnt_bpe
sample_rate: 16000
log_prediction: true
model_defaults:
enc_hidden: 768
pred_hidden: 320
join_hidden: 320
preprocessor:
_target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
sample_rate: 16000
n_fft: 400
n_window_size: 400
window_size: null
n_window_stride: 160
window_stride: null
features: 64
dither: 0.0
preemph: null
log: true
log_zero_guard_type: clamp
normalize: null
pad_to: 0
mel_norm: null
window: hann
log_zero_guard_value: 1e-9
tokenizer:
dir: tokenizer_all_sets/
type: bpe
validation_ds:
shuffle: False
manifest_filepath: null
encoder:
_target_: nemo.collections.asr.modules.ConformerEncoder
feat_in: 64
feat_out: -1
n_layers: 16
d_model: 768
subsampling: striding
subsampling_factor: 4
subsampling_conv_channels: 768
ff_expansion_factor: 4
self_attention_model: rel_pos
pos_emb_max_len: 5000
n_heads: 16
xscaling: false
untie_biases: true
conv_kernel_size: 31
dropout: 0.1
dropout_emb: 0.1
dropout_att: 0.1
decoder:
_target_: nemo.collections.asr.modules.RNNTDecoder
normalization_mode: null
random_state_sampling: false
blank_as_pad: true
vocab_size: 512
prednet:
pred_hidden: 320
pred_rnn_layers: 1
t_max: null
dropout: 0.0
joint:
_target_: nemo.collections.asr.modules.RNNTJoint
log_softmax: null
fuse_loss_wer: false
fused_batch_size: 1
jointnet:
joint_hidden: 320
activation: relu
dropout: 0.0
encoder_hidden: 768
optim:
name: adamw
lr: 5.0e-05
betas:
- 0.9
- 0.98
weight_decay: 0.01
sched:
name: CosineAnnealing
warmup_steps: 10000
warmup_ratio: null
min_lr: 1.0e-07
nemo_version: 1.12.0
decoding:
strategy: greedy_batch
preserve_alignments: false
greedy:
max_symbols: 3
beam:
beam_size: 5
score_norm: true
loss:
loss_name: default
mwer: false
rnnt_reduction: mean_batch
wer_coef: false
subtract_mean: true
warprnnt_numba_kwargs:
fastemit_lambda: 0.0
clamp: -1.0
rnnt_weight: 0.1
unique_hyp: true

19
run.py

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from apiApp import apiApp
from apiApp.config import config
from waitress import serve
# Placeholder; nothing else in this file reads it.
model = None
# The string literal below is disabled code (an absolute-path config lookup),
# kept only for reference.
'''
from pathlib import Path
import os
BASE_DIR = Path(__file__).parent.absolute()
CONFIG_PATH = '/home/dev/GigaAM_API/config.ini'
'''
#config.read(CONFIG_PATH)
if __name__ == '__main__':
# Flask's built-in server is left commented out; the app is served by waitress
# on all interfaces, with the port taken from the [WEB_APP] section of the config.
#apiApp.run(debug=True, port=config['WEB_APP']['PORT'], use_reloader=False)
serve(apiApp, host='0.0.0.0', port=config['WEB_APP']['PORT'])

BIN
temp/1 9223339795.wav

Binary file not shown.

BIN
temp/1 Антонина.wav

Binary file not shown.

BIN
temp/123.mp3

Binary file not shown.

BIN
temp/123.mp3.wav

Binary file not shown.

BIN
temp/123.ogg

Binary file not shown.

BIN
temp/123.ogg.wav

Binary file not shown.

BIN
temp/123.wav

Binary file not shown.

BIN
temp/20 9226488628.wav

Binary file not shown.

BIN
temp/3 Надежда.wav

Binary file not shown.

BIN
temp/30 9824972560.wav

Binary file not shown.

BIN
temp/4 3472000334.wav

Binary file not shown.

BIN
temp/50 9026373947.wav

Binary file not shown.

BIN
temp/53 9655511570.wav

Binary file not shown.

BIN
temp/9 9028055111.wav

Binary file not shown.

BIN
temp/9027955000 обычный звонок.mp3

Binary file not shown.

BIN
temp/9519212271 обычный звонок.mp3

Binary file not shown.

BIN
temp/in-2250662-2.wav

Binary file not shown.

BIN
temp/in-2250662.wav

Binary file not shown.

BIN
temp/in-3422000203-89028396207-20250202-173540-1738499740.227325.wav

Binary file not shown.

BIN
temp/in-3422000203-89124901528-20250126-123049-1737876649.196438.wav

Binary file not shown.

BIN
temp/in-3422000203-89127888996-20250202-104207-1738474927.225568.wav

Binary file not shown.

BIN
temp/in-3422000203-89523314992-20250315-180757-1742044077.36312.mp3.wav

Binary file not shown.

BIN
temp/in-3422000303-83422073789-20250126-141431-1737882871.196887.wav

Binary file not shown.

BIN
temp/in-73422070303-79026434469-20250419-092305-1745036585.6448.wav

Binary file not shown.

BIN
temp/in-73422070303-79028027705-20250409-110405-1744178645.172052.wav

Binary file not shown.

BIN
temp/in-73422070303-79028371463-20241005-093328-1728102808.39081 (1).wav

Binary file not shown.

BIN
temp/in-73422070303-79028371463-20241005-093328-1728102808.39081.wav

Binary file not shown.

BIN
temp/in-73422070303-79082533189-20250315-185043-1742046643.36394.mp3.wav

Binary file not shown.

BIN
temp/in-73422070303-79082551616-20250126-152907-1737887347.197152.wav

Binary file not shown.

BIN
temp/in-73422070303-79082595553-20250126-113805-1737873485.195689.wav

Binary file not shown.

BIN
temp/in-73422070303-79082604307-20250315-165118-1742039478.36145.mp3.wav

Binary file not shown.

BIN
temp/in-73422070303-79082729571-20250126-134444-1737881084.196766.wav

Binary file not shown.

BIN
temp/in-73422070303-79124892401-20250420-145918-1745143158.11268.wav

Binary file not shown.

BIN
temp/in-73422070303-79128891125-20250419-113055-1745044255.6885.wav

Binary file not shown.

BIN
temp/in-73422070303-79194723777-20250303-092105-1740975665.6553 (2).wav

Binary file not shown.

BIN
temp/in-73422070303-79194726227-20250126-101026-1737868226.195319.wav

Binary file not shown.

BIN
temp/in-73422070303-79223332262-20250126-132543-1737879943.196684.wav

Binary file not shown.

BIN
temp/in-73422070303-79223520460-20250315-123211-1742023931.35278.mp3.wav

Binary file not shown.

BIN
temp/in-73422070303-79226437328-20250126-162614-1737890774.197308.wav

Binary file not shown.

BIN
temp/in-73422070303-79504502044-20250126-175418-1737896058.197497.wav

Binary file not shown.

BIN
temp/in-73422070303-79504612761-20250126-102943-1737869383.195411.wav

Binary file not shown.

BIN
temp/in-73422070303-79504695302-20250126-112626-1737872786.195645.wav

Binary file not shown.

BIN
temp/in-73422070303-79615727549-20241005-093140-1728102700.39075.wav

Binary file not shown.

BIN
temp/in-73422070303-79638592760-20250126-103119-1737869479.195418.wav

Binary file not shown.

BIN
temp/in-73422070303-79824744004-20250315-092838-1742012918.32904.mp3.wav

Binary file not shown.

BIN
temp/in-73422070303-79824814454-20250131-124732-1738309652.220219.wav

Binary file not shown.

BIN
temp/in-73422070303-79920919113-20250419-173524-1745066124.9703.wav

Binary file not shown.

BIN
temp/in-73422250662-79223066055-20250315-095240-1742014360.33018.mp3.wav

Binary file not shown.

BIN
temp/in-73422250662-79223604110-20250315-120547-1742022347.35163.mp3.wav

Binary file not shown.

BIN
temp/in-73422250662-79638829677-20250126-174834-1737895714.197474.wav

Binary file not shown.

BIN
temp/in-s-73422298463-20250124-141929-1737710369.188991.wav

Binary file not shown.

BIN
temp/in-s-79526578558-20250417-132418-1744878258.215700.wav

Binary file not shown.

BIN
temp/in-р-79918084911-20241005-090209-1728100929.38889-2.wav

Binary file not shown.

BIN
temp/out-+79082545599-703-20241102-113930-1730529570.191761.wav

Binary file not shown.

BIN
temp/out-+79091124555-704-20250126-120326-1737875006.196314.wav

Binary file not shown.

BIN
temp/out-+79125928169-709-20241102-170329-1730549009.192967.wav

Binary file not shown.

BIN
temp/out-+79127829273-709-20241102-181029-1730553029.193102.wav

Binary file not shown.

BIN
temp/out-+79526571375-709-20241102-181420-1730553260.193181.wav

Binary file not shown.

BIN
temp/out-+79630209528-703-20241102-115506-1730530506.191829.wav

Binary file not shown.

BIN
temp/out-0189526500261-801-20241102-133847-1730536727.192309.wav

Binary file not shown.

BIN
temp/out-79082532655-705-20241102-090039-1730520038.191025.wav

Binary file not shown.

BIN
temp/out-79226416611-705-20241102-101859-1730524739.191420.wav

Binary file not shown.

BIN
temp/out-89048438339-703-20241102-090017-1730520017.191023.wav

Binary file not shown.

BIN
temp/out-89082658735-708-20250126-115254-1737874374.195713.wav

Binary file not shown.

BIN
temp/out-89125860472-708-20250126-104916-1737870556.195464.wav

Binary file not shown.

BIN
temp/out-89519373293-362-20250126-110647-1737871607.195559.wav

Binary file not shown.

BIN
temp/out-89523278927-504-20241102-132525-1730535925.192215.wav

Binary file not shown.

BIN
test.wav

Binary file not shown.

0
tokenizer_all_sets/__init__.py

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save