commit
					f94df248d0
				
				 7 changed files with 2729 additions and 0 deletions
			
			
		@ -0,0 +1,221 @@
					 | 
				
			||||
# Byte-compiled / optimized / DLL files | 
				
			||||
__pycache__/ | 
				
			||||
*.py[cod] | 
				
			||||
*$py.class | 
				
			||||
 | 
				
			||||
# C extensions | 
				
			||||
*.so | 
				
			||||
 | 
				
			||||
# Distribution / packaging | 
				
			||||
.Python | 
				
			||||
build/ | 
				
			||||
develop-eggs/ | 
				
			||||
dist/ | 
				
			||||
downloads/ | 
				
			||||
eggs/ | 
				
			||||
.eggs/ | 
				
			||||
lib/ | 
				
			||||
lib64/ | 
				
			||||
parts/ | 
				
			||||
sdist/ | 
				
			||||
var/ | 
				
			||||
wheels/ | 
				
			||||
share/python-wheels/ | 
				
			||||
*.egg-info/ | 
				
			||||
.installed.cfg | 
				
			||||
*.egg | 
				
			||||
MANIFEST | 
				
			||||
 | 
				
			||||
# PyInstaller | 
				
			||||
#  Usually these files are written by a python script from a template | 
				
			||||
#  before PyInstaller builds the exe, so as to inject date/other infos into it. | 
				
			||||
*.manifest | 
				
			||||
*.spec | 
				
			||||
 | 
				
			||||
# Installer logs | 
				
			||||
pip-log.txt | 
				
			||||
pip-delete-this-directory.txt | 
				
			||||
 | 
				
			||||
# Unit test / coverage reports | 
				
			||||
htmlcov/ | 
				
			||||
.tox/ | 
				
			||||
.nox/ | 
				
			||||
.coverage | 
				
			||||
.coverage.* | 
				
			||||
.cache | 
				
			||||
nosetests.xml | 
				
			||||
coverage.xml | 
				
			||||
*.cover | 
				
			||||
*.py,cover | 
				
			||||
.hypothesis/ | 
				
			||||
.pytest_cache/ | 
				
			||||
cover/ | 
				
			||||
 | 
				
			||||
# Translations | 
				
			||||
*.mo | 
				
			||||
*.pot | 
				
			||||
 | 
				
			||||
# Django stuff: | 
				
			||||
*.log | 
				
			||||
local_settings.py | 
				
			||||
db.sqlite3 | 
				
			||||
db.sqlite3-journal | 
				
			||||
 | 
				
			||||
# Flask stuff: | 
				
			||||
instance/ | 
				
			||||
.webassets-cache | 
				
			||||
 | 
				
			||||
# Scrapy stuff: | 
				
			||||
.scrapy | 
				
			||||
 | 
				
			||||
# Sphinx documentation | 
				
			||||
docs/_build/ | 
				
			||||
 | 
				
			||||
# PyBuilder | 
				
			||||
.pybuilder/ | 
				
			||||
target/ | 
				
			||||
 | 
				
			||||
# Jupyter Notebook | 
				
			||||
.ipynb_checkpoints | 
				
			||||
 | 
				
			||||
# IPython | 
				
			||||
profile_default/ | 
				
			||||
ipython_config.py | 
				
			||||
 | 
				
			||||
# pyenv | 
				
			||||
#   For a library or package, you might want to ignore these files since the code is | 
				
			||||
#   intended to run in multiple environments; otherwise, check them in: | 
				
			||||
# .python-version | 
				
			||||
 | 
				
			||||
# pipenv | 
				
			||||
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | 
				
			||||
#   However, in case of collaboration, if having platform-specific dependencies or dependencies | 
				
			||||
#   having no cross-platform support, pipenv may install dependencies that don't work, or not | 
				
			||||
#   install all needed dependencies. | 
				
			||||
#Pipfile.lock | 
				
			||||
 | 
				
			||||
# poetry | 
				
			||||
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | 
				
			||||
#   This is especially recommended for binary packages to ensure reproducibility, and is more | 
				
			||||
#   commonly ignored for libraries. | 
				
			||||
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | 
				
			||||
#poetry.lock | 
				
			||||
 | 
				
			||||
# pdm | 
				
			||||
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | 
				
			||||
#pdm.lock | 
				
			||||
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | 
				
			||||
#   in version control. | 
				
			||||
#   https://pdm.fming.dev/#use-with-ide | 
				
			||||
.pdm.toml | 
				
			||||
 | 
				
			||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | 
				
			||||
__pypackages__/ | 
				
			||||
 | 
				
			||||
# Celery stuff | 
				
			||||
celerybeat-schedule | 
				
			||||
celerybeat.pid | 
				
			||||
 | 
				
			||||
# SageMath parsed files | 
				
			||||
*.sage.py | 
				
			||||
 | 
				
			||||
# Environments | 
				
			||||
.env | 
				
			||||
.venv | 
				
			||||
env/ | 
				
			||||
venv/ | 
				
			||||
ENV/ | 
				
			||||
env.bak/ | 
				
			||||
venv.bak/ | 
				
			||||
 | 
				
			||||
# Spyder project settings | 
				
			||||
.spyderproject | 
				
			||||
.spyproject | 
				
			||||
 | 
				
			||||
# Rope project settings | 
				
			||||
.ropeproject | 
				
			||||
 | 
				
			||||
# mkdocs documentation | 
				
			||||
/site | 
				
			||||
 | 
				
			||||
# mypy | 
				
			||||
.mypy_cache/ | 
				
			||||
.dmypy.json | 
				
			||||
dmypy.json | 
				
			||||
 | 
				
			||||
# Pyre type checker | 
				
			||||
.pyre/ | 
				
			||||
 | 
				
			||||
# pytype static type analyzer | 
				
			||||
.pytype/ | 
				
			||||
 | 
				
			||||
# Cython debug symbols | 
				
			||||
cython_debug/ | 
				
			||||
 | 
				
			||||
# PyCharm | 
				
			||||
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can | 
				
			||||
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | 
				
			||||
#  and can be added to the global gitignore or merged into this file.  For a more nuclear | 
				
			||||
#  option (not recommended) you can ignore the whole idea folder. | 
				
			||||
.idea/ | 
				
			||||
 | 
				
			||||
# VS Code | 
				
			||||
.vscode/ | 
				
			||||
*.code-workspace | 
				
			||||
 | 
				
			||||
# Data files | 
				
			||||
*.db | 
				
			||||
*.sqlite | 
				
			||||
*.sqlite3 | 
				
			||||
 | 
				
			||||
# ChromaDB database | 
				
			||||
chroma_db/ | 
				
			||||
 | 
				
			||||
# AI Models | 
				
			||||
models/* | 
				
			||||
!models/.gitkeep | 
				
			||||
 | 
				
			||||
# Downloaded data | 
				
			||||
data/* | 
				
			||||
!data/.gitkeep | 
				
			||||
 | 
				
			||||
# Temporary files | 
				
			||||
*.tmp | 
				
			||||
*.temp | 
				
			||||
*.log | 
				
			||||
 | 
				
			||||
# OS generated files | 
				
			||||
.DS_Store | 
				
			||||
.DS_Store? | 
				
			||||
._* | 
				
			||||
.Spotlight-V100 | 
				
			||||
.Trashes | 
				
			||||
ehthumbs.db | 
				
			||||
Thumbs.db | 
				
			||||
 | 
				
			||||
# Windows | 
				
			||||
*.lnk | 
				
			||||
 | 
				
			||||
# Application specific | 
				
			||||
# rag_corpus.json | 
				
			||||
config.json | 
				
			||||
 | 
				
			||||
# LLM cache and temporary files | 
				
			||||
.cache/ | 
				
			||||
*.gguf.tmp | 
				
			||||
 | 
				
			||||
# Logs | 
				
			||||
logs/ | 
				
			||||
*.log | 
				
			||||
 | 
				
			||||
# Backup files | 
				
			||||
*.bak | 
				
			||||
*.backup | 
				
			||||
 | 
				
			||||
# Local configuration | 
				
			||||
local_config.py | 
				
			||||
settings_local.py | 
				
			||||
 | 
				
			||||
# Large files (add any files larger than 100 MB here, if present) | 
				
			||||
# *.largefile | 
				
			||||
/scripts/ | 
				
			||||
@ -0,0 +1,26 @@
					 | 
				
			||||
# Medical RAG System with YandexGPT | 
				
			||||
 | 
				
			||||
Система для автоматического преобразования кратких медицинских записей в развернутые формулировки жалоб пациентов с использованием RAG (Retrieval-Augmented Generation) и модели YandexGPT. | 
				
			||||
 | 
				
			||||
## 🚀 Особенности | 
				
			||||
 | 
				
			||||
- **RAG-архитектура**: Поиск релевантных медицинских примеров из базы знаний | 
				
			||||
- **Гибкое использование GPU**: Автоматическое определение и использование GPU для ускорения работы | 
				
			||||
- **Управление токенами**: Интеллектуальное ограничение длины контекста | 
				
			||||
- **Русскоязычная оптимизация**: Специально настроена для работы с медицинскими текстами на русском языке | 
				
			||||
- **Хранение состояния**: Сохранение векторной базы данных между сессиями | 
				
			||||
 | 
				
			||||
## 📋 Требования | 
				
			||||
 | 
				
			||||
### Аппаратные требования | 
				
			||||
- **Минимально**: CPU с 8+ GB RAM | 
				
			||||
- **Рекомендуется**: GPU с 8+ GB VRAM (NVIDIA) | 
				
			||||
- **Диск**: 10+ GB свободного места на диске | 
				
			||||
 | 
				
			||||
### Программные требования | 
				
			||||
- Python 3.8+ | 
				
			||||
- PyTorch (с поддержкой CUDA при наличии GPU) | 
				
			||||
- Библиотеки: `chromadb`, `llama-cpp-python`, `sentence-transformers`, `tiktoken`, `numpy` | 
				
			||||
 | 
				
			||||
### Модель  | 
				
			||||
[YandexGPT-5-Lite-8B-instruct-Q4_K_M](https://huggingface.co/yandex/YandexGPT-5-Lite-8B-instruct-GGUF/resolve/main/YandexGPT-5-Lite-8B-instruct-Q4_K_M.gguf?download=true) | 
				
			||||
@ -0,0 +1,6 @@
					 | 
				
			||||
torch>=2.0.0 | 
				
			||||
chromadb>=0.4.0 | 
				
			||||
llama-cpp-python>=0.2.0 | 
				
			||||
sentence-transformers>=2.2.0 | 
				
			||||
tiktoken>=0.4.0 | 
				
			||||
numpy>=1.21.0 | 
				
			||||
					Loading…
					
					
				
		Reference in new issue