diff --git a/.dockerignore b/.dockerignore
index db12e9b..aa0756b 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,15 +1,15 @@
-.dockerdata
-docker-compose.ollama.yaml
-Dockerfile
-Dockerfile-cuda128
-install_audiowaveform.sh
-samples
-scriberr_files
-scriberr
-install_aw.sh
-.prettier*
-venv
-node_modules
-*.env
-.venv
+.dockerdata
+docker-compose.ollama.yaml
+Dockerfile
+Dockerfile-cuda128
+install_audiowaveform.sh
+samples
+scriberr_files
+scriberr
+install_aw.sh
+.prettier*
+venv
+node_modules
+*.env
+.venv
 .aider.*
\ No newline at end of file
diff --git a/.env b/.env
deleted file mode 100644
index b164c66..0000000
--- a/.env
+++ /dev/null
@@ -1,51 +0,0 @@
-# .env file
-# Docker image configuration
-IMAGE_TAG=main # Docker image tag to use for building the Docker image
-PORT=3000 # Port to use for running the web interface
-
-# Database configuration
-POSTGRES_PORT=5432 # Port to use for PostgreSQL database
-POSTGRES_USER=root # Username for PostgreSQL database
-POSTGRES_PASSWORD=mysecretpassword # Password for PostgreSQL database
-POSTGRES_DB=local # Database name
-DATABASE_URL=postgres://root:mysecretpassword@db:5432/local # Database URL for connection to PostgreSQL database with credentials from above
-
-# Application configuration
-ADMIN_USERNAME=admin # Username for admin user in web interface
-ADMIN_PASSWORD=password # Password for admin user in web interface
-
-# AI configuration
-# Default Model to use for transcription, can be set to any OpenAI model or Ollama model
-# For Ollama connections, enter the model name and version number. E.g. llama3.2:latest
-AI_MODEL="gpt-3.5-turbo"
-
-# Leave blank to use default (OpenAI API), otherwise set to the base URL of your OpenAI API compatible server
-# For Ollama connections, enter the IP of the Ollama server, and then the port it is running on.
-# Include the /v1/ or /api/v1/ path if needed (Open WebUI uses /api/ and Ollama uses /v1/)
-# Example: http://192.168.1.5:11434 or http://host.docker.internal:11434
-# NOTE: host.docker.internal is only available on Windows and macOS, use the IP address of the host machine on Linux
-# NOTE: localhost and 127.0.0.1 will not work, as they refer to the container itself, not the host machine
-OLLAMA_BASE_URL=""
-
-# API Keys
-# NOTE:
-# If using Ollama, you can leave these blank or set to a dummy value
-# If using OpenAI, you must set these to your API keys
-# If using a custom API compatible server, you must set these to your API keys
-OPENAI_API_KEY="" # Needed for retrieving models from OpenAI, for Ollama connections, this can be left blank or set to a dummy value
-HF_API_KEY="" # Needed for retrieving models from HuggingFace for Diarization
-
-# Diarization configuration
-# Default Model to use for Diarization, can be set to any HuggingFace model that supports diarization
-# NOTE: This model will be downloaded automatically if it is not already present in the models directory
-# NOTE: You can use any model that supports diarization, but the default model is pyannote/speaker-diarization
-# NOTE: You can find a list of models that support diarization here: https://huggingface.co/models?other=speaker-diarization
-DIARIZATION_MODEL=pyannote/speaker-diarization
-
-MODELS_DIR=/scriberr/models
-WORK_DIR=/scriberr/temp
-AUDIO_DIR=/scriberr/uploads
-
-# Server configuration
-BODY_SIZE_LIMIT=1G
-HARDWARE_ACCEL=cpu # Set to 'gpu' if you have an Nvidia GPU
diff --git a/.github/workflows/version-and-release.yml b/.github/workflows/version-and-release.yml
index 416a313..d87318d 100644
--- a/.github/workflows/version-and-release.yml
+++ b/.github/workflows/version-and-release.yml
@@ -39,7 +39,7 @@ jobs:
           if [[ "$commit" == *"BREAKING CHANGE"* || "$commit" == *"!:"* ]]; then
             MAJOR_CHANGE=true
             break
-          elif [[ "$commit" =~ ^feat(\([^)]+\))?:.* ]]; then
+          elif echo "$commit" | grep -Eq "^feat(\([^)]+\))?:.*"; then
             MINOR_CHANGE=true
           fi
         done
@@ -76,4 +76,4 @@ jobs:
           Release ${{ steps.version.outputs.release_tag }}
 
           Changes in this release:
-          ${{ github.event.pull_request.title }} (#${{ github.event.pull_request.number }})
\ No newline at end of file
+          ${{ github.event.pull_request.title }} (#${{ github.event.pull_request.number }})
diff --git a/.gitignore b/.gitignore
index af0b036..280adde 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,26 +1,26 @@
-node_modules
-
-# Output
-.output
-.vercel
-/.svelte-kit
-/build
-
-# OS
-.DS_Store
-Thumbs.db
-
-# Env
-.env
-.env.*
-!.env.example
-!.env.test
-
-# Vite
-vite.config.js.timestamp-*
-vite.config.ts.timestamp-*
-
-.dockerdata
-.aider*
-.env
-.env
+node_modules
+
+# Output
+.output
+.vercel
+/.svelte-kit
+/build
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Env
+.env
+.env.*
+!.env.example
+!.env.test
+
+# Vite
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
+
+.dockerdata
+.aider*
+.env
+.env
diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml
new file mode 100644
index 0000000..a55e7a1
--- /dev/null
+++ b/.idea/codeStyles/codeStyleConfig.xml
@@ -0,0 +1,5 @@
+<component name="ProjectCodeStyleConfiguration">
+  <state>
+    <option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
+  </state>
+</component>
\ No newline at end of file
diff --git a/README.md b/README.md
index 34a115c..48ff4da 100644
--- a/README.md
+++ b/README.md
@@ -5,16 +5,14 @@ Scriberr is a self-hostable AI audio transcription app. It leverages the open-so
 **Note**: This app is under active development, and this release includes **breaking changes**. You will lose your old data.
 Please read the installation instructions carefully.
 
-** DIARIZATION UPDATE **: Diarization is under heavy development and will be disabled until an appropriate implementation is able to be handled. Currently it does not perform to expectation and is being disabled. Hoping to have this released as a full feature in 0.5.0.
-
 ### Build Status
 
 **Main Branch:**
-[![Main Docker](https://github.com/rishikanthc/Scriberr/actions/workflows/main-docker.yml/badge.svg)](https://github.com/rishikanthc/Scriberr/actions/workflows/main-docker.yml)
-[![Main CUDA Docker](https://github.com/rishikanthc/Scriberr/actions/workflows/main-cuda-docker.yml/badge.svg)](https://github.com/rishikanthc/Scriberr/actions/workflows/main-cuda-docker.yml)
+[![Main Docker](https://github.com/rishikanthc/Scriberr/actions/workflows/Main%20Docker%20Build.yml/badge.svg)](https://github.com/rishikanthc/Scriberr/actions/workflows/Main%20Docker%20Build.yml)
+[![Main CUDA Docker](https://github.com/rishikanthc/Scriberr/actions/workflows/Main%20Cuda%20Docker%20Build.yml/badge.svg)](https://github.com/rishikanthc/Scriberr/actions/workflows/Main%20Cuda%20Docker%20Build.yml)
 
 **Nightly Branch:**
-[![Nightly Docker](https://github.com/rishikanthc/Scriberr/actions/workflows/nightly-docker.yml/badge.svg)](https://github.com/rishikanthc/Scriberr/actions/workflows/nightly-docker.yml)
-[![Nightly CUDA Docker](https://github.com/rishikanthc/Scriberr/actions/workflows/nightly-cuda-docker.yml/badge.svg)](https://github.com/rishikanthc/Scriberr/actions/workflows/nightly-cuda-docker.yml)
+[![Nightly Docker](https://github.com/rishikanthc/Scriberr/actions/workflows/Nightly%20Docker%20Build.yml/badge.svg)](https://github.com/rishikanthc/Scriberr/actions/workflows/Nightly%20Docker%20Build.yml)
+[![Nightly CUDA Docker](https://github.com/rishikanthc/Scriberr/actions/workflows/Nightly%20Cuda%20Docker%20Build.yml/badge.svg)](https://github.com/rishikanthc/Scriberr/actions/workflows/Nightly%20Cuda%20Docker%20Build.yml)
 
 ## Table of Contents
 
@@ -158,14 +156,23 @@ The application can be customized using the following environment variables in y
 
 #### Speaker Diarization Setup
 
-Scriberr uses the PyAnnote speaker diarization model from HuggingFace, which requires an API key for download. During the initial setup process:
-
+##### Required Models
+The application requires access to the following Hugging Face models:
+
+* pyannote/speaker-diarization-3.1
+* pyannote/segmentation-3.0
+
+###### Setup Steps
+1. Create a free account at [HuggingFace](https://huggingface.co/) if you don’t already have one.
+2. Generate an API token at [HuggingFace Tokens](https://huggingface.co/settings/tokens).
+3. Accept the user conditions for the required models on Hugging Face:
+   - Visit [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1) and accept the conditions.
+   - Visit [pyannote/segmentation-3.0](https://huggingface.co/pyannote/segmentation-3.0) and accept the conditions.
+4. Enter the API token in the setup wizard when prompted. The token is only used during initial setup and is not stored permanently.
+
+###### Storage and Usage
+
-1. Create a free account at [HuggingFace](https://huggingface.co/)
-2. Generate an API token at https://huggingface.co/settings/tokens
-3. Enter this token in the setup wizard when prompted
-4. The token is only used during initial setup and is not stored permanently
+The diarization models are downloaded once and stored locally, so you won’t need to provide the API key again after the initial setup.
-The diarization model is downloaded once and stored locally, so you won't need to provide the API key again after setup.
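A quick way to check a token before running the setup wizard is to query the Hugging Face Hub API directly for the two gated models listed above. This sketch is editorial and not part of the patch; it assumes the token is exported as `HF_TOKEN` in your shell:

```bash
#!/usr/bin/env bash
# Check whether a Hugging Face token can access the gated pyannote models.
# HTTP 200 means access is granted; 401/403 usually means the token is
# invalid or the model's user conditions have not been accepted yet.
for model in pyannote/speaker-diarization-3.1 pyannote/segmentation-3.0; do
  status=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "Authorization: Bearer ${HF_TOKEN}" \
    "https://huggingface.co/api/models/${model}")
  echo "${model}: HTTP ${status}"
done
```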
 
 ### Updating from Previous Versions
 
@@ -226,4 +233,4 @@ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file
 
 ---
 
-*Thank you for your patience, support, and interest in the project. Looking forward to any and all feedback.*
\ No newline at end of file
+*Thank you for your patience, support, and interest in the project. Looking forward to any and all feedback.*
diff --git a/docker-compose.gpu.yml b/docker-compose.gpu.yml
index d70e130..04be4d9 100644
--- a/docker-compose.gpu.yml
+++ b/docker-compose.gpu.yml
@@ -1,19 +1,19 @@
-# This can be added when running the main docker-compose.yml file to add GPU support
-# Add this in your command line: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up
-services:
-  app:
-    build:
-      context: .
-      dockerfile: Dockerfile-cuda128
-    # You can find your architecture by running: nvidia-smi if on Linux
-    # You can find your architecture by running: system_profiler SPDisplaysDataType if on Mac
-    # You can find your architecture by running: wmic path win32_videocontroller get name if on Windows
-    # You will need to change the image to match your architecture, e.g. "main-cuda-11"
-    image: ghcr.io/rishikanthc/scriberr:${IMAGE_TAG:-main-gpu}
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
+# This can be added when running the main docker-compose.yml file to add GPU support
+# Add this in your command line: docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up
+services:
+  app:
+    build:
+      context: .
+      dockerfile: Dockerfile-gpu
+    # You can find your architecture by running: nvidia-smi if on Linux
+    # You can find your architecture by running: system_profiler SPDisplaysDataType if on Mac
+    # You can find your architecture by running: wmic path win32_videocontroller get name if on Windows
+    # You will need to change the image to match your architecture, e.g. "main-cuda-11"
+    image: ghcr.io/rishikanthc/scriberr:main-cuda-11
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
               capabilities: [gpu]
\ No newline at end of file
diff --git a/env.example b/env.example
index 782cf4f..8eeaae7 100644
--- a/env.example
+++ b/env.example
@@ -1,54 +1,52 @@
-# .env file
-# Docker image configuration
-IMAGE_TAG=main # Docker image tag to use for building the Docker image
-PORT=3000 # Port to use for running the web interface
-
-# Database configuration
-POSTGRES_PORT=5432 # Port to use for PostgreSQL database
-POSTGRES_USER=root # Username for PostgreSQL database
-POSTGRES_PASSWORD=mysecretpassword # Password for PostgreSQL database
-POSTGRES_DB=local # Database name
-DATABASE_URL=postgres://root:mysecretpassword@db:5432/local # Database URL for connection to PostgreSQL database with credentials from above
-
-# Application configuration
-ADMIN_USERNAME=admin # Username for admin user in web interface
-ADMIN_PASSWORD=password # Password for admin user in web interface
-
-# AI configuration
-# Default Model to use for transcription, can be set to any OpenAI model or Ollama model
-# For Ollama connections, enter the model name and version number. E.g. llama3.2:latest
-AI_MODEL="gpt-3.5-turbo"
-
-# Leave blank to use default (OpenAI API), otherwise set to the base URL of your OpenAI API compatible server
-# For Ollama connections, enter the IP of the Ollama server, and then the port it is running on.
-# Include the /v1/ or /api/v1/ path if needed (Open WebUI uses /api/ and Ollama uses /v1/)
-# Example: http://192.168.1.5:11434 or http://host.docker.internal:11434
-# NOTE: host.docker.internal is only available on Windows and macOS, use the IP address of the host machine on Linux
-# NOTE: localhost and 127.0.0.1 will not work, as they refer to the container itself, not the host machine
-OLLAMA_BASE_URL=""
-
-# API Keys
-# NOTE:
-# If using Ollama, you can leave these blank or set to a dummy value
-# If using OpenAI, you must set these to your API keys
-# If using a custom API compatible server, you must set these to your API keys
-OPENAI_API_KEY="" # Needed for retrieving models from OpenAI, for Ollama connections, this can be left blank or set to a dummy value
-
-# Diarization configuration
-# Default Model to use for Diarization, can be set to any compatible model that supports diarization
-# NOTE: This model will be downloaded automatically if it is not already present in the models directory
-# NOTE: You MUST provide a valid HuggingFace API token with access to pyannote/speaker-diarization models
-
-DIARIZATION_MODEL=pyannote/speaker-diarization@3.0
-HUGGINGFACE_TOKEN="" # Required for accessing speaker diarization models from HuggingFace
-
-# Paths
-# These almost never need to be changed. They are the paths to the directories where the models and audio files are stored
-MODELS_DIR=/scriberr/models
-WORK_DIR=/scriberr/temp
-AUDIO_DIR=/scriberr/uploads
-
-# Server configuration
-BODY_SIZE_LIMIT=1G
-HARDWARE_ACCEL=cpu # Set to 'gpu' if you have an Nvidia GPU
+# .env file
+# Docker image configuration
+IMAGE_TAG=main # Docker image tag to use for building the Docker image
+PORT=3000 # Port to use for running the web interface
+
+# Database configuration
+POSTGRES_PORT=5432 # Port to use for PostgreSQL database
+POSTGRES_USER=root # Username for PostgreSQL database
+POSTGRES_PASSWORD=mysecretpassword # Password for PostgreSQL database
+POSTGRES_DB=local # Database name
+DATABASE_URL=postgres://root:mysecretpassword@db:5432/local # Database URL for connection to PostgreSQL database with credentials from above
+
+# Application configuration
+ADMIN_USERNAME=admin # Username for admin user in web interface
+ADMIN_PASSWORD=password # Password for admin user in web interface
+
+# AI configuration
+# Default Model to use for transcription, can be set to any OpenAI model or Ollama model
+# For Ollama connections, enter the model name and version number. E.g. llama3.2:latest
+AI_MODEL="gpt-3.5-turbo"
+# Leave blank to use default (OpenAI API), otherwise set to the base URL of your OpenAI API compatible server
+# For Ollama connections, enter the IP of the Ollama server, and then the port it is running on.
+# Include the /v1/ or /api/v1/ path if needed (Open WebUI uses /api/ and Ollama uses /v1/)
+# Example: http://192.168.1.5:11434 or http://host.docker.internal:11434
+# NOTE: host.docker.internal is only available on Windows and macOS, use the IP address of the host machine on Linux
+# NOTE: localhost and 127.0.0.1 will not work, as they refer to the container itself, not the host machine
+OLLAMA_BASE_URL=""
+
+# API Keys
+# NOTE:
+# If using Ollama, you can leave these blank or set to a dummy value
+# If using OpenAI, you must set these to your API keys
+# If using a custom API compatible server, you must set these to your API keys
+OPENAI_API_KEY="" # Needed for retrieving models from OpenAI, for Ollama connections, this can be left blank or set to a dummy value
+
+# Diarization configuration
+# Default Model to use for Diarization, can be set to any compatible model that supports diarization
+# NOTE: This model will be downloaded automatically if it is not already present in the models directory
+# NOTE: You MUST provide a valid HuggingFace API token with access to pyannote/speaker-diarization models
+DIARIZATION_MODEL=pyannote/speaker-diarization@3.0
+HUGGINGFACE_TOKEN="" # Required for accessing speaker diarization models from HuggingFace
+
+# Paths
+# These almost never need to be changed. They are the paths to the directories where the models and audio files are stored
+MODELS_DIR=/scriberr/models
+WORK_DIR=/scriberr/temp
+AUDIO_DIR=/scriberr/uploads
+
+# Server configuration
+BODY_SIZE_LIMIT=1G
+HARDWARE_ACCEL=cpu # Set to 'gpu' if you have an Nvidia GPU
+USE_WORKER=true # Enable background processing of transcription jobs
\ No newline at end of file
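Read together, env.example and the compose files above imply the following bring-up flow. A minimal sketch, assuming you run it from the repository root and, for the GPU path, have an Nvidia driver plus the NVIDIA container toolkit on the host; the `cp` step is an assumed convention, not something the diff itself spells out:

```bash
# Create a working .env from the example, then edit credentials,
# OLLAMA_BASE_URL / API keys, and HARDWARE_ACCEL as needed.
cp env.example .env

# CPU-only stack:
docker-compose -f docker-compose.yml up -d

# GPU stack: layer the override file on top of the base compose file,
# exactly as the comment in docker-compose.gpu.yml describes.
docker-compose -f docker-compose.yml -f docker-compose.gpu.yml up -d
```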