Spaces:
Sleeping
Sleeping
Commit
·
23beeea
1
Parent(s):
6f1d384
Undo the path update that was causing permission issues across the entire app
Browse files
- Dockerfile +20 -20
- app/api/routes.py +3 -8
- app/main.py +6 -7
- app/utils/voice_manager.py +5 -10
- app/voice_cloning.py +1 -2
- app/voice_enhancement.py +5 -5
- app/voice_memory.py +2 -3
Dockerfile
CHANGED
|
@@ -38,32 +38,31 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
| 38 |
ffmpeg \
|
| 39 |
git \
|
| 40 |
build-essential \
|
|
|
|
| 41 |
&& apt-get clean \
|
| 42 |
&& rm -rf /var/lib/apt/lists/*
|
| 43 |
|
| 44 |
-
# Create user and
|
| 45 |
-
RUN useradd -m -
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
ENV HOME=/home/user \
|
| 50 |
-
PATH=/home/user/.local/bin:$PATH
|
| 51 |
|
| 52 |
# Set working directory
|
| 53 |
-
WORKDIR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
# Copy requirements first for better caching
|
| 56 |
COPY --chown=user:user requirements.txt .
|
| 57 |
|
| 58 |
-
#
|
| 59 |
-
|
| 60 |
-
$HOME/app/voice_profiles $HOME/app/cloned_voices $HOME/app/audio_cache $HOME/app/tokenizers $HOME/app/logs && \
|
| 61 |
-
chmod -R 755 $HOME/app && \
|
| 62 |
-
chmod -R 777 $HOME/app/voice_references $HOME/app/voice_profiles $HOME/app/voice_memories \
|
| 63 |
-
$HOME/app/cloned_voices $HOME/app/audio_cache $HOME/app/static $HOME/app/logs $HOME/app/tokenizers $HOME/app/models
|
| 64 |
-
|
| 65 |
-
# Copy static files
|
| 66 |
-
COPY --chown=user:user ./static $HOME/app/static
|
| 67 |
|
| 68 |
# Install Python dependencies
|
| 69 |
RUN pip3 install --no-cache-dir --upgrade pip && \
|
|
@@ -85,11 +84,12 @@ RUN pip3 install -r requirements.txt
|
|
| 85 |
# Install additional dependencies for streaming and voice cloning
|
| 86 |
RUN pip3 install yt-dlp openai-whisper
|
| 87 |
|
| 88 |
-
# Copy application code
|
| 89 |
-
COPY --chown=user:user ./
|
|
|
|
| 90 |
|
| 91 |
# Copy downloaded model from the model-downloader stage
|
| 92 |
-
COPY --chown=user:user --from=model-downloader /model-downloader/models
|
| 93 |
|
| 94 |
# Show available models in torchtune
|
| 95 |
RUN python3 -c "import torchtune.models; print('Available models in torchtune:', dir(torchtune.models))"
|
|
|
|
| 38 |
ffmpeg \
|
| 39 |
git \
|
| 40 |
build-essential \
|
| 41 |
+
sudo \
|
| 42 |
&& apt-get clean \
|
| 43 |
&& rm -rf /var/lib/apt/lists/*
|
| 44 |
|
| 45 |
+
# Create user and give sudo access
|
| 46 |
+
RUN useradd -m -s /bin/bash user && \
|
| 47 |
+
usermod -aG sudo user && \
|
| 48 |
+
echo "user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# Set working directory
|
| 51 |
+
WORKDIR /app
|
| 52 |
+
|
| 53 |
+
# Create and set up persistent directories with proper permissions
|
| 54 |
+
RUN mkdir -p /app/static /app/models /app/voice_memories /app/voice_references \
|
| 55 |
+
/app/voice_profiles /app/cloned_voices /app/audio_cache /app/tokenizers /app/logs && \
|
| 56 |
+
chown -R user:user /app && \
|
| 57 |
+
chmod -R 755 /app && \
|
| 58 |
+
chmod -R 777 /app/voice_references /app/voice_profiles /app/voice_memories \
|
| 59 |
+
/app/cloned_voices /app/audio_cache /app/static /app/logs /app/tokenizers /app/models
|
| 60 |
|
| 61 |
# Copy requirements first for better caching
|
| 62 |
COPY --chown=user:user requirements.txt .
|
| 63 |
|
| 64 |
+
# Switch to user
|
| 65 |
+
USER user
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
# Install Python dependencies
|
| 68 |
RUN pip3 install --no-cache-dir --upgrade pip && \
|
|
|
|
| 84 |
# Install additional dependencies for streaming and voice cloning
|
| 85 |
RUN pip3 install yt-dlp openai-whisper
|
| 86 |
|
| 87 |
+
# Copy static files and application code
|
| 88 |
+
COPY --chown=user:user ./static /app/static
|
| 89 |
+
COPY --chown=user:user ./app /app/app
|
| 90 |
|
| 91 |
# Copy downloaded model from the model-downloader stage
|
| 92 |
+
COPY --chown=user:user --from=model-downloader /model-downloader/models /app/models
|
| 93 |
|
| 94 |
# Show available models in torchtune
|
| 95 |
RUN python3 -c "import torchtune.models; print('Available models in torchtune:', dir(torchtune.models))"
|
app/api/routes.py
CHANGED
|
@@ -16,7 +16,7 @@ import numpy as np
|
|
| 16 |
from fastapi import APIRouter, Request, HTTPException, BackgroundTasks, Body, Response, Query
|
| 17 |
from fastapi.responses import StreamingResponse
|
| 18 |
from app.api.schemas import SpeechRequest, ResponseFormat, Voice
|
| 19 |
-
from app.models import Segment
|
| 20 |
from app.api.streaming import AudioChunker
|
| 21 |
from app.prompt_engineering import split_into_segments
|
| 22 |
|
|
@@ -24,11 +24,6 @@ from app.prompt_engineering import split_into_segments
|
|
| 24 |
logger = logging.getLogger(__name__)
|
| 25 |
router = APIRouter()
|
| 26 |
|
| 27 |
-
# Constants
|
| 28 |
-
APP_DIR = os.path.join(os.environ['HOME'], 'app')
|
| 29 |
-
AUDIO_CACHE_DIR = os.path.join(APP_DIR, "audio_cache")
|
| 30 |
-
os.makedirs(AUDIO_CACHE_DIR, exist_ok=True)
|
| 31 |
-
|
| 32 |
# Mapping of response_format to MIME types
|
| 33 |
MIME_TYPES = {
|
| 34 |
"mp3": "audio/mpeg",
|
|
@@ -438,7 +433,7 @@ async def format_audio(audio, response_format, sample_rate, app_state):
|
|
| 438 |
# Generate a hash of the audio tensor for caching
|
| 439 |
audio_hash = hashlib.md5(audio.cpu().numpy().tobytes()).hexdigest()
|
| 440 |
cache_key = f"{audio_hash}_{response_format}"
|
| 441 |
-
cache_dir = getattr(app_state, "audio_cache_dir",
|
| 442 |
os.makedirs(cache_dir, exist_ok=True)
|
| 443 |
cache_path = os.path.join(cache_dir, f"{cache_key}")
|
| 444 |
|
|
@@ -539,7 +534,7 @@ async def format_audio(audio, response_format, sample_rate, app_state):
|
|
| 539 |
# Store in cache if enabled
|
| 540 |
if cache_enabled and cache_key:
|
| 541 |
try:
|
| 542 |
-
cache_path = os.path.join(getattr(app_state, "audio_cache_dir",
|
| 543 |
with open(cache_path, "wb") as f:
|
| 544 |
f.write(response_data)
|
| 545 |
logger.debug(f"Cached {response_format} audio with key: {cache_key}")
|
|
|
|
| 16 |
from fastapi import APIRouter, Request, HTTPException, BackgroundTasks, Body, Response, Query
|
| 17 |
from fastapi.responses import StreamingResponse
|
| 18 |
from app.api.schemas import SpeechRequest, ResponseFormat, Voice
|
| 19 |
+
from app.models import Segment
|
| 20 |
from app.api.streaming import AudioChunker
|
| 21 |
from app.prompt_engineering import split_into_segments
|
| 22 |
|
|
|
|
| 24 |
logger = logging.getLogger(__name__)
|
| 25 |
router = APIRouter()
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Mapping of response_format to MIME types
|
| 28 |
MIME_TYPES = {
|
| 29 |
"mp3": "audio/mpeg",
|
|
|
|
| 433 |
# Generate a hash of the audio tensor for caching
|
| 434 |
audio_hash = hashlib.md5(audio.cpu().numpy().tobytes()).hexdigest()
|
| 435 |
cache_key = f"{audio_hash}_{response_format}"
|
| 436 |
+
cache_dir = getattr(app_state, "audio_cache_dir", "/app/audio_cache")
|
| 437 |
os.makedirs(cache_dir, exist_ok=True)
|
| 438 |
cache_path = os.path.join(cache_dir, f"{cache_key}")
|
| 439 |
|
|
|
|
| 534 |
# Store in cache if enabled
|
| 535 |
if cache_enabled and cache_key:
|
| 536 |
try:
|
| 537 |
+
cache_path = os.path.join(getattr(app_state, "audio_cache_dir", "/app/audio_cache"), f"{cache_key}")
|
| 538 |
with open(cache_path, "wb") as f:
|
| 539 |
f.write(response_data)
|
| 540 |
logger.debug(f"Cached {response_format} audio with key: {cache_key}")
|
app/main.py
CHANGED
|
@@ -54,7 +54,7 @@ async def lifespan(app: FastAPI):
|
|
| 54 |
app.state.logger = logger # Make logger available to routes
|
| 55 |
|
| 56 |
# Create necessary directories - use persistent locations
|
| 57 |
-
APP_DIR =
|
| 58 |
os.makedirs(os.path.join(APP_DIR, "models"), exist_ok=True)
|
| 59 |
os.makedirs(os.path.join(APP_DIR, "tokenizers"), exist_ok=True)
|
| 60 |
os.makedirs(os.path.join(APP_DIR, "voice_memories"), exist_ok=True)
|
|
@@ -498,12 +498,11 @@ app.add_middleware(
|
|
| 498 |
)
|
| 499 |
|
| 500 |
# Create static and other required directories
|
| 501 |
-
|
| 502 |
-
os.makedirs(
|
| 503 |
-
os.makedirs(os.path.join(APP_DIR, "cloned_voices"), exist_ok=True)
|
| 504 |
|
| 505 |
# Mount the static files directory
|
| 506 |
-
app.mount("/static", StaticFiles(directory=
|
| 507 |
|
| 508 |
# Include routers
|
| 509 |
app.include_router(api_router, prefix="/api/v1")
|
|
@@ -590,13 +589,13 @@ async def version():
|
|
| 590 |
@app.get("/voice-cloning", include_in_schema=False)
|
| 591 |
async def voice_cloning_ui():
|
| 592 |
"""Voice cloning UI endpoint."""
|
| 593 |
-
return FileResponse(
|
| 594 |
|
| 595 |
# Streaming demo endpoint
|
| 596 |
@app.get("/streaming-demo", include_in_schema=False)
|
| 597 |
async def streaming_demo():
|
| 598 |
"""Streaming TTS demo endpoint."""
|
| 599 |
-
return FileResponse(
|
| 600 |
|
| 601 |
@app.get("/", include_in_schema=False)
|
| 602 |
async def root():
|
|
|
|
| 54 |
app.state.logger = logger # Make logger available to routes
|
| 55 |
|
| 56 |
# Create necessary directories - use persistent locations
|
| 57 |
+
APP_DIR = "/app"
|
| 58 |
os.makedirs(os.path.join(APP_DIR, "models"), exist_ok=True)
|
| 59 |
os.makedirs(os.path.join(APP_DIR, "tokenizers"), exist_ok=True)
|
| 60 |
os.makedirs(os.path.join(APP_DIR, "voice_memories"), exist_ok=True)
|
|
|
|
| 498 |
)
|
| 499 |
|
| 500 |
# Create static and other required directories
|
| 501 |
+
os.makedirs("/app/static", exist_ok=True)
|
| 502 |
+
os.makedirs("/app/cloned_voices", exist_ok=True)
|
|
|
|
| 503 |
|
| 504 |
# Mount the static files directory
|
| 505 |
+
app.mount("/static", StaticFiles(directory="/app/static"), name="static")
|
| 506 |
|
| 507 |
# Include routers
|
| 508 |
app.include_router(api_router, prefix="/api/v1")
|
|
|
|
| 589 |
@app.get("/voice-cloning", include_in_schema=False)
|
| 590 |
async def voice_cloning_ui():
|
| 591 |
"""Voice cloning UI endpoint."""
|
| 592 |
+
return FileResponse("/app/static/voice-cloning.html")
|
| 593 |
|
| 594 |
# Streaming demo endpoint
|
| 595 |
@app.get("/streaming-demo", include_in_schema=False)
|
| 596 |
async def streaming_demo():
|
| 597 |
"""Streaming TTS demo endpoint."""
|
| 598 |
+
return FileResponse("/app/static/streaming-demo.html")
|
| 599 |
|
| 600 |
@app.get("/", include_in_schema=False)
|
| 601 |
async def root():
|
app/utils/voice_manager.py
CHANGED
|
@@ -8,23 +8,18 @@ from typing import Dict, List, Optional, Any
|
|
| 8 |
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
| 11 |
-
#
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
VOICE_MEMORIES_DIR = os.path.join(APP_DIR, "voice_memories")
|
| 16 |
-
VOICE_BACKUPS_DIR = os.path.join(APP_DIR, "voice_backups")
|
| 17 |
|
| 18 |
# Ensure directories exist
|
| 19 |
os.makedirs(VOICE_REFERENCES_DIR, exist_ok=True)
|
| 20 |
os.makedirs(VOICE_PROFILES_DIR, exist_ok=True)
|
| 21 |
os.makedirs(VOICE_MEMORIES_DIR, exist_ok=True)
|
| 22 |
-
os.makedirs(VOICE_BACKUPS_DIR, exist_ok=True)
|
| 23 |
|
| 24 |
-
def backup_voice_data(backup_dir: str =
|
| 25 |
"""Create a backup of all voice data."""
|
| 26 |
-
if backup_dir is None:
|
| 27 |
-
backup_dir = VOICE_BACKUPS_DIR
|
| 28 |
os.makedirs(backup_dir, exist_ok=True)
|
| 29 |
timestamp = torch.datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 30 |
backup_path = os.path.join(backup_dir, f"voice_backup_{timestamp}")
|
|
|
|
| 8 |
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
| 11 |
+
# Define persistent paths
|
| 12 |
+
VOICE_REFERENCES_DIR = "/app/voice_references"
|
| 13 |
+
VOICE_PROFILES_DIR = "/app/voice_profiles"
|
| 14 |
+
VOICE_MEMORIES_DIR = "/app/voice_memories"
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Ensure directories exist
|
| 17 |
os.makedirs(VOICE_REFERENCES_DIR, exist_ok=True)
|
| 18 |
os.makedirs(VOICE_PROFILES_DIR, exist_ok=True)
|
| 19 |
os.makedirs(VOICE_MEMORIES_DIR, exist_ok=True)
|
|
|
|
| 20 |
|
| 21 |
+
def backup_voice_data(backup_dir: str = "/app/voice_backups"):
|
| 22 |
"""Create a backup of all voice data."""
|
|
|
|
|
|
|
| 23 |
os.makedirs(backup_dir, exist_ok=True)
|
| 24 |
timestamp = torch.datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 25 |
backup_path = os.path.join(backup_dir, f"voice_backup_{timestamp}")
|
app/voice_cloning.py
CHANGED
|
@@ -27,8 +27,7 @@ from app.models import Segment
|
|
| 27 |
logger = logging.getLogger(__name__)
|
| 28 |
|
| 29 |
# Directory for storing cloned voice data
|
| 30 |
-
|
| 31 |
-
CLONED_VOICES_DIR = os.path.join(APP_DIR, "cloned_voices")
|
| 32 |
os.makedirs(CLONED_VOICES_DIR, exist_ok=True)
|
| 33 |
|
| 34 |
class ClonedVoice(BaseModel):
|
|
|
|
| 27 |
logger = logging.getLogger(__name__)
|
| 28 |
|
| 29 |
# Directory for storing cloned voice data
|
| 30 |
+
CLONED_VOICES_DIR = "/app/cloned_voices"
|
|
|
|
| 31 |
os.makedirs(CLONED_VOICES_DIR, exist_ok=True)
|
| 32 |
|
| 33 |
class ClonedVoice(BaseModel):
|
app/voice_enhancement.py
CHANGED
|
@@ -8,15 +8,13 @@ from typing import Dict, List, Optional, Tuple
|
|
| 8 |
import logging
|
| 9 |
from dataclasses import dataclass
|
| 10 |
from scipy import signal
|
| 11 |
-
from app.models import Segment
|
| 12 |
|
| 13 |
# Setup logging
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
-
#
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
VOICE_PROFILES_DIR = os.path.join(APP_DIR, "voice_profiles")
|
| 20 |
|
| 21 |
# Ensure directories exist
|
| 22 |
os.makedirs(VOICE_REFERENCES_DIR, exist_ok=True)
|
|
@@ -478,6 +476,8 @@ def get_voice_segments(voice_name: str, device: torch.device) -> List:
|
|
| 478 |
Returns:
|
| 479 |
List of context segments
|
| 480 |
"""
|
|
|
|
|
|
|
| 481 |
if voice_name not in VOICE_PROFILES:
|
| 482 |
logger.warning(f"Voice {voice_name} not found, defaulting to alloy")
|
| 483 |
voice_name = "alloy"
|
|
|
|
| 8 |
import logging
|
| 9 |
from dataclasses import dataclass
|
| 10 |
from scipy import signal
|
|
|
|
| 11 |
|
| 12 |
# Setup logging
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
+
# Define persistent paths
|
| 16 |
+
VOICE_REFERENCES_DIR = "/app/voice_references"
|
| 17 |
+
VOICE_PROFILES_DIR = "/app/voice_profiles"
|
|
|
|
| 18 |
|
| 19 |
# Ensure directories exist
|
| 20 |
os.makedirs(VOICE_REFERENCES_DIR, exist_ok=True)
|
|
|
|
| 476 |
Returns:
|
| 477 |
List of context segments
|
| 478 |
"""
|
| 479 |
+
from app.models import Segment
|
| 480 |
+
|
| 481 |
if voice_name not in VOICE_PROFILES:
|
| 482 |
logger.warning(f"Voice {voice_name} not found, defaulting to alloy")
|
| 483 |
voice_name = "alloy"
|
app/voice_memory.py
CHANGED
|
@@ -12,9 +12,8 @@ from app.models import Segment
|
|
| 12 |
# Setup logging
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
-
#
|
| 16 |
-
|
| 17 |
-
VOICE_MEMORIES_DIR = os.path.join(APP_DIR, "voice_memories")
|
| 18 |
os.makedirs(VOICE_MEMORIES_DIR, exist_ok=True)
|
| 19 |
|
| 20 |
@dataclass
|
|
|
|
| 12 |
# Setup logging
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
+
# Path to store voice memories - use persistent location
|
| 16 |
+
VOICE_MEMORIES_DIR = "/app/voice_memories"
|
|
|
|
| 17 |
os.makedirs(VOICE_MEMORIES_DIR, exist_ok=True)
|
| 18 |
|
| 19 |
@dataclass
|