Respair committed on
Commit
37fbd5e
·
verified ·
1 Parent(s): f188e49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -83,7 +83,7 @@ def load_examples(csv_path):
83
 
84
  return examples
85
 
86
- # Client wrapper for main generation - FIXED
87
  def run_generation_pipeline_client(
88
  raw_text,
89
  audio_prompt,
@@ -94,10 +94,18 @@ def run_generation_pipeline_client(
94
  use_chained_longform
95
  ):
96
  try:
 
 
 
 
 
 
 
 
97
  # Call the backend API
98
  result = client.predict(
99
  raw_text,
100
- audio_prompt,
101
  num_candidates,
102
  cfg_scale,
103
  top_k,
@@ -126,7 +134,7 @@ def run_generation_pipeline_client(
126
  except Exception as e:
127
  return None, f"Status: Connection error: {str(e)}"
128
 
129
- # Client wrapper for duration-aware generation - FIXED
130
  def run_duration_generation_pipeline_client(
131
  raw_text,
132
  audio_prompt,
@@ -140,10 +148,18 @@ def run_duration_generation_pipeline_client(
140
  chars_per_second
141
  ):
142
  try:
 
 
 
 
 
 
 
 
143
  # Call the backend API
144
  result = client.predict(
145
  raw_text,
146
- audio_prompt,
147
  num_candidates,
148
  cfg_scale,
149
  top_k,
@@ -427,7 +443,7 @@ with gr.Blocks(theme="Respair/Shiki@9.1.0", css=css) as demo:
427
  Both checkpoints have been fine-tuned on a subset of the dataset with only speaker tags. This will allow us to generate high quality samples without relying on audio prompts or dealing with random speaker attributes, but at the cost of tanking the zero-shot faithfulness of the model.
428
  </p>
429
 
430
- <p style="color: #1a1a1a; font-weight: 500; line-height: 1.8; margin-bottom: 20px; font-size: 16px;">e
431
  Takane also comes with an Anti-Hallucination Algorithm (AHA) that generates a few candidates in parallel and automatically returns the best one at the cost of introducing a small overhead.
432
  If you need the fastest response time possible, feel free to enable the Turbo mode. It will disable AHA and tweak the parameters internally to produce samples as fast as 2-3 seconds.
433
  </p>
 
83
 
84
  return examples
85
 
86
+ # Client wrapper for main generation - FIXED for audio handling
87
  def run_generation_pipeline_client(
88
  raw_text,
89
  audio_prompt,
 
94
  use_chained_longform
95
  ):
96
  try:
97
+ # Convert audio prompt to serializable format
98
+ if audio_prompt is not None:
99
+ sample_rate, audio_data = audio_prompt
100
+ # Convert numpy array to list for JSON serialization
101
+ audio_prompt_serializable = (sample_rate, audio_data.tolist())
102
+ else:
103
+ audio_prompt_serializable = None
104
+
105
  # Call the backend API
106
  result = client.predict(
107
  raw_text,
108
+ audio_prompt_serializable,
109
  num_candidates,
110
  cfg_scale,
111
  top_k,
 
134
  except Exception as e:
135
  return None, f"Status: Connection error: {str(e)}"
136
 
137
+ # Client wrapper for duration-aware generation - FIXED for audio handling
138
  def run_duration_generation_pipeline_client(
139
  raw_text,
140
  audio_prompt,
 
148
  chars_per_second
149
  ):
150
  try:
151
+ # Convert audio prompt to serializable format
152
+ if audio_prompt is not None:
153
+ sample_rate, audio_data = audio_prompt
154
+ # Convert numpy array to list for JSON serialization
155
+ audio_prompt_serializable = (sample_rate, audio_data.tolist())
156
+ else:
157
+ audio_prompt_serializable = None
158
+
159
  # Call the backend API
160
  result = client.predict(
161
  raw_text,
162
+ audio_prompt_serializable,
163
  num_candidates,
164
  cfg_scale,
165
  top_k,
 
443
  Both checkpoints have been fine-tuned on a subset of the dataset with only speaker tags. This will allow us to generate high quality samples without relying on audio prompts or dealing with random speaker attributes, but at the cost of tanking the zero-shot faithfulness of the model.
444
  </p>
445
 
446
+ <p style="color: #1a1a1a; font-weight: 500; line-height: 1.8; margin-bottom: 20px; font-size: 16px;">
447
  Takane also comes with an Anti-Hallucination Algorithm (AHA) that generates a few candidates in parallel and automatically returns the best one at the cost of introducing a small overhead.
448
  If you need the fastest response time possible, feel free to enable the Turbo mode. It will disable AHA and tweak the parameters internally to produce samples as fast as 2-3 seconds.
449
  </p>