ford442 committed on
Commit
205e5e8
·
verified ·
1 Parent(s): 701a8e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -2
app.py CHANGED
@@ -12,10 +12,31 @@ def install_flashattn():
12
  subprocess.run(['sh', './flashattn.sh'])
13
  install_flashattn()
14
 
15
- import time
16
- import gradio as gr
 
 
 
 
 
17
  import torch as th
18
  import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  import numpy as np
20
  import tempfile
21
  from diffusers import AutoencoderKLWan
 
12
  subprocess.run(['sh', './flashattn.sh'])
13
  install_flashattn()
14
 
15
+ # --- PyTorch Environment Setup ---
16
+ os.environ['PYTORCH_NVML_BASED_CUDA_CHECK'] = '1'
17
+ os.environ['TORCH_LINALG_PREFER_CUSOLVER'] = '1'
18
+ os.environ['PYTORCH_ALLOC_CONF'] = 'expandable_segments:True,pinned_use_background_threads:True'
19
+ os.environ["SAFETENSORS_FAST_GPU"] = "1"
20
+ os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '1'
21
+
22
  import torch as th
23
  import torch
24
+
25
+ # Set precision settings for reproducibility and performance
26
+ torch.backends.cuda.matmul.allow_tf32 = False
27
+ torch.backends.cudnn.allow_tf32 = False
28
+ torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
29
+ torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
30
+ torch.backends.cudnn.deterministic = False
31
+ torch.backends.cudnn.benchmark = False # Set to True for potential speedup if input sizes are static, False for dynamic
32
+ torch.backends.cuda.preferred_blas_library="cublas"
33
+ torch.backends.cuda.preferred_linalg_library="cusolver"
34
+ torch.set_float32_matmul_precision("highest")
35
+
36
+
37
+ import time
38
+ import gradio as gr
39
+
40
  import numpy as np
41
  import tempfile
42
  from diffusers import AutoencoderKLWan