groxaxo committed
Commit f0d12fa · verified · 1 Parent(s): 5e7527f

Upload folder using huggingface_hub

Files changed (4)
  1. serve.sh +14 -0
  2. serve_model.py +48 -0
  3. serve_vllm.sh +23 -0
  4. upload_to_hf.py +67 -0
serve.sh ADDED
@@ -0,0 +1,14 @@
+ #!/bin/bash
+ # Script to serve the DeepSWE-Preview-FP8 model with vLLM
+
+ # Source conda
+ source /home/op/miniconda3/etc/profile.d/conda.sh
+
+ # Activate the vllm-model environment
+ conda activate vllm-model
+
+ # Note: pyparsing is required by pydot which is used by some vLLM components
+ # It has been installed in the vllm-model environment to prevent warnings
+
+ # Run the serving script
+ python /home/op/DeepSWE-Preview-FP8/serve_model.py
serve_model.py ADDED
@@ -0,0 +1,48 @@
+ #!/usr/bin/env python3
+ """
+ Script to serve the DeepSWE-Preview-FP8 model using vLLM with specific configurations:
+ - CUDA devices 1,2
+ - Max model length 32000
+ - Tensor parallel size 2
+ """
+
+ import os
+ import subprocess
+ import sys
+
+ def serve_model():
+     # Set CUDA_VISIBLE_DEVICES to use only GPUs 1 and 2
+     os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
+
+     # Build the vLLM command
+     cmd = [
+         "python", "-m", "vllm.entrypoints.openai.api_server",
+         "--host", "0.0.0.0",
+         "--port", "8550",
+         "--model", "/home/op/DeepSWE-Preview-FP8",  # Local model directory
+         "--max-model-len", "32000",
+         "--tensor-parallel-size", "2",
+         "--pipeline-parallel-size", "1",
+     ]
+
+     print("Starting vLLM server with the following configuration:")
+     print(f"CUDA_VISIBLE_DEVICES: {os.environ['CUDA_VISIBLE_DEVICES']}")
+     print("Model path: /home/op/DeepSWE-Preview-FP8")
+     print("Max model length: 32000")
+     print("Tensor parallel size: 2")
+     print("Pipeline parallel size: 1")
+     print("\nCommand:", " ".join(cmd))
+     print("\n" + "=" * 50)
+
+     # Run the command
+     try:
+         subprocess.run(cmd, check=True)
+     except subprocess.CalledProcessError as e:
+         print(f"Error running vLLM server: {e}")
+         sys.exit(1)
+     except KeyboardInterrupt:
+         print("\nServer stopped by user")
+         sys.exit(0)
+
+ if __name__ == "__main__":
+     serve_model()
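
Once serve_model.py is up, it exposes vLLM's OpenAI-compatible HTTP API on port 8550. Below is a minimal smoke-test sketch using the requests library; it assumes the server is reachable on localhost and that the served model name defaults to the --model path, since no --served-model-name flag is passed above:

import requests

# Minimal smoke test against the OpenAI-compatible /v1/completions endpoint.
# Assumes localhost:8550 and that the served model name equals the --model path.
resp = requests.post(
    "http://localhost:8550/v1/completions",
    json={
        "model": "/home/op/DeepSWE-Preview-FP8",
        "prompt": "def fibonacci(n):",
        "max_tokens": 64,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["text"])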
serve_vllm.sh ADDED
@@ -0,0 +1,23 @@
+ #!/bin/bash
+ # Direct vLLM serving script for the DeepSWE-Preview-FP8 model
+
+ # Set CUDA devices
+ export CUDA_VISIBLE_DEVICES=1,2
+
+ # Source conda
+ source /home/op/miniconda3/etc/profile.d/conda.sh
+
+ # Activate the vllm-model environment
+ conda activate vllm-model
+
+ # Ensure pyparsing is available (needed by pydot)
+ # This was previously causing warnings about a missing pyparsing module
+
+ # Run vLLM with all specified parameters
+ python -m vllm.entrypoints.openai.api_server \
+     --host 0.0.0.0 \
+     --port 8550 \
+     --model /home/op/DeepSWE-Preview-FP8 \
+     --max-model-len 28000 \
+     --tensor-parallel-size 2 \
+     --trust-remote-code
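
Loading seven safetensors shards across two GPUs can take a while, so a caller may want to wait until the server is ready before sending traffic. A small polling sketch, assuming the same host and port as the scripts above:

import time
import requests

# Poll /v1/models until the server answers; vLLM returns 200 once the model is loaded.
for _ in range(60):
    try:
        if requests.get("http://localhost:8550/v1/models", timeout=5).status_code == 200:
            print("Server is ready")
            break
    except requests.ConnectionError:
        pass  # Server is not accepting connections yet
    time.sleep(10)
else:
    print("Server did not become ready in time")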
upload_to_hf.py ADDED
@@ -0,0 +1,67 @@
+ import os
+ from huggingface_hub import HfApi
+
+ # Initialize the Hugging Face API
+ api = HfApi()
+
+ # Repository details - using the agentica-org namespace as requested
+ repo_id = "agentica-org/DeepSWE-Preview-FP8"  # Using the organization namespace
+ local_dir = "/home/op/DeepSWE-Preview-FP8"
+
+ # Files to upload (excluding serving scripts)
+ files_to_upload = [
+     "README.md",
+     "added_tokens.json",
+     "chat_template.jinja",
+     "config.json",
+     "generation_config.json",
+     "merges.txt",
+     "model-00001-of-00007.safetensors",
+     "model-00002-of-00007.safetensors",
+     "model-00003-of-00007.safetensors",
+     "model-00004-of-00007.safetensors",
+     "model-00005-of-00007.safetensors",
+     "model-00006-of-00007.safetensors",
+     "model-00007-of-00007.safetensors",
+     "model.safetensors.index.json",
+     "special_tokens_map.json",
+     "tokenizer_config.json",
+     "tokenizer.json",
+     "vocab.json"
+ ]
+
+ def upload_model():
+     print(f"Creating repository {repo_id}...")
+
+     # Create the repository if it doesn't exist
+     try:
+         api.create_repo(repo_id=repo_id, repo_type="model", private=False, exist_ok=True)
+         print(f"Repository {repo_id} created or already exists.")
+     except Exception as e:
+         print(f"Error creating repository: {e}")
+         return
+
+     # Upload files
+     print("Uploading files...")
+     for file_name in files_to_upload:
+         file_path = os.path.join(local_dir, file_name)
+         if os.path.exists(file_path):
+             try:
+                 print(f"Uploading {file_name}...")
+                 api.upload_file(
+                     path_or_fileobj=file_path,
+                     path_in_repo=file_name,
+                     repo_id=repo_id,
+                     repo_type="model"
+                 )
+                 print(f"Uploaded {file_name}")
+             except Exception as e:
+                 print(f"Error uploading {file_name}: {e}")
+         else:
+             print(f"File not found: {file_path}")
+
+     print("Upload completed!")
+     print(f"Model uploaded to: https://huggingface.co/{repo_id}")
+
+ if __name__ == "__main__":
+     upload_model()
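
The commit message says the folder was uploaded with huggingface_hub, while upload_to_hf.py pushes each file individually. For reference, a single-call alternative sketch using huggingface_hub's upload_folder; the allow_patterns below are an assumption that approximates the explicit files_to_upload list:

from huggingface_hub import HfApi

api = HfApi()
# One-call alternative to the per-file loop above; the allow_patterns
# approximate files_to_upload and deliberately exclude the serving scripts.
api.upload_folder(
    folder_path="/home/op/DeepSWE-Preview-FP8",
    repo_id="agentica-org/DeepSWE-Preview-FP8",
    repo_type="model",
    allow_patterns=["*.safetensors", "*.json", "*.txt", "*.jinja", "README.md"],
)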