SunDou committed on
Commit
218123c
·
verified ·
1 Parent(s): 80f6a63

Upload data1/rename.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. data1/rename.py +52 -0
data1/rename.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from pathlib import Path
4
+
5
+
6
def rename_repos(csv_path: str, repos_dir: str):
    """Rename repo directories under ``repos_dir`` to ``owner___repo``.

    The CSV at ``csv_path`` must have a ``full_name`` column holding
    ``owner/repo`` values. For every row, the directory names ``repo``,
    ``owner_repo`` and ``owner__repo`` are treated as old spellings of the
    canonical ``owner___repo`` (three underscores).

    An old spelling that is produced by rows with *different* canonical
    names (e.g. two owners with a repo of the same name) is ambiguous; such
    directories are reported and left untouched rather than being assigned
    to an arbitrary owner. Rows whose ``full_name`` is not a string of the
    form ``owner/repo`` are reported and ignored.

    Args:
        csv_path: Path to the CSV file with the ``full_name`` column.
        repos_dir: Directory whose immediate, non-hidden sub-directories
            are candidates for renaming.

    Progress and a final summary are printed to stdout; nothing is returned.
    """
    repos_dir = Path(repos_dir)
    df = pd.read_csv(csv_path)

    # Build lookup: possible old names -> new name (owner___repo)
    lookup = {}
    canonical = set()  # every valid owner___repo name from the CSV
    ambiguous = set()  # old spellings claimed by more than one target
    for full_name in df["full_name"]:
        parts = full_name.split("/") if isinstance(full_name, str) else []
        if len(parts) != 2 or not all(parts):
            print(f"[WARN] Bad full_name in CSV: {full_name!r}")
            continue
        owner, repo = parts
        new_name = f"{owner}___{repo}"  # three underscores
        canonical.add(new_name)
        # Possible old patterns: bare repo, owner_repo, owner__repo
        for old_name in (repo, f"{owner}_{repo}", f"{owner}__{repo}"):
            if old_name in ambiguous:
                continue
            if lookup.setdefault(old_name, new_name) != new_name:
                # Two different targets claim this spelling: refuse to guess.
                del lookup[old_name]
                ambiguous.add(old_name)

    # A directory that already carries a canonical name is always correct,
    # even if that name happens to coincide with another row's old spelling.
    for name in canonical:
        lookup[name] = name
        ambiguous.discard(name)

    # Get existing directories (sorted so runs are deterministic)
    existing = sorted(
        d for d in repos_dir.iterdir() if d.is_dir() and not d.name.startswith(".")
    )

    renamed, skipped = 0, 0
    for d in existing:
        if d.name in ambiguous:
            print(f"[SKIP] Ambiguous, matches several CSV rows: {d.name}")
            skipped += 1
            continue

        new_name = lookup.get(d.name)
        if new_name is None:
            print(f"[WARN] Not in CSV: {d.name}")
            skipped += 1
            continue

        if d.name == new_name:
            # Already correct; neither renamed nor skipped.
            continue

        new_path = repos_dir / new_name
        if new_path.exists():
            print(f"[SKIP] Target exists: {d.name} -> {new_name}")
            skipped += 1
        else:
            d.rename(new_path)
            print(f"[OK] {d.name} -> {new_name}")
            renamed += 1

    print(f"\nDone: {renamed} renamed, {skipped} skipped")
43
+
44
+
45
if __name__ == "__main__":
    from argparse import ArgumentParser

    # Command-line entry point: both options are optional and fall back to
    # the default locations under ./workdir.
    cli = ArgumentParser()
    for flag, fallback in (
        ("--csv", "./workdir/repos_checked.csv"),
        ("--repos", "./workdir/repos_raw"),
    ):
        cli.add_argument(flag, default=fallback)

    options = cli.parse_args()
    rename_repos(options.csv, options.repos)