Safetensors
File size: 963 Bytes
4527b5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash -l
#
# Writes a timestamped preprocessing config under configs/ and then runs
# teddy/data_processing/preprocessing/preprocess.py on the sample data with it.
#
# All paths are relative, so run this from the repository root.
# NOTE(review): assumes the Poetry project (pyproject.toml) lives at the
# repository root as well -- confirm.

# Abort on the first failing command, unset variable, or failed pipeline stage
# so a half-written config is never handed to the preprocessing step.
set -euo pipefail

# Choose the Python launcher. `poetry shell` must not be used here: it opens
# an interactive subshell, so the rest of a script never runs inside the
# environment. `poetry run` executes a single command in the environment
# instead. Poetry stays optional: without it, fall back to whatever `python`
# is on PATH (e.g. an already-activated virtualenv).
if command -v poetry >/dev/null 2>&1; then
  PYTHON_CMD=(poetry run python)
else
  PYTHON_CMD=(python)
fi

# Generate a timestamp string (e.g., 20230404123056)
TS=$(date '+%Y%m%d%H%M%S')

CONFIG_DIR="configs"
CONFIG_FILE="${CONFIG_DIR}/preprocessing_config_${TS}.json"

# The redirection below fails if the target directory is missing.
mkdir -p -- "$CONFIG_DIR"

# 1) Generate a JSON config file on the fly.
#    The delimiter is quoted so the JSON is written literally, with no shell
#    expansion of its contents.
cat <<'EOF' > "$CONFIG_FILE"
{
  "load_dir": "data",
  "save_dir": "data/processed",

  "min_gene_counts": null,
  "remove_assays": [],
  "max_mitochondrial_prop": null,
  "remove_cell_types": [],
  "hvg_method": null,
  "normalized_total": null,

  "median_dict": "teddy/data_processing/utils/medians/data/teddy_gene_medians.json",
  "log1p": false,
  "compute_medians": false,
  "median_column": "index",

  "reference_id_only": false
}
EOF

# 2) Call preprocess.py, explicitly passing data_path, metadata_path, and config_path
"${PYTHON_CMD[@]}" teddy/data_processing/preprocessing/preprocess.py \
  --data_path data/sample_data.h5ad \
  --metadata_path data/sample_data_metadata.json \
  --config_path "$CONFIG_FILE"