Upload 42 files

- .gitattributes +1 -0
- README.md +63 -12
- __init__.py +0 -0
- __pycache__/__init__.cpython-310.pyc +0 -0
- __pycache__/__init__.cpython-312.pyc +0 -0
- __pycache__/app.cpython-310.pyc +0 -0
- __pycache__/app.cpython-312.pyc +0 -0
- __pycache__/poseidon_model.cpython-310.pyc +0 -0
- __pycache__/simulations.cpython-310.pyc +0 -0
- app.py +138 -0
- external/.DS_Store +0 -0
- external/poseidon/.gitignore +160 -0
- external/poseidon/README.md +151 -0
- external/poseidon/assets/fig1.png +3 -0
- external/poseidon/configs/README.md +3 -0
- external/poseidon/configs/run.yaml +26 -0
- external/poseidon/configs/sweep.yaml +57 -0
- external/poseidon/pyproject.toml +20 -0
- external/poseidon/scOT/__init__.py +0 -0
- external/poseidon/scOT/__pycache__/__init__.cpython-310.pyc +0 -0
- external/poseidon/scOT/__pycache__/model.cpython-310.pyc +0 -0
- external/poseidon/scOT/inference.py +950 -0
- external/poseidon/scOT/metrics.py +55 -0
- external/poseidon/scOT/model.py +1485 -0
- external/poseidon/scOT/problems/__init__.py +0 -0
- external/poseidon/scOT/problems/base.py +395 -0
- external/poseidon/scOT/problems/elliptic/__init__.py +0 -0
- external/poseidon/scOT/problems/elliptic/helmholtz.py +49 -0
- external/poseidon/scOT/problems/elliptic/poisson.py +50 -0
- external/poseidon/scOT/problems/fluids/__init__.py +0 -0
- external/poseidon/scOT/problems/fluids/compressible.py +308 -0
- external/poseidon/scOT/problems/fluids/incompressible.py +331 -0
- external/poseidon/scOT/problems/fluids/normalization_constants.py +9 -0
- external/poseidon/scOT/problems/reaction_diffusion/__init__.py +0 -0
- external/poseidon/scOT/problems/reaction_diffusion/allen_cahn.py +53 -0
- external/poseidon/scOT/problems/wave/__init__.py +0 -0
- external/poseidon/scOT/problems/wave/acoustic.py +125 -0
- external/poseidon/scOT/train.py +537 -0
- external/poseidon/scOT/trainer.py +762 -0
- external/poseidon/scOT/utils.py +97 -0
- poseidon_model.py +211 -0
- requirements.txt +10 -0
- simulations.py +84 -0
.gitattributes
CHANGED

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+external/poseidon/assets/fig1.png filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED

@@ -1,14 +1,65 @@
+# 🔱 POSEIDON Playground: Across Scientific Domains 🔱
+
+**An interactive Gradio demo** exploring how the POSEIDON foundation model for solving Partial Differential Equations (PDEs) could be applied across physics, finance, quantum mechanics, and biology.
+
+> Built with love 💖 🔱 for the Hugging Face Community ML Research Engineer take-home assignment.
+> Inspired by [POSEIDON: A Foundation Model for Solving PDEs](https://arxiv.org/abs/2405.19101) by CamLab ETH Zürich.
+> Code from the original repo: [github.com/camlab-ethz/poseidon](https://github.com/camlab-ethz/poseidon)
+
 ---
-title: Test Space
-emoji: 😻
-colorFrom: red
-colorTo: yellow
-sdk: gradio
-sdk_version: 5.23.1
-app_file: app.py
-pinned: false
-license: mit
-short_description: Test
----
 
-
+## Goal
+
+This app highlights the **multidisciplinary potential** of pre-trained PDE models like POSEIDON using:
+
+- Intuitive **interactive visualizations**
+- Simple simulations from **four real-world domains**
+
+
+
+## 🪩 What You Can Do
+
+| Feature | Description |
+|-----------|-------------|
+| ✔️ Pick a scientific domain | Finance, Quantum, Fluids, Biology |
+| ✔️ Run a mini simulation | See how PDEs behave in each field |
+| ✔️ Try POSEIDON inference | Generate predictions from synthetic inputs |
+| ✔️ Use real PDE datasets | Compare POSEIDON output vs. ground truth |
+
+
+
+
+## 🚀 Running Locally
+
+1. **Clone this repo**
+```bash
+git clone https://github.com/YOUR_USERNAME/poseidon_demo.git
+cd poseidon_demo
+```
+
+2. **(Option A) Using Conda**
+```bash
+conda env create -f environment.yml
+conda activate poseidon-demo
+```
+
+3. **(Option B) Using virtualenv**
+```bash
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+
+4. **Run the demo**
+```bash
+python -m poseidon_demo.app
+```
+
+
+## 🪩 Big Thanks
+
+- Hugging Face for the opportunity and open tools
+- ETH Zürich’s CamLab for releasing the POSEIDON repo
+- 💖 You, the curious science hacker, for playing with this demo!
+
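The same helper functions the Gradio UI wires together can also be driven from a plain script. A minimal sketch, based only on how `app.py` (shown further down in this commit) invokes them; `poseidon_model.py` (+211 lines) is not reproduced in this view, so the exact signatures are an assumption:

```python
# Sketch: drive the demo's helpers without the Gradio UI.
# The call pattern is copied from app.py; poseidon_model.py itself is not
# shown in this commit view, so treat these signatures as assumptions.
from poseidon_model import load_model, run_inference_by_domain, plot_output

model = load_model()                                       # pretrained POSEIDON/scOT model
output = run_inference_by_domain(model, "Fluid Dynamics")  # inference on synthetic input
fig = plot_output(output, contrast=2.0, cmap="viridis")    # matplotlib heatmap of the prediction
fig.savefig("poseidon_output.png")
```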
__init__.py
ADDED

File without changes

__pycache__/__init__.cpython-310.pyc
ADDED

Binary file (176 Bytes).

__pycache__/__init__.cpython-312.pyc
ADDED

Binary file (180 Bytes).

__pycache__/app.cpython-310.pyc
ADDED

Binary file (4.52 kB).

__pycache__/app.cpython-312.pyc
ADDED

Binary file (4.28 kB).

__pycache__/poseidon_model.cpython-310.pyc
ADDED

Binary file (5.53 kB).

__pycache__/simulations.cpython-310.pyc
ADDED

Binary file (2.68 kB).
app.py
ADDED

@@ -0,0 +1,138 @@
+import gradio as gr
+from .simulations import finance_demo, quantum_demo, fluid_demo, bio_demo
+from .poseidon_model import load_model, run_inference_by_domain, run_inference_on_dataset, plot_output, plot_comparison
+
+
+def run_poseidon_demo(domain, contrast, cmap):
+    """
+    Loads the POSEIDON model and runs it on synthetic input data
+    based on the selected scientific domain (e.g., Finance, Quantum).
+
+    Args:
+        domain (str): Selected scientific field.
+        contrast (float): Contrast setting for visualization.
+        cmap (str): Colormap choice for heatmap.
+
+    Returns:
+        Matplotlib figure showing the output.
+    """
+    model = load_model()
+    output = run_inference_by_domain(model, domain)
+    return plot_output(output, contrast=contrast, cmap=cmap)
+
+
+
+def render_demo(domain):
+    """
+    Returns a mini-simulation plot and a descriptive explanation
+    for the selected domain.
+
+    Args:
+        domain (str): One of Finance, Quantum, Fluid Dynamics, Biology.
+
+    Returns:
+        Tuple of (plot, explanatory markdown string).
+    """
+
+    if domain == "Finance":
+        return finance_demo(), (
+            "📍 **Finance:** PDEs like Black-Scholes are used to model option pricing. "
+            "Imagine fine-tuning Poseidon to forecast derivatives across market regimes!"
+        )
+    elif domain == "Quantum":
+        return quantum_demo(), (
+            "📍 **Quantum Mechanics:** Schrödinger's equation is a core PDE in quantum physics. "
+            "Could Poseidon learn to generalize across quantum systems?"
+        )
+    elif domain == "Fluid Dynamics":
+        return fluid_demo(), (
+            "📍 **Fluid Dynamics:** Poseidon is pretrained here! This sim shows 1D flow, "
+            "but Poseidon can do much more."
+        )
+    elif domain == "Biology / Medicine":
+        return bio_demo(), (
+            "📍 **Biology:** Reaction-diffusion equations appear in tissue growth and morphogenesis. "
+            "Poseidon could help model organ behavior!"
+        )
+    else:
+        return None, "Pick a domain to explore how Poseidon might apply!"
+
+
+def run_poseidon_real_dataset(dataset_name):
+    """
+    Loads Poseidon and runs inference on a real scientific dataset from the Hub.
+
+    Args:
+        dataset_name (str): Dataset ID from dropdown.
+
+    Returns:
+        Matplotlib figure with side-by-side comparison of input vs output.
+    """
+    model = load_model()
+    input_array, output_array = run_inference_on_dataset(model, dataset_name)
+    return plot_comparison(input_array, output_array)
+
+
+# this part defines the app
+with gr.Blocks() as demo:
+    gr.Markdown("# 🔱 POSEIDON Application Across Scientific Domains 🔱")
+
+    gr.Markdown("### **Welcome to the POSEIDON Playground!**")
+    gr.Markdown("Ever dreamed of solving physics equations with a single click? You’re in the right place.")
+    gr.Markdown("POSEIDON is a foundation model that learned to solve partial differential equations (PDEs) — the magical "
+                "math behind fluid flows, quantum mechanics, financial markets, and even biology!")
+
+    gr.Markdown("## ☑️ 1. Pick a scientific domain to see a simple PDE simulation and explanation.")
+    domain_dropdown = gr.Dropdown(
+        ["Finance", "Quantum", "Fluid Dynamics", "Biology / Medicine"],
+        label="Choose The Field",
+        value="Finance"
+    )
+    sim_output = gr.Plot()
+    sim_text = gr.Markdown()
+
+    domain_dropdown.change(fn=render_demo, inputs=domain_dropdown, outputs=[sim_output, sim_text])
+
+    gr.Markdown("## 🚀 Run a test output from the POSEIDON model based on the chosen domain")
+
+    with gr.Row():
+        gr.Markdown("Play with contrast and choose the colormap you prefer.")
+        contrast_slider = gr.Slider(0.5, 5.0, value=2.0, step=0.1, label="Contrast")
+        cmap_dropdown = gr.Dropdown(
+            ["inferno", "viridis", "plasma"],
+            label="Colormap",
+            value="inferno"
+        )
+
+    with gr.Row():
+        poseidon_button = gr.Button("POSEIDON Test Output")
+
+    poseidon_plot = gr.Plot()
+    poseidon_button.click(
+        fn=run_poseidon_demo,
+        inputs=[domain_dropdown, contrast_slider, cmap_dropdown],
+        outputs=poseidon_plot
+    )
+
+    gr.Markdown("# ")
+
+    gr.Markdown("## ☑️ 2. Try POSEIDON on Real Scientific Datasets")
+    dataset_dropdown = gr.Dropdown(
+        ["fluids.incompressible.Sines", "fluids.compressible.Riemann", "reaction_diffusion.AllenCahn"],
+        label="Choose a Real Dataset"
+    )
+    dataset_button = gr.Button("POSEIDON on Dataset")
+    dataset_plot = gr.Plot()
+
+    dataset_button.click(
+        fn=run_poseidon_real_dataset,
+        inputs=[dataset_dropdown],
+        outputs=dataset_plot
+    )
+
+
+
+if __name__ == "__main__":
+    demo.launch()
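`simulations.py` (+84 lines) is listed in this upload but its body is not shown. As an illustration of the kind of mini-simulation `render_demo` expects, here is a hypothetical sketch of a `finance_demo`: an explicit finite-difference solve of the Black-Scholes PDE for a European call, returning a matplotlib figure as `app.py` requires. Everything inside is an illustrative assumption, not the uploaded implementation.

```python
# Hypothetical finance_demo sketch -- NOT the uploaded simulations.py.
# Solves the Black-Scholes PDE backwards from the payoff with an explicit
# finite-difference scheme and returns a matplotlib figure (for gr.Plot).
import numpy as np
import matplotlib.pyplot as plt


def finance_demo(K=100.0, r=0.05, sigma=0.2, T=1.0, s_max=200.0, ns=200, nt=2000):
    S = np.linspace(0.0, s_max, ns + 1)
    dS, dt = S[1] - S[0], T / nt          # dt chosen small enough for stability
    V = np.maximum(S - K, 0.0)            # terminal payoff at t = T
    for n in range(1, nt + 1):            # march backwards from T to 0
        d2V = (V[2:] - 2.0 * V[1:-1] + V[:-2]) / dS**2
        dV = (V[2:] - V[:-2]) / (2.0 * dS)
        V[1:-1] += dt * (0.5 * sigma**2 * S[1:-1] ** 2 * d2V
                         + r * S[1:-1] * dV - r * V[1:-1])
        V[0] = 0.0                                # call is worthless at S = 0
        V[-1] = s_max - K * np.exp(-r * n * dt)   # deep in-the-money boundary
    fig, ax = plt.subplots()
    ax.plot(S, np.maximum(S - K, 0.0), "--", label="payoff at expiry")
    ax.plot(S, V, label="value today")
    ax.set_xlabel("underlying price S")
    ax.set_ylabel("call value V(S, t=0)")
    ax.set_title("Black-Scholes via explicit finite differences")
    ax.legend()
    return fig
```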
external/.DS_Store
ADDED

Binary file (6.15 kB).
external/poseidon/.gitignore
ADDED

@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
external/poseidon/README.md
ADDED

@@ -0,0 +1,151 @@
+# Poseidon: Efficient Foundation Models for PDEs
+
+This is the source code for the paper [*Poseidon: Efficient Foundation Models for PDEs*](https://arxiv.org/abs/2405.19101). It also acts as a package if you want to use the models in your code.
+
+
+
+Find pretrained models and pretraining dataset in our collection on the [🤗 Hub – Pretrained Models and Pretraining Datasets](https://huggingface.co/collections/camlab-ethz/poseidon-664fa125729c53d8607e209a). All datasets corresponding to downstream tasks can be downloaded from the respective collection on the [🤗 Hub – Downstream Tasks](https://huggingface.co/collections/camlab-ethz/poseidon-downstream-tasks-664fa237cd6b0c097971ef14) as well. To use them, follow the respective sections below.
+
+## Usage
+
+### Installation & Requirements
+
+To get all requirements and install the package, run (inside this folder), after getting this repository:
+
+```bash
+pip install -e .
+```
+
+We recommend running the above command in a [virtual environment](https://docs.python.org/3/library/venv.html).
+
+After installation, you can import the models and use the training and inference scripts from everywhere on your system.
+
+### Using the models in your own code
+
+To use the (pretrained) models in your own code, you can use the following code snippet (after installing):
+
+```python
+from scOT.model import ScOT
+
+model = ScOT.from_pretrained("camlab-ethz/Poseidon-<MODEL_SIZE>")
+```
+
+This will load the pretrained model from the 🤗 Hub. `<MODEL_SIZE>` has to be replaced by `T`, `B`, or `L`, for the respective pretrained model. You can also load a model from a local path by providing the path to the `from_pretrained` method.
+
+To finetune and replace embeddings and recovery parameters, load the model as follows:
+
+```python
+from scOT.model import ScOT
+
+model = ScOT.from_pretrained("camlab-ethz/Poseidon-<MODEL_SIZE>", config=model_config, ignore_mismatched_sizes=True)
+```
+
+Here, `model_config` is a `ScOTConfig` with the correct input/output dimensions. We also refer to [the training/finetuning script](scOT/train.py), see below on usage, which might be easier.
+
+### Training & Finetuning
+
+The easiest way to finetune **Poseidon** on your own dataset is by plugging in your own dataset and running the provided training script as follows:
+
+```bash
+accelerate launch scOT/train.py \
+    --config <WANDB_CONFIG_FILE> \
+    --wandb_run_name <WANDB_RUN_NAME> \
+    --wandb_project_name <WANDB_PROJECT_NAME> \
+    --checkpoint_path <CHECKPOINT_PATH> \
+    --data_path <DATA_PATH> \
+    --finetune_from <PRETRAINED_MODEL> \
+    --replace_embedding_recovery <SET ONLY IF EMBED/RECOVERY NEEDS TO BE REPLACED>
+```
+
+For more arguments and options, see the help message of the script:
+
+```bash
+accelerate launch scOT/train.py --help
+```
+
+Since the code is built on top of [🤗 Accelerate](https://huggingface.co/docs/accelerate/en/index), you should run `accelerate config` first.
+
+We also make heavy use of [Weights and Biases](https://wandb.ai) to log and organise all our runs. The code might run without it (by setting `WANDB_MODE=disabled`), but we don't give any guarantees as this probably breaks the folder structure.
+
+Most of the actual training configuration is set in a YAML config file (see for all arguments to set for a single W&B [run](configs/run.yaml) or a W&B [sweep](configs/sweep.yaml) (multiple runs, see the [W&B documentation](https://docs.wandb.ai/guides/sweeps) on how to start a sweep)). The config file is passed to the training script via the `--config` argument.
+
+We do our pretrainings with the same script.
+
+### Inference/Testing
+
+To evaluate a model on a dataset, you can use the inference script, for all possible arguments see the help message:
+
+```bash
+python -m scOT.inference --help
+```
+
+## Datasets
+
+We provide all datasets used in the paper on the 🤗 Hub. You can download them from the respective collections:
+- [🤗 Hub – Pretraining Datasets](https://huggingface.co/collections/camlab-ethz/poseidon-664fa125729c53d8607e209a)
+- [🤗 Hub – Downstream Tasks](https://huggingface.co/collections/camlab-ethz/poseidon-downstream-tasks-664fa237cd6b0c097971ef14)
+
+### Naming convention in the code
+
+In the code, we refer to the datasets by a different identifier than on the 🤗 Hub, see the following table for a mapping:
+
+| Code Identifier | 🤗 Hub/Paper Identifier |
+| ----------------|------------------------- |
+|fluids.incompressible.Sines| NS-Sines|
+|fluids.incompressible.Gaussians| NS-Gauss|
+|fluids.compressible.Riemann|CE-RP|
+|fluids.compressible.RiemannCurved|CE-CRP|
+|fluids.compressible.KelvinHelmholtz|CE-KH|
+|fluids.compressible.Gaussians|CE-Gauss|
+|fluids.incompressible.PiecewiseConstants|NS-PwC|
+|fluids.incompressible.VortexSheet|NS-SVS|
+|fluids.incompressible.BrownianBridge|NS-BB|
+|fluids.incompressible.ShearLayer|NS-SL|
+|fluids.incompressible.PiecewiseConstants.tracer|NS-Tracer-PwC|
+|fluids.incompressible.forcing.KolmogorovFlow|FNS-KF|
+|fluids.compressible.RiemannKelvinHelmholtz|CE-RPUI|
+|fluids.compressible.RichtmyerMeshkov|CE-RM|
+|fluids.compressible.gravity.RayleighTaylor|GCE-RT|
+|wave.Layer|Wave-Layer|
+|wave.Gaussians|Wave-Gauss|
+|reaction_diffusion.AllenCahn|ACE|
+|fluids.compressible.steady.Airfoil(.time)|SE-AF|
+|elliptic.poisson.Gaussians(.time)|Poisson-Gauss|
+|elliptic.Helmholtz(.time)|Helmholtz|
+
+Adding the suffix `.time` to the dataset identifier will load the dataset as time-dependent dataset, i.e. as a long-time limit – use that suffix for finetuning on time-independent datasets.
+
+### Download & Assembly
+
+Download all the datasets used in our paper from the 🤗 Hub. You may want to use the CLI provided by the [Hub Python Library](https://huggingface.co/docs/huggingface_hub/en/guides/cli#huggingface-cli-download):
+
+```bash
+huggingface-cli download camlab-ethz/<DATASET IDENTIFIER FROM PAPER> --repo-type dataset --local-dir <LOCAL DIRECTORY>
+```
+
+This will download a specific dataset to the specified `LOCAL DIRECTORY`. After download, you need to assemble the datasets to the format expected by the code; for that, we refer to the README in the respective dataset repository. After assembly, remove the chunked dataset files, as they are not needed for training, and place the assembled dataset at the path you specify as `--data_path` for the training/inference script. You may also specify the 🤗 Hub cache location by specifying the environment variable `HF_HOME` as this is where the download will be performed to.
+
+### Adding your own dataset
+
+We encourage adding your own datasets. For that, you can subclass from [BaseDataset and BaseTimeDataset](scOT/problems/base.py) and add it to the `get_dataset` selector method. You can then use the dataset in the training script by specifying the dataset identifier in the config file.
+
+For subclassing, we refer to the docstrings in the base classes and the existing datasets in the [problems](scOT/problems) folder.
+
+## Pretrained models
+
+Pretrained models are available on the 🤗 Hub, see the [Poseidon collection](https://huggingface.co/collections/camlab-ethz/poseidon-664fa125729c53d8607e209a) for all models. You can download them via the 🤗 Hub API or by using the `from_pretrained` method, see above.
+
+## Citation
+
+If you use our models, code, or datasets, please consider citing our paper:
+
+```bibtex
+@misc{herde2024poseidon,
+      title={Poseidon: Efficient Foundation Models for PDEs},
+      author={Maximilian Herde and Bogdan Raonić and Tobias Rohner and Roger Käppeli and Roberto Molinaro and Emmanuel de Bézenac and Siddhartha Mishra},
+      year={2024},
+      eprint={2405.19101},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG}
+}
+```
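The inference script above reports relative Lp errors via `scOT.metrics`. The call below is copied from how `scOT/inference.py` (reproduced further down in this commit) invokes `relative_lp_error`; the `(batch, channels, H, W)` shape is an assumption and the arrays are placeholders standing in for real predictions:

```python
# Sketch of the metric call used in scOT/inference.py's compute_metrics;
# the array shape is an assumption and the data is a random placeholder.
import numpy as np
from scOT.metrics import relative_lp_error

preds = np.random.rand(8, 1, 128, 128)   # placeholder model outputs
labels = np.random.rand(8, 1, 128, 128)  # placeholder ground truth

errors = relative_lp_error(preds, labels, p=1, return_percent=True)
print(f"mean relative L1 error: {np.mean(errors):.2f}%")
```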
external/poseidon/assets/fig1.png
ADDED

Binary image, stored with Git LFS.
external/poseidon/configs/README.md
ADDED

@@ -0,0 +1,3 @@
+# Configuration Files
+
+We give two sample configuration files. One for a single finetuning run and one for a finetuning sweep. Both finetune the Poseidon-B model on Wave-Layer.
external/poseidon/configs/run.yaml
ADDED

@@ -0,0 +1,26 @@
+dataset:
+  value: "wave.Layer"
+num_trajectories:
+  value: 128
+model_name:
+  value: "B"
+lr:
+  value: 0.00005
+lr_embedding_recovery:
+  value: 0.0005
+lr_time_embedding:
+  value: 0.0005
+weight_decay:
+  value: 0.000001
+lr_scheduler:
+  value: "cosine"
+warmup_ratio:
+  value: 0.0
+early_stopping_patience:
+  value: 200
+num_epochs:
+  value: 200
+batch_size:
+  value: 40
+max_grad_norm:
+  value: 5.0
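As the repository README above notes, this file is handed to the training script via `--config`. A possible invocation, following the README's own command template (the W&B names and paths are placeholders, not tested values):

```bash
accelerate launch scOT/train.py \
    --config configs/run.yaml \
    --wandb_run_name my-run \
    --wandb_project_name my-project \
    --checkpoint_path ./checkpoints \
    --data_path /path/to/assembled/data \
    --finetune_from camlab-ethz/Poseidon-B
```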
external/poseidon/configs/sweep.yaml
ADDED

@@ -0,0 +1,57 @@
+project: <WANDB_PROJECT>
+entity: <WANDB_ENTITY>
+program: scOT/train.py
+method: grid
+metric:
+  name: "eval/loss"
+  goal: minimize
+command:
+  - "HDF5_USE_FILE_LOCKING=FALSE"
+  - "accelerate"
+  - "launch"
+  - ${program}
+  - "--disable_tqdm"
+  - "--json-config"
+  - "--finetune_from"
+  - "camlab-ethz/Poseidon-B"
+  - "--replace_embedding_recovery"
+  - "--config"
+  - ${args_json}
+parameters:
+  dataset:
+    value: "wave.Layer"
+  num_trajectories:
+    values:
+      - 1
+      - 2
+      - 4
+      - 8
+      - 16
+      - 32
+      - 64
+      - 128
+      - 256
+      - 512
+      - 1024
+  model_name:
+    value: "B"
+  lr:
+    value: 0.00005
+  lr_embedding_recovery:
+    value: 0.0005
+  lr_time_embedding:
+    value: 0.0005
+  weight_decay:
+    value: 0.000001
+  lr_scheduler:
+    value: "cosine"
+  warmup_ratio:
+    value: 0.0
+  early_stopping_patience:
+    value: 200
+  num_epochs:
+    value: 200
+  batch_size:
+    value: 40
+  max_grad_norm:
+    value: 5.0
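Per the W&B sweep workflow the repository README links to, a sweep defined by this file would typically be registered and run with the standard W&B CLI (the entity, project, and sweep ID below are placeholders):

```bash
wandb sweep configs/sweep.yaml                      # registers the sweep, prints a sweep ID
wandb agent <WANDB_ENTITY>/<WANDB_PROJECT>/<SWEEP_ID>   # launches runs from the grid
```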
external/poseidon/pyproject.toml
ADDED

@@ -0,0 +1,20 @@
+[project]
+name = "scOT"
+version = "1.0.0"
+description = "Foundation models for PDEs based on a scalable Operator Transformer"
+dependencies = [
+    "torch == 2.0.1",
+    "torchvision == 0.15.2",
+    "numpy",
+    "transformers == 4.29.2",
+    "matplotlib",
+    "accelerate == 0.31.0",
+    "wandb == 0.14.2",
+    "h5py",
+    "pandas",
+    "pyyaml",
+]
+
+[build-system]
+build-backend = "flit_core.buildapi"
+requires = ["flit_core >=3.2,<4"]
external/poseidon/scOT/__init__.py
ADDED

File without changes

external/poseidon/scOT/__pycache__/__init__.cpython-310.pyc
ADDED

Binary file (185 Bytes).

external/poseidon/scOT/__pycache__/model.cpython-310.pyc
ADDED

Binary file (33.9 kB).
external/poseidon/scOT/inference.py
ADDED

@@ -0,0 +1,950 @@
+"""
+Use this script for inference/testing a scOT model.
+The script can be used in different modes:
+- save_samples: Save samples from a model.
+- save_samples_sweep: Save samples from a sweep.
+- eval: Evaluate a model on the test set.
+- eval_sweep: Evaluate a sweep on the test set.
+- eval_accumulation_error: Evaluate the accumulation error of a model.
+- eval_resolutions: Evaluate a model on different resolutions.
+
+See the --help page for more information.
+"""
+
+import argparse
+import torch
+import numpy as np
+import random
+import psutil
+import os
+import pandas as pd
+import wandb
+from transformers.trainer_utils import EvalPrediction
+from scOT.model import ScOT
+from scOT.trainer import TrainingArguments, Trainer
+from scOT.problems.base import get_dataset, BaseTimeDataset
+from scOT.metrics import relative_lp_error, lp_error
+
+
+SEED = 0
+torch.manual_seed(SEED)
+np.random.seed(SEED)
+random.seed(SEED)
+
+
+def get_trainer(
+    model_path,
+    batch_size,
+    dataset,
+    full_data=False,
+    output_all_steps=False,
+    workers=-1,
+):
+    """
+    Get a trainer for the model (actually just using the interface for inference).
+
+    Args:
+        model_path: str
+            Path to the model.
+        batch_size: int
+            Batch size for evaluation.
+        dataset: BaseTimeDataset
+            Test set.
+        full_data: bool
+            Whether to save the full data distribution.
+        output_all_steps: bool
+            Whether to output all preliminary steps in autoregressive rollout.
+        workers: int
+            Number of workers for evaluation. If -1 will use all available cores.
+    """
+    num_cpu_cores = len(psutil.Process().cpu_affinity())
+    if workers == -1:
+        workers = num_cpu_cores
+    if workers > num_cpu_cores:
+        workers = num_cpu_cores
+    assert workers > 0
+
+    model = ScOT.from_pretrained(model_path)
+    args = TrainingArguments(
+        output_dir=".",
+        per_device_eval_batch_size=batch_size,
+        eval_accumulation_steps=16,
+        dataloader_num_workers=workers,
+    )
+    time_involved = isinstance(dataset, BaseTimeDataset)
+
+    def compute_metrics(eval_preds):
+        if time_involved and output_all_steps:
+            return {}
+        channel_list = dataset.channel_slice_list
+
+        def get_relative_statistics(errors):
+            median_error = np.median(errors, axis=0)
+            mean_error = np.mean(errors, axis=0)
+            std_error = np.std(errors, axis=0)
+            min_error = np.min(errors, axis=0)
+            max_error = np.max(errors, axis=0)
+            return {
+                "median_relative_l1_error": median_error,
+                "mean_relative_l1_error": mean_error,
+                "std_relative_l1_error": std_error,
+                "min_relative_l1_error": min_error,
+                "max_relative_l1_error": max_error,
+            }
+
+        def get_statistics(errors):
+            median_error = np.median(errors, axis=0)
+            mean_error = np.mean(errors, axis=0)
+            std_error = np.std(errors, axis=0)
+            min_error = np.min(errors, axis=0)
+            max_error = np.max(errors, axis=0)
+            return {
+                "median_l1_error": median_error,
+                "mean_l1_error": mean_error,
+                "std_l1_error": std_error,
+                "min_l1_error": min_error,
+                "max_l1_error": max_error,
+            }
+
+        relative_errors = [
+            relative_lp_error(
+                eval_preds.predictions[:, channel_list[i] : channel_list[i + 1]],
+                eval_preds.label_ids[:, channel_list[i] : channel_list[i + 1]],
+                p=1,
+                return_percent=True,
+            )
+            for i in range(len(channel_list) - 1)
+        ]
+
+        errors = [
+            lp_error(
+                eval_preds.predictions[:, channel_list[i] : channel_list[i + 1]],
+                eval_preds.label_ids[:, channel_list[i] : channel_list[i + 1]],
+                p=1,
+            )
+            for i in range(len(channel_list) - 1)
+        ]
+
+        relative_error_statistics = [
+            get_relative_statistics(relative_errors[i])
+            for i in range(len(channel_list) - 1)
+        ]
+
+        error_statistics = [
+            get_statistics(errors[i]) for i in range(len(channel_list) - 1)
+        ]
+
+        if dataset.output_dim == 1:
+            relative_error_statistics = relative_error_statistics[0]
+            error_statistics = error_statistics[0]
+            if full_data:
+                relative_error_statistics["relative_full_data"] = relative_errors[
+                    0
+                ].tolist()
+                error_statistics["full_data"] = errors[0].tolist()
+            return {**relative_error_statistics, **error_statistics}
+        else:
+            mean_over_relative_means = np.mean(
+                np.array(
+                    [
+                        stats["mean_relative_l1_error"]
+                        for stats in relative_error_statistics
+                    ]
+                ),
+                axis=0,
+            )
+            mean_over_relative_medians = np.mean(
+                np.array(
+                    [
+                        stats["median_relative_l1_error"]
+                        for stats in relative_error_statistics
+                    ]
+                ),
+                axis=0,
+            )
+            mean_over_means = np.mean(
+                np.array([stats["mean_l1_error"] for stats in error_statistics]), axis=0
+            )
+            mean_over_medians = np.mean(
+                np.array([stats["median_l1_error"] for stats in error_statistics]),
+                axis=0,
+            )
+
+            error_statistics_ = {
+                "mean_relative_l1_error": mean_over_relative_means,
+                "mean_over_median_relative_l1_error": mean_over_relative_medians,
+                "mean_l1_error": mean_over_means,
+                "mean_over_median_l1_error": mean_over_medians,
+            }
+            #!! The above is different from train and finetune (here mean_relative_l1_error is mean over medians instead of mean over means)
+            for i, stats in enumerate(relative_error_statistics):
+                for key, value in stats.items():
+                    error_statistics_[
+                        dataset.printable_channel_description[i] + "/" + key
+                    ] = value
+                if full_data:
+                    error_statistics_[
+                        dataset.printable_channel_description[i]
+                        + "/"
+                        + "relative_full_data"
+                    ] = relative_errors[i].tolist()
+            for i, stats in enumerate(error_statistics):
+                for key, value in stats.items():
+                    error_statistics_[
+                        dataset.printable_channel_description[i] + "/" + key
+                    ] = value
+                if full_data:
+                    error_statistics_[
+                        dataset.printable_channel_description[i] + "/" + "full_data"
+                    ] = errors[i].tolist()
+            return error_statistics_
+
+    trainer = Trainer(
+        model=model,
+        args=args,
+        compute_metrics=compute_metrics,
+    )
+    return trainer
+
+
+def rollout(trainer, dataset, ar_steps=1, output_all_steps=False):
+    """
+    Do a rollout of the model.
+
+    Args:
+        trainer: Trainer
+            Trainer for the model.
+        dataset: BaseTimeDataset
+            Test set.
+        ar_steps: int or list
+            Number of autoregressive steps to take. A single int n is interpreted as taking n homogeneous steps, a list of ints [j_0, j_1, ...] is interpreted as taking a step of size j_i.
+        output_all_steps: bool
+            Whether to output all preliminary steps in autoregressive rollout.
+    """
+    time_involved = isinstance(dataset, BaseTimeDataset)
+    if time_involved and ar_steps != 1:
+        trainer.set_ar_steps(ar_steps, output_all_steps=output_all_steps)
+    else:
+        trainer.set_ar_steps(ar_steps=1, output_all_steps=False)
+
+    prediction = trainer.predict(dataset, metric_key_prefix="")
+
+    try:
+        return prediction.predictions, prediction.label_ids, prediction.metrics
+    except:
+        return prediction.predictions
+
+
+def get_test_set(
+    dataset, data_path, initial_time=None, final_time=None, dataset_kwargs={}
+):
+    """
+    Get a test set (input at initial_time, output at final_time).
+
+    Args:
+        dataset: str
+            Dataset name.
+        data_path: str
+            Path to data.
+        initial_time: int
+            Initial time step to start from.
+        final_time: int
+            Final time step to end at.
+        dataset_kwargs: dict
+            Additional arguments for dataset as in scOT.problems.base.get_dataset.
+    """
+    if initial_time is not None and final_time is not None:
+        dataset_kwargs = {
+            **dataset_kwargs,
+            "fix_input_to_time_step": initial_time,
+            "time_step_size": final_time - initial_time,
+            "max_num_time_steps": 1,
+        }
+    dataset = get_dataset(
+        dataset=dataset,
+        which="test",
+        num_trajectories=1,
+        data_path=data_path,
+        move_to_local_scratch=None,
+        **dataset_kwargs,
+    )
+    return dataset
+
+
+def get_first_n_inputs(dataset, n):
+    """
+    Helper to get the first n inputs of a dataset.
+    """
+    inputs = []
+    for i in range(n):
+        inputs.append(dataset[i]["pixel_values"])
+    return torch.stack(inputs)
+
+
+def get_trajectories(
+    dataset, data_path, ar_steps, initial_time, final_time, dataset_kwargs
+):
+    """
+    Get full trajectories in a dataset. Helper for accumulation error evaluation.
+
+    Args:
+        dataset: str
+            Dataset name.
+        data_path: str
+            Path to data.
+        ar_steps: int or list
+            Number of autoregressive steps to take. A single int n is interpreted as taking n homogeneous steps, a list of ints [j_0, j_1, ...] is interpreted as taking a step of size j_i.
+        initial_time: int
+            Initial time step to start from.
+        final_time: int
+            Final time step to end at.
+        dataset_kwargs: dict
+            Additional arguments for dataset as in scOT.problems.base.get_dataset.
+    """
+    trajectories = []
+    if isinstance(ar_steps, int):
+        delta = (final_time - initial_time) // ar_steps
+        for i in range(ar_steps):
+            dataset_ = get_test_set(
+                dataset,
+                data_path,
+                initial_time + i * delta,
+                initial_time + (i + 1) * delta,
+                dataset_kwargs,
+            )
+            traj_ = []
+            for j in range(len(dataset_)):
+                traj_.append(dataset_[j]["labels"])
+            trajectories.append(torch.stack(traj_))
+    else:
+        running_time = initial_time
+        for i in ar_steps:
+            dataset_ = get_test_set(
+                dataset, data_path, running_time, running_time + i, dataset_kwargs
+            )
+            running_time += i
+            traj_ = []
+            for j in range(len(dataset_)):
+                traj_.append(dataset_[j]["labels"])
+            trajectories.append(torch.stack(traj_))
+    return torch.stack(trajectories, dim=1)
+
+
+def remove_underscore_dict(d):
+    return {key[1:] if key.startswith("_") else key: value for key, value in d.items()}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Do different evaluations for a model, see --mode."
+    )
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        required=False,
+        help="Model path. Not required when mode==eval_sweep or save_samples_sweep.",
+    )
+    parser.add_argument(
+        "--file",
+        type=str,
+        required=True,
+        help="File to load/write to. May also be a directory to save samples.",
+    )
+    parser.add_argument(
+        "--data_path",
+        type=str,
+        required=True,
+        help="Path to data.",
+    )
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        help="Which test set to load. Not required if mode==eval_sweep or save_samples_sweep.",
+    )
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=64,
+        help="Batch size for evaluation.",
+    )
+    parser.add_argument(
+        "--full_data",
+        action="store_true",
+        help="Whether to save full data distributions.",
+    )
+    parser.add_argument(
+        "--initial_time",
+        type=int,
+        default=None,
+        help="Initial time step to start from.",
+    )
+    parser.add_argument(
+        "--final_time",
+        type=int,
+        default=None,
+        help="Final time step to end at.",
+    )
+    parser.add_argument(
+        "--ar_steps",
+        type=int,
+        nargs="+",
+        default=[1],
+        help="Number of autoregressive steps to take. A single int n is interpreted as taking n homogeneous steps, a list of ints [j_0, j_1, ...] is interpreted as taking a step of size j_i.",
+    )
+    parser.add_argument(
+        "--mode",
+        type=str,
+        choices=[
+            "save_samples",
+            "save_samples_sweep",
+            "eval",
+            "eval_sweep",
+            "eval_accumulation_error",
+            "eval_resolutions",
+        ],
+        default="eval",
+        help="Mode to run. Can be either save_samples to save n samples, save_samples_sweep, eval (to evaluate a single model), eval_sweep (to evaluate all models in a wandb sweep), eval_accumulation_error (to evaluate a model's accumulation error), eval_resolutions (to evaluate a model on different resolutions).",
+    )
+    parser.add_argument(
+        "--save_n_samples",
+        type=int,
+        default=1,
+        help="Number of samples to save. Only required for mode==save_samples or save_samples_sweep.",
+    )
+    parser.add_argument(
+        "--resolutions",
+        type=int,
+        nargs="+",
+        help="List of resolutions to evaluate. Only required for mode==eval_resolutions.",
+    )
+    parser.add_argument(
+        "--wandb_project",
+        type=str,
+        default="scOT",
+        help="Wandb project name. Required if mode==eval_sweep or save_samples_sweep.",
+    )
+    parser.add_argument(
+        "--wandb_entity",
+        type=str,
+        required=False,
+        help="Wandb entity name. Required if mode==eval_sweep or save_samples_sweep.",
+    )
+    parser.add_argument(
+        "--wandb_sweep_id",
+        type=str,
+        default=None,
+        help="Wandb sweep id. Required if mode==eval_sweep or save_samples_sweep.",
+    )
+    parser.add_argument(
+        "--ckpt_dir",
+        type=str,
+        required=True,
+        help="Base checkpoint directory. Required if mode==eval_sweep or save_samples_sweep.",
+    )
+    parser.add_argument(
+        "--exclude_dataset",
+        type=str,
+        nargs="+",
+        default=[],
+        help="Datasets to exclude from evaluation. Only relevant when mode==eval_sweep or save_samples_sweep.",
+    )
+    parser.add_argument(
+        "--exclusively_evaluate_dataset",
+        type=str,
+        nargs="+",
+        default=[],
+        help="Datasets to exclusively evaluate. Only relevant when mode==eval_sweep or save_samples_sweep.",
+    )
+    parser.add_argument(
+        "--just_velocities",
+        action="store_true",
+        help="Use just velocities in incompressible flow data.",
+    )
+    parser.add_argument(
+        "--allow_failed",
+        action="store_true",
+        help="Allow failed runs to be taken into account with eval_sweep.",
+    )
+    parser.add_argument(
+        "--append_time",
+        action="store_true",
+        help="Append .time to dataset name for evaluation.",
+    )
+    parser.add_argument(
+        "--num_trajectories",
+        type=int,
+        default=128,
+        help="Filter runs for number of training trajectories. Only relevant if mode==eval_sweep or save_samples_sweep.",
+    )
+    params = parser.parse_args()
+    if len(params.ar_steps) == 1:
+        params.ar_steps = params.ar_steps[0]
+        ar_steps = params.ar_steps
+    else:
+        ar_steps = params.ar_steps
+        params.ar_steps = [
+            step / (params.final_time - params.initial_time) for step in params.ar_steps
+        ]
+    dataset_kwargs = {}
+    if params.just_velocities:
+        dataset_kwargs["just_velocities"] = True
+    if params.mode == "save_samples":
+        dataset = get_test_set(
+            params.dataset,
+            params.data_path,
+            params.initial_time,
+            params.final_time,
+            dataset_kwargs,
+        )
+        trainer = get_trainer(params.model_path, params.batch_size, dataset)
+        inputs = get_first_n_inputs(dataset, params.save_n_samples)
+        outputs, labels, _ = rollout(trainer, dataset, ar_steps=params.ar_steps)
+        np.save(
+            params.file + "/" + params.dataset.replace(".", "-") + "/" + "inputs.npy",
+            inputs.cpu().numpy(),
+        )
+        np.save(
+            params.file + "/" + params.dataset.replace(".", "-") + "/" + "labels.npy",
+            labels[: params.save_n_samples],
+        )
+        np.save(
+            params.file + "/" + params.dataset.replace(".", "-") + "/" + "outputs.npy",
+            outputs[: params.save_n_samples],
+        )
+    elif params.mode == "save_samples_sweep":
+        api = wandb.Api()
+        sweep = api.sweep(
+            params.wandb_entity
+            + "/"
+            + params.wandb_project
+            + "/"
+            + params.wandb_sweep_id
+        )
+        for run in sweep.runs:
+            if run.state == "finished" or (
+                params.allow_failed and run.state == "failed"
+            ):
+                dset_name = run.config["dataset"]
+                if run.config["num_trajectories"] != params.num_trajectories:
+                    continue
+                if dset_name in params.exclude_dataset:
+                    continue
+                if (
+                    len(params.exclusively_evaluate_dataset) > 0
+                    and dset_name not in params.exclusively_evaluate_dataset
+                ):
+                    continue
+                num_trajectories = run.config["num_trajectories"]
+                ckpt_dir = (
+                    params.ckpt_dir
+                    + "/"
+                    + params.wandb_project
+                    + "/"
+                    + params.wandb_sweep_id
+                    + "/"
+                    + run.name
+                )
+                items = os.listdir(ckpt_dir)
+                dirs = [
+                    item
+                    for item in items
+                    if os.path.isdir(os.path.join(ckpt_dir, item))
+                ]
+                if len(dirs) > 1:
+                    print(
+                        "WARNING: more than one checkpoint in run directory " + ckpt_dir
+                    )
+                    print("choosing " + dirs[0])
+                model_path = os.path.join(ckpt_dir, dirs[0])
+                dataset = get_test_set(
+                    dset_name,
+                    params.data_path,
+                    params.initial_time,
+                    params.final_time,
+                    dataset_kwargs,
+                )
+                trainer = get_trainer(model_path, params.batch_size, dataset)
+                inputs = get_first_n_inputs(dataset, params.save_n_samples)
+                outputs, labels, _ = rollout(trainer, dataset, ar_steps=params.ar_steps)
+                if not os.path.exists(params.file + "/" + dset_name.replace(".", "-")):
+                    os.makedirs(params.file + "/" + dset_name.replace(".", "-"))
+                if not os.path.exists(
+                    params.file
+                    + "/"
+                    + dset_name.replace(".", "-")
+                    + "/"
+                    + str(num_trajectories)
+                ):
+                    os.makedirs(
+                        params.file
+                        + "/"
+                        + dset_name.replace(".", "-")
+                        + "/"
+                        + str(num_trajectories)
+                    )
+                np.save(
+
|
| 336 |
+
|
| 337 |
+
if __name__ == "__main__":
|
| 338 |
+
parser = argparse.ArgumentParser(
|
| 339 |
+
description="Do different evaluations for a model, see --mode."
|
| 340 |
+
)
|
| 341 |
+
parser.add_argument(
|
| 342 |
+
"--model_path",
|
| 343 |
+
type=str,
|
| 344 |
+
required=False,
|
| 345 |
+
help="Model path. Not required when mode==eval_sweep or save_samples_sweep.",
|
| 346 |
+
)
|
| 347 |
+
parser.add_argument(
|
| 348 |
+
"--file",
|
| 349 |
+
type=str,
|
| 350 |
+
required=True,
|
| 351 |
+
help="File to load/write to. May also be a directory to save samples.",
|
| 352 |
+
)
|
| 353 |
+
parser.add_argument(
|
| 354 |
+
"--data_path",
|
| 355 |
+
type=str,
|
| 356 |
+
required=True,
|
| 357 |
+
help="Path to data.",
|
| 358 |
+
)
|
| 359 |
+
parser.add_argument(
|
| 360 |
+
"--dataset",
|
| 361 |
+
type=str,
|
| 362 |
+
help="Which test set to load. Not required if mode==eval_sweep or save_samples_sweep.",
|
| 363 |
+
)
|
| 364 |
+
parser.add_argument(
|
| 365 |
+
"--batch_size",
|
| 366 |
+
type=int,
|
| 367 |
+
default=64,
|
| 368 |
+
help="Batch size for evaluation.",
|
| 369 |
+
)
|
| 370 |
+
parser.add_argument(
|
| 371 |
+
"--full_data",
|
| 372 |
+
action="store_true",
|
| 373 |
+
help="Whether to save full data distributions.",
|
| 374 |
+
)
|
| 375 |
+
parser.add_argument(
|
| 376 |
+
"--initial_time",
|
| 377 |
+
type=int,
|
| 378 |
+
default=None,
|
| 379 |
+
help="Initial time step to start from.",
|
| 380 |
+
)
|
| 381 |
+
parser.add_argument(
|
| 382 |
+
"--final_time",
|
| 383 |
+
type=int,
|
| 384 |
+
default=None,
|
| 385 |
+
help="Final time step to end at.",
|
| 386 |
+
)
|
| 387 |
+
parser.add_argument(
|
| 388 |
+
"--ar_steps",
|
| 389 |
+
type=int,
|
| 390 |
+
nargs="+",
|
| 391 |
+
default=[1],
|
| 392 |
+
help="Number of autoregressive steps to take. A single int n is interpreted as taking n homogeneous steps, a list of ints [j_0, j_1, ...] is interpreted as taking a step of size j_i.",
|
| 393 |
+
)
|
| 394 |
+
parser.add_argument(
|
| 395 |
+
"--mode",
|
| 396 |
+
type=str,
|
| 397 |
+
choices=[
|
| 398 |
+
"save_samples",
|
| 399 |
+
"save_samples_sweep",
|
| 400 |
+
"eval",
|
| 401 |
+
"eval_sweep",
|
| 402 |
+
"eval_accumulation_error",
|
| 403 |
+
"eval_resolutions",
|
| 404 |
+
],
|
| 405 |
+
default="eval",
|
| 406 |
+
help="Mode to run. Can be either save_samples to save n samples, save_samples_sweep, eval (to evaluate a single model), eval_sweep (to evaluate all models in a wandb sweep), eval_accumulation_error (to evaluate a model's accumulation error), eval_resolutions (to evaluate a model on different resolutions).",
|
| 407 |
+
)
|
| 408 |
+
parser.add_argument(
|
| 409 |
+
"--save_n_samples",
|
| 410 |
+
type=int,
|
| 411 |
+
default=1,
|
| 412 |
+
help="Number of samples to save. Only required for mode==save_samples or save_samples_sweep.",
|
| 413 |
+
)
|
| 414 |
+
parser.add_argument(
|
| 415 |
+
"--resolutions",
|
| 416 |
+
type=int,
|
| 417 |
+
nargs="+",
|
| 418 |
+
help="List of resolutions to evaluate. Only required for mode==eval_resolutions.",
|
| 419 |
+
)
|
| 420 |
+
parser.add_argument(
|
| 421 |
+
"--wandb_project",
|
| 422 |
+
type=str,
|
| 423 |
+
default="scOT",
|
| 424 |
+
help="Wandb project name. Required if mode==eval_sweep or save_samples_sweep.",
|
| 425 |
+
)
|
| 426 |
+
parser.add_argument(
|
| 427 |
+
"--wandb_entity",
|
| 428 |
+
type=str,
|
| 429 |
+
required=False,
|
| 430 |
+
help="Wandb entity name. Required if mode==eval_sweep or save_samples_sweep.",
|
| 431 |
+
)
|
| 432 |
+
parser.add_argument(
|
| 433 |
+
"--wandb_sweep_id",
|
| 434 |
+
type=str,
|
| 435 |
+
default=None,
|
| 436 |
+
help="Wandb sweep id. Required if mode==eval_sweep or save_samples_sweep.",
|
| 437 |
+
)
|
| 438 |
+
parser.add_argument(
|
| 439 |
+
"--ckpt_dir",
|
| 440 |
+
type=str,
|
| 441 |
+
required=True,
|
| 442 |
+
help="Base checkpoint directory. Required if mode==eval_sweep or save_samples_sweep.",
|
| 443 |
+
)
|
| 444 |
+
parser.add_argument(
|
| 445 |
+
"--exclude_dataset",
|
| 446 |
+
type=str,
|
| 447 |
+
nargs="+",
|
| 448 |
+
default=[],
|
| 449 |
+
help="Datasets to exclude from evaluation. Only relevant when mode==eval_sweep or save_samples_sweep.",
|
| 450 |
+
)
|
| 451 |
+
parser.add_argument(
|
| 452 |
+
"--exclusively_evaluate_dataset",
|
| 453 |
+
type=str,
|
| 454 |
+
nargs="+",
|
| 455 |
+
default=[],
|
| 456 |
+
help="Datasets to exclusively evaluate. Only relevant when mode==eval_sweep or save_samples_sweep.",
|
| 457 |
+
)
|
| 458 |
+
parser.add_argument(
|
| 459 |
+
"--just_velocities",
|
| 460 |
+
action="store_true",
|
| 461 |
+
help="Use just velocities in incompressible flow data.",
|
| 462 |
+
)
|
| 463 |
+
parser.add_argument(
|
| 464 |
+
"--allow_failed",
|
| 465 |
+
action="store_true",
|
| 466 |
+
help="Allow failed runs to be taken into account with eval_sweep.",
|
| 467 |
+
)
|
| 468 |
+
parser.add_argument(
|
| 469 |
+
"--append_time",
|
| 470 |
+
action="store_true",
|
| 471 |
+
help="Append .time to dataset name for evaluation.",
|
| 472 |
+
)
|
| 473 |
+
parser.add_argument(
|
| 474 |
+
"--num_trajectories",
|
| 475 |
+
type=int,
|
| 476 |
+
default=128,
|
| 477 |
+
help="Filter runs for number of training trajectories. Only relevant if mode==eval_sweep or save_samples_sweep.",
|
| 478 |
+
)
|
| 479 |
+
params = parser.parse_args()
|
| 480 |
+
if len(params.ar_steps) == 1:
|
| 481 |
+
params.ar_steps = params.ar_steps[0]
|
| 482 |
+
ar_steps = params.ar_steps
|
| 483 |
+
else:
|
| 484 |
+
ar_steps = params.ar_steps
|
| 485 |
+
params.ar_steps = [
|
| 486 |
+
step / (params.final_time - params.initial_time) for step in params.ar_steps
|
| 487 |
+
]
|
| 488 |
+
dataset_kwargs = {}
|
| 489 |
+
if params.just_velocities:
|
| 490 |
+
dataset_kwargs["just_velocities"] = True
|
| 491 |
+
if params.mode == "save_samples":
|
| 492 |
+
dataset = get_test_set(
|
| 493 |
+
params.dataset,
|
| 494 |
+
params.data_path,
|
| 495 |
+
params.initial_time,
|
| 496 |
+
params.final_time,
|
| 497 |
+
dataset_kwargs,
|
| 498 |
+
)
|
| 499 |
+
trainer = get_trainer(params.model_path, params.batch_size, dataset)
|
| 500 |
+
inputs = get_first_n_inputs(dataset, params.save_n_samples)
|
| 501 |
+
outputs, labels, _ = rollout(trainer, dataset, ar_steps=params.ar_steps)
|
| 502 |
+
np.save(
|
| 503 |
+
params.file + "/" + params.dataset.replace(".", "-") + "/" + "inputs.npy",
|
| 504 |
+
inputs.cpu().numpy(),
|
| 505 |
+
)
|
| 506 |
+
np.save(
|
| 507 |
+
params.file + "/" + params.dataset.replace(".", "-") + "/" + "labels.npy",
|
| 508 |
+
labels[: params.save_n_samples],
|
| 509 |
+
)
|
| 510 |
+
np.save(
|
| 511 |
+
params.file + "/" + params.dataset.replace(".", "-") + "/" + "outputs.npy",
|
| 512 |
+
outputs[: params.save_n_samples],
|
| 513 |
+
)
|
| 514 |
+
elif params.mode == "save_samples_sweep":
|
| 515 |
+
api = wandb.Api()
|
| 516 |
+
sweep = api.sweep(
|
| 517 |
+
params.wandb_entity
|
| 518 |
+
+ "/"
|
| 519 |
+
+ params.wandb_project
|
| 520 |
+
+ "/"
|
| 521 |
+
+ params.wandb_sweep_id
|
| 522 |
+
)
|
| 523 |
+
for run in sweep.runs:
|
| 524 |
+
if run.state == "finished" or (
|
| 525 |
+
params.allow_failed and run.state == "failed"
|
| 526 |
+
):
|
| 527 |
+
dset_name = run.config["dataset"]
|
| 528 |
+
if run.config["num_trajectories"] != params.num_trajectories:
|
| 529 |
+
continue
|
| 530 |
+
if dset_name in params.exclude_dataset:
|
| 531 |
+
continue
|
| 532 |
+
if (
|
| 533 |
+
len(params.exclusively_evaluate_dataset) > 0
|
| 534 |
+
and dset_name not in params.exclusively_evaluate_dataset
|
| 535 |
+
):
|
| 536 |
+
continue
|
| 537 |
+
num_trajectories = run.config["num_trajectories"]
|
| 538 |
+
ckpt_dir = (
|
| 539 |
+
params.ckpt_dir
|
| 540 |
+
+ "/"
|
| 541 |
+
+ params.wandb_project
|
| 542 |
+
+ "/"
|
| 543 |
+
+ params.wandb_sweep_id
|
| 544 |
+
+ "/"
|
| 545 |
+
+ run.name
|
| 546 |
+
)
|
| 547 |
+
items = os.listdir(ckpt_dir)
|
| 548 |
+
dirs = [
|
| 549 |
+
item
|
| 550 |
+
for item in items
|
| 551 |
+
if os.path.isdir(os.path.join(ckpt_dir, item))
|
| 552 |
+
]
|
| 553 |
+
if len(dirs) > 1:
|
| 554 |
+
print(
|
| 555 |
+
"WARNING: more than one checkpoint in run directory " + ckpt_dir
|
| 556 |
+
)
|
| 557 |
+
print("choosing " + dirs[0])
|
| 558 |
+
model_path = os.path.join(ckpt_dir, dirs[0])
|
| 559 |
+
dataset = get_test_set(
|
| 560 |
+
dset_name,
|
| 561 |
+
params.data_path,
|
| 562 |
+
params.initial_time,
|
| 563 |
+
params.final_time,
|
| 564 |
+
dataset_kwargs,
|
| 565 |
+
)
|
| 566 |
+
trainer = get_trainer(model_path, params.batch_size, dataset)
|
| 567 |
+
inputs = get_first_n_inputs(dataset, params.save_n_samples)
|
| 568 |
+
outputs, labels, _ = rollout(trainer, dataset, ar_steps=params.ar_steps)
|
| 569 |
+
if not os.path.exists(params.file + "/" + dset_name.replace(".", "-")):
|
| 570 |
+
os.makedirs(params.file + "/" + dset_name.replace(".", "-"))
|
| 571 |
+
if not os.path.exists(
|
| 572 |
+
params.file
|
| 573 |
+
+ "/"
|
| 574 |
+
+ dset_name.replace(".", "-")
|
| 575 |
+
+ "/"
|
| 576 |
+
+ str(num_trajectories)
|
| 577 |
+
):
|
| 578 |
+
os.makedirs(
|
| 579 |
+
params.file
|
| 580 |
+
+ "/"
|
| 581 |
+
+ dset_name.replace(".", "-")
|
| 582 |
+
+ "/"
|
| 583 |
+
+ str(num_trajectories)
|
| 584 |
+
)
|
| 585 |
+
np.save(
|
| 586 |
+
params.file
|
| 587 |
+
+ "/"
|
| 588 |
+
+ dset_name.replace(".", "-")
|
| 589 |
+
+ "/"
|
| 590 |
+
+ str(num_trajectories)
|
| 591 |
+
+ "/inputs.npy",
|
| 592 |
+
inputs.cpu().numpy(),
|
| 593 |
+
)
|
| 594 |
+
np.save(
|
| 595 |
+
params.file
|
| 596 |
+
+ "/"
|
| 597 |
+
+ dset_name.replace(".", "-")
|
| 598 |
+
+ "/"
|
| 599 |
+
+ str(num_trajectories)
|
| 600 |
+
+ "/labels.npy",
|
| 601 |
+
labels[: params.save_n_samples],
|
| 602 |
+
)
|
| 603 |
+
np.save(
|
| 604 |
+
params.file
|
| 605 |
+
+ "/"
|
| 606 |
+
+ dset_name.replace(".", "-")
|
| 607 |
+
+ "/"
|
| 608 |
+
+ str(num_trajectories)
|
| 609 |
+
+ "/"
|
| 610 |
+
+ "outputs.npy",
|
| 611 |
+
outputs[: params.save_n_samples],
|
| 612 |
+
)
|
| 613 |
+
else:
|
| 614 |
+
if params.mode == "eval":
|
| 615 |
+
dataset = get_test_set(
|
| 616 |
+
params.dataset,
|
| 617 |
+
params.data_path,
|
| 618 |
+
params.initial_time,
|
| 619 |
+
params.final_time,
|
| 620 |
+
dataset_kwargs,
|
| 621 |
+
)
|
| 622 |
+
trainer = get_trainer(
|
| 623 |
+
params.model_path,
|
| 624 |
+
params.batch_size,
|
| 625 |
+
dataset,
|
| 626 |
+
full_data=params.full_data,
|
| 627 |
+
)
|
| 628 |
+
_, _, metrics = rollout(
|
| 629 |
+
trainer,
|
| 630 |
+
dataset,
|
| 631 |
+
ar_steps=params.ar_steps,
|
| 632 |
+
output_all_steps=False,
|
| 633 |
+
)
|
| 634 |
+
data = {
|
| 635 |
+
"dataset": params.dataset,
|
| 636 |
+
"initial_time": params.initial_time,
|
| 637 |
+
"final_time": params.final_time,
|
| 638 |
+
"ar_steps": ar_steps,
|
| 639 |
+
**metrics,
|
| 640 |
+
}
|
| 641 |
+
data = [remove_underscore_dict(data)]
|
| 642 |
+
elif params.mode == "eval_sweep":
|
| 643 |
+
api = wandb.Api()
|
| 644 |
+
sweep = api.sweep(
|
| 645 |
+
params.wandb_entity
|
| 646 |
+
+ "/"
|
| 647 |
+
+ params.wandb_project
|
| 648 |
+
+ "/"
|
| 649 |
+
+ params.wandb_sweep_id
|
| 650 |
+
)
|
| 651 |
+
data = []
|
| 652 |
+
for run in sweep.runs:
|
| 653 |
+
if run.state == "finished" or (
|
| 654 |
+
params.allow_failed and run.state == "failed"
|
| 655 |
+
):
|
| 656 |
+
dset_name = (
|
| 657 |
+
run.config["dataset"]
|
| 658 |
+
if not params.append_time
|
| 659 |
+
else run.config["dataset"] + ".time"
|
| 660 |
+
)
|
| 661 |
+
if dset_name in params.exclude_dataset:
|
| 662 |
+
continue
|
| 663 |
+
if (
|
| 664 |
+
len(params.exclusively_evaluate_dataset) > 0
|
| 665 |
+
and dset_name not in params.exclusively_evaluate_dataset
|
| 666 |
+
):
|
| 667 |
+
continue
|
| 668 |
+
num_trajectories = run.config["num_trajectories"]
|
| 669 |
+
ckpt_dir = (
|
| 670 |
+
params.ckpt_dir
|
| 671 |
+
+ "/"
|
| 672 |
+
+ params.wandb_project
|
| 673 |
+
+ "/"
|
| 674 |
+
+ params.wandb_sweep_id
|
| 675 |
+
+ "/"
|
| 676 |
+
+ run.name
|
| 677 |
+
)
|
| 678 |
+
items = os.listdir(ckpt_dir)
|
| 679 |
+
dirs = [
|
| 680 |
+
item
|
| 681 |
+
for item in items
|
| 682 |
+
if os.path.isdir(os.path.join(ckpt_dir, item))
|
| 683 |
+
]
|
| 684 |
+
if len(dirs) > 1:
|
| 685 |
+
print(
|
| 686 |
+
"WARNING: more than one checkpoint in run directory "
|
| 687 |
+
+ ckpt_dir
|
| 688 |
+
)
|
| 689 |
+
print("choosing " + dirs[0])
|
| 690 |
+
continue
|
| 691 |
+
if len(dirs) == 0:
|
| 692 |
+
continue
|
| 693 |
+
model_path = os.path.join(ckpt_dir, dirs[0])
|
| 694 |
+
dataset = get_test_set(
|
| 695 |
+
dset_name,
|
| 696 |
+
params.data_path,
|
| 697 |
+
params.initial_time,
|
| 698 |
+
params.final_time,
|
| 699 |
+
dataset_kwargs,
|
| 700 |
+
)
|
| 701 |
+
trainer = get_trainer(
|
| 702 |
+
model_path,
|
| 703 |
+
params.batch_size,
|
| 704 |
+
dataset,
|
| 705 |
+
full_data=params.full_data,
|
| 706 |
+
)
|
| 707 |
+
_, _, metrics = rollout(
|
| 708 |
+
trainer,
|
| 709 |
+
dataset,
|
| 710 |
+
ar_steps=params.ar_steps,
|
| 711 |
+
output_all_steps=False,
|
| 712 |
+
)
|
| 713 |
+
data.append(
|
| 714 |
+
remove_underscore_dict(
|
| 715 |
+
{
|
| 716 |
+
"dataset": dset_name,
|
| 717 |
+
"num_trajectories": num_trajectories,
|
| 718 |
+
"initial_time": params.initial_time,
|
| 719 |
+
"final_time": params.final_time,
|
| 720 |
+
"ar_steps": ar_steps,
|
| 721 |
+
**metrics,
|
| 722 |
+
}
|
| 723 |
+
)
|
| 724 |
+
)
|
| 725 |
+
elif params.mode == "eval_accumulation_error":
|
| 726 |
+
dataset = get_test_set(
|
| 727 |
+
params.dataset,
|
| 728 |
+
params.data_path,
|
| 729 |
+
params.initial_time,
|
| 730 |
+
params.final_time,
|
| 731 |
+
dataset_kwargs,
|
| 732 |
+
)
|
| 733 |
+
trainer = get_trainer(
|
| 734 |
+
params.model_path,
|
| 735 |
+
params.batch_size,
|
| 736 |
+
dataset,
|
| 737 |
+
output_all_steps=True,
|
| 738 |
+
full_data=params.full_data,
|
| 739 |
+
)
|
| 740 |
+
predictions, _, _ = rollout(
|
| 741 |
+
trainer,
|
| 742 |
+
dataset,
|
| 743 |
+
ar_steps=params.ar_steps,
|
| 744 |
+
output_all_steps=True,
|
| 745 |
+
)
|
| 746 |
+
labels = get_trajectories(
|
| 747 |
+
params.dataset,
|
| 748 |
+
params.data_path,
|
| 749 |
+
params.ar_steps,
|
| 750 |
+
params.initial_time,
|
| 751 |
+
params.final_time,
|
| 752 |
+
dataset_kwargs,
|
| 753 |
+
)
|
| 754 |
+
|
| 755 |
+
def compute_metrics(eval_preds):
|
| 756 |
+
channel_list = dataset.channel_slice_list
|
| 757 |
+
|
| 758 |
+
def get_relative_statistics(errors):
|
| 759 |
+
median_error = np.median(errors, axis=0)
|
| 760 |
+
mean_error = np.mean(errors, axis=0)
|
| 761 |
+
std_error = np.std(errors, axis=0)
|
| 762 |
+
min_error = np.min(errors, axis=0)
|
| 763 |
+
max_error = np.max(errors, axis=0)
|
| 764 |
+
return {
|
| 765 |
+
"median_relative_l1_error": median_error,
|
| 766 |
+
"mean_relative_l1_error": mean_error,
|
| 767 |
+
"std_relative_l1_error": std_error,
|
| 768 |
+
"min_relative_l1_error": min_error,
|
| 769 |
+
"max_relative_l1_error": max_error,
|
| 770 |
+
}
|
| 771 |
+
|
| 772 |
+
def get_statistics(errors):
|
| 773 |
+
median_error = np.median(errors, axis=0)
|
| 774 |
+
mean_error = np.mean(errors, axis=0)
|
| 775 |
+
std_error = np.std(errors, axis=0)
|
| 776 |
+
min_error = np.min(errors, axis=0)
|
| 777 |
+
max_error = np.max(errors, axis=0)
|
| 778 |
+
return {
|
| 779 |
+
"median_l1_error": median_error,
|
| 780 |
+
"mean_l1_error": mean_error,
|
| 781 |
+
"std_l1_error": std_error,
|
| 782 |
+
"min_l1_error": min_error,
|
| 783 |
+
"max_l1_error": max_error,
|
| 784 |
+
}
|
| 785 |
+
|
| 786 |
+
relative_errors = [
|
| 787 |
+
relative_lp_error(
|
| 788 |
+
eval_preds.predictions[
|
| 789 |
+
:, channel_list[i] : channel_list[i + 1]
|
| 790 |
+
],
|
| 791 |
+
eval_preds.label_ids[:, channel_list[i] : channel_list[i + 1]],
|
| 792 |
+
p=1,
|
| 793 |
+
return_percent=True,
|
| 794 |
+
)
|
| 795 |
+
for i in range(len(channel_list) - 1)
|
| 796 |
+
]
|
| 797 |
+
|
| 798 |
+
errors = [
|
| 799 |
+
lp_error(
|
| 800 |
+
eval_preds.predictions[
|
| 801 |
+
:, channel_list[i] : channel_list[i + 1]
|
| 802 |
+
],
|
| 803 |
+
eval_preds.label_ids[:, channel_list[i] : channel_list[i + 1]],
|
| 804 |
+
p=1,
|
| 805 |
+
)
|
| 806 |
+
for i in range(len(channel_list) - 1)
|
| 807 |
+
]
|
| 808 |
+
|
| 809 |
+
relative_error_statistics = [
|
| 810 |
+
get_relative_statistics(relative_errors[i])
|
| 811 |
+
for i in range(len(channel_list) - 1)
|
| 812 |
+
]
|
| 813 |
+
|
| 814 |
+
error_statistics = [
|
| 815 |
+
get_statistics(errors[i]) for i in range(len(channel_list) - 1)
|
| 816 |
+
]
|
| 817 |
+
|
| 818 |
+
if dataset.output_dim == 1:
|
| 819 |
+
relative_error_statistics = relative_error_statistics[0]
|
| 820 |
+
error_statistics = error_statistics[0]
|
| 821 |
+
if params.full_data:
|
| 822 |
+
relative_error_statistics["relative_full_data"] = (
|
| 823 |
+
relative_errors[0].tolist()
|
| 824 |
+
)
|
| 825 |
+
error_statistics["full_data"] = errors[0].tolist()
|
| 826 |
+
return {**relative_error_statistics, **error_statistics}
|
| 827 |
+
else:
|
| 828 |
+
mean_over_relative_means = np.mean(
|
| 829 |
+
np.array(
|
| 830 |
+
[
|
| 831 |
+
stats["mean_relative_l1_error"]
|
| 832 |
+
for stats in relative_error_statistics
|
| 833 |
+
]
|
| 834 |
+
),
|
| 835 |
+
axis=0,
|
| 836 |
+
)
|
| 837 |
+
mean_over_relative_medians = np.mean(
|
| 838 |
+
np.array(
|
| 839 |
+
[
|
| 840 |
+
stats["median_relative_l1_error"]
|
| 841 |
+
for stats in relative_error_statistics
|
| 842 |
+
]
|
| 843 |
+
),
|
| 844 |
+
axis=0,
|
| 845 |
+
)
|
| 846 |
+
mean_over_means = np.mean(
|
| 847 |
+
np.array(
|
| 848 |
+
[stats["mean_l1_error"] for stats in error_statistics]
|
| 849 |
+
),
|
| 850 |
+
axis=0,
|
| 851 |
+
)
|
| 852 |
+
mean_over_medians = np.mean(
|
| 853 |
+
np.array(
|
| 854 |
+
[stats["median_l1_error"] for stats in error_statistics]
|
| 855 |
+
),
|
| 856 |
+
axis=0,
|
| 857 |
+
)
|
| 858 |
+
|
| 859 |
+
error_statistics_ = {
|
| 860 |
+
"mean_relative_l1_error": mean_over_relative_means,
|
| 861 |
+
"mean_over_median_relative_l1_error": mean_over_relative_medians,
|
| 862 |
+
"mean_l1_error": mean_over_means,
|
| 863 |
+
"mean_over_median_l1_error": mean_over_medians,
|
| 864 |
+
}
|
| 865 |
+
#!! The above is different from train and finetune (here mean_relative_l1_error is mean over medians instead of mean over means)
|
| 866 |
+
for i, stats in enumerate(relative_error_statistics):
|
| 867 |
+
for key, value in stats.items():
|
| 868 |
+
error_statistics_[
|
| 869 |
+
dataset.printable_channel_description[i] + "/" + key
|
| 870 |
+
] = value
|
| 871 |
+
if params.full_data:
|
| 872 |
+
error_statistics_[
|
| 873 |
+
dataset.printable_channel_description[i]
|
| 874 |
+
+ "/"
|
| 875 |
+
+ "relative_full_data"
|
| 876 |
+
] = relative_errors[i].tolist()
|
| 877 |
+
for i, stats in enumerate(error_statistics):
|
| 878 |
+
for key, value in stats.items():
|
| 879 |
+
error_statistics_[
|
| 880 |
+
dataset.printable_channel_description[i] + "/" + key
|
| 881 |
+
] = value
|
| 882 |
+
if params.full_data:
|
| 883 |
+
error_statistics_[
|
| 884 |
+
dataset.printable_channel_description[i]
|
| 885 |
+
+ "/"
|
| 886 |
+
+ "full_data"
|
| 887 |
+
] = errors[i].tolist()
|
| 888 |
+
return error_statistics_
|
| 889 |
+
|
| 890 |
+
data = []
|
| 891 |
+
for step in range(predictions.shape[1]):
|
| 892 |
+
metrics = compute_metrics(
|
| 893 |
+
EvalPrediction(predictions[:, step], labels[:, step].cpu().numpy())
|
| 894 |
+
)
|
| 895 |
+
if isinstance(params.ar_steps, int):
|
| 896 |
+
delta = (params.final_time - params.initial_time) // params.ar_steps
|
| 897 |
+
else:
|
| 898 |
+
delta = params.ar_steps[step]
|
| 899 |
+
data.append(
|
| 900 |
+
remove_underscore_dict(
|
| 901 |
+
{
|
| 902 |
+
"dataset": params.dataset,
|
| 903 |
+
"initial_time": params.initial_time + step * delta,
|
| 904 |
+
"final_time": params.initial_time + (step + 1) * delta,
|
| 905 |
+
**metrics,
|
| 906 |
+
}
|
| 907 |
+
)
|
| 908 |
+
)
|
| 909 |
+
elif params.mode == "eval_resolutions":
|
| 910 |
+
data = []
|
| 911 |
+
for resolution in params.resolutions:
|
| 912 |
+
dataset_kwargs = {"resolution": resolution}
|
| 913 |
+
dataset = get_test_set(
|
| 914 |
+
params.dataset,
|
| 915 |
+
params.data_path,
|
| 916 |
+
params.initial_time,
|
| 917 |
+
params.final_time,
|
| 918 |
+
dataset_kwargs,
|
| 919 |
+
)
|
| 920 |
+
trainer = get_trainer(
|
| 921 |
+
params.model_path,
|
| 922 |
+
params.batch_size,
|
| 923 |
+
dataset,
|
| 924 |
+
full_data=params.full_data,
|
| 925 |
+
)
|
| 926 |
+
_, _, metrics = rollout(
|
| 927 |
+
trainer,
|
| 928 |
+
dataset,
|
| 929 |
+
ar_steps=params.ar_steps,
|
| 930 |
+
output_all_steps=False,
|
| 931 |
+
)
|
| 932 |
+
data.append(
|
| 933 |
+
remove_underscore_dict(
|
| 934 |
+
{
|
| 935 |
+
"dataset": params.dataset,
|
| 936 |
+
"initial_time": params.initial_time,
|
| 937 |
+
"final_time": params.final_time,
|
| 938 |
+
"ar_steps": ar_steps,
|
| 939 |
+
"resolution": resolution,
|
| 940 |
+
**metrics,
|
| 941 |
+
}
|
| 942 |
+
)
|
| 943 |
+
)
|
| 944 |
+
|
| 945 |
+
if os.path.exists(params.file):
|
| 946 |
+
df = pd.read_csv(params.file)
|
| 947 |
+
else:
|
| 948 |
+
df = pd.DataFrame()
|
| 949 |
+
df = pd.concat([df, pd.DataFrame(data)], ignore_index=True)
|
| 950 |
+
df.to_csv(params.file, index=False)
|
external/poseidon/scOT/metrics.py
ADDED
@@ -0,0 +1,55 @@
import numpy as np


def lp_error(preds: np.ndarray, targets: np.ndarray, p=1):
    num_samples, num_channels, _, _ = preds.shape
    preds = preds.reshape(num_samples, num_channels, -1)
    targets = targets.reshape(num_samples, num_channels, -1)
    errors = np.sum(np.abs(preds - targets) ** p, axis=-1)
    return np.sum(errors, axis=-1) ** (1 / p)


def relative_lp_error(
    preds: np.ndarray,
    targets: np.ndarray,
    p=1,
    return_percent=True,
):
    num_samples, num_channels, _, _ = preds.shape
    preds = preds.reshape(num_samples, num_channels, -1)
    targets = targets.reshape(num_samples, num_channels, -1)
    errors = np.sum(np.abs(preds - targets) ** p, axis=-1)
    normalization_factor = np.sum(np.abs(targets) ** p, axis=-1)

    # catch 0 division
    normalization_factor = np.sum(normalization_factor, axis=-1)
    normalization_factor = np.where(
        normalization_factor == 0, 1e-10, normalization_factor
    )

    errors = (np.sum(errors, axis=-1) / normalization_factor) ** (1 / p)

    if return_percent:
        errors *= 100

    return errors


def mean_relative_lp_error(
    preds: np.ndarray,
    targets: np.ndarray,
    p=1,
    return_percent=True,
):
    errors = relative_lp_error(preds, targets, p, return_percent)
    return np.mean(errors, axis=0)


def median_relative_lp_error(
    preds: np.ndarray,
    targets: np.ndarray,
    p=1,
    return_percent=True,
):
    errors = relative_lp_error(preds, targets, p, return_percent)
    return np.median(errors, axis=0)
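For reference, these helpers reduce over channels and spatial locations and return one (relative) Lp error per sample. A minimal usage sketch, assuming the repo's `scOT` package is importable and using random placeholder arrays:

```python
import numpy as np

# Assumes scOT is installed/importable from this repo.
from scOT.metrics import relative_lp_error, mean_relative_lp_error

preds = np.random.rand(8, 4, 32, 32)    # (samples, channels, height, width), placeholder data
targets = np.random.rand(8, 4, 32, 32)

errors = relative_lp_error(preds, targets, p=1)       # shape (8,): relative L1 error, in percent
print(errors.shape, float(errors.mean()))
print(float(mean_relative_lp_error(preds, targets)))  # same mean, computed in one call
```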
external/poseidon/scOT/model.py
ADDED
@@ -0,0 +1,1485 @@
| 1 |
+
"""
|
| 2 |
+
This file contains scOT.
|
| 3 |
+
|
| 4 |
+
A lot of this file is taken from the transformers library and changed to our purposes. Huggingface Transformers is licensed under
|
| 5 |
+
Apache 2.0 License, see trainer.py for details.
|
| 6 |
+
|
| 7 |
+
We follow https://github.com/huggingface/transformers/blob/v4.35.2/src/transformers/models/swinv2/configuration_swinv2.py
|
| 8 |
+
and https://github.com/huggingface/transformers/blob/v4.35.2/src/transformers/models/swinv2/modeling_swinv2.py#L1129
|
| 9 |
+
|
| 10 |
+
The class ConvNeXtBlock is taken from the facebookresearch/ConvNeXt repository and is licensed under the MIT License,
|
| 11 |
+
|
| 12 |
+
MIT License
|
| 13 |
+
|
| 14 |
+
Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 15 |
+
|
| 16 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 17 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 18 |
+
in the Software without restriction, including without limitation the rights
|
| 19 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 20 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 21 |
+
furnished to do so, subject to the following conditions:
|
| 22 |
+
|
| 23 |
+
The above copyright notice and this permission notice shall be included in all
|
| 24 |
+
copies or substantial portions of the Software.
|
| 25 |
+
|
| 26 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 27 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 28 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 29 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 30 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 31 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 32 |
+
SOFTWARE.
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
from transformers import (
|
| 36 |
+
Swinv2PreTrainedModel,
|
| 37 |
+
PretrainedConfig,
|
| 38 |
+
)
|
| 39 |
+
from transformers.models.swinv2.modeling_swinv2 import (
|
| 40 |
+
Swinv2EncoderOutput,
|
| 41 |
+
Swinv2Attention,
|
| 42 |
+
Swinv2DropPath,
|
| 43 |
+
Swinv2Intermediate,
|
| 44 |
+
Swinv2Output,
|
| 45 |
+
window_reverse,
|
| 46 |
+
window_partition,
|
| 47 |
+
)
|
| 48 |
+
from transformers.utils import ModelOutput
|
| 49 |
+
from dataclasses import dataclass
|
| 50 |
+
import torch
|
| 51 |
+
from torch import nn
|
| 52 |
+
from typing import Optional, Union, Tuple, List
|
| 53 |
+
import math
|
| 54 |
+
import collections
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
@dataclass
|
| 58 |
+
class ScOTOutput(ModelOutput):
|
| 59 |
+
loss: Optional[torch.FloatTensor] = None
|
| 60 |
+
output: torch.FloatTensor = None
|
| 61 |
+
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
| 62 |
+
attentions: Optional[Tuple[torch.FloatTensor]] = None
|
| 63 |
+
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class ScOTConfig(PretrainedConfig):
|
| 67 |
+
"""https://github.com/huggingface/transformers/blob/v4.35.2/src/transformers/models/swinv2/configuration_swinv2.py"""
|
| 68 |
+
|
| 69 |
+
model_type = "swinv2"
|
| 70 |
+
|
| 71 |
+
attribute_map = {
|
| 72 |
+
"num_attention_heads": "num_heads",
|
| 73 |
+
"num_hidden_layers": "num_layers",
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
def __init__(
|
| 77 |
+
self,
|
| 78 |
+
image_size=224,
|
| 79 |
+
patch_size=4,
|
| 80 |
+
num_channels=3,
|
| 81 |
+
num_out_channels=1,
|
| 82 |
+
embed_dim=96,
|
| 83 |
+
depths=[2, 2, 6, 2],
|
| 84 |
+
num_heads=[3, 6, 12, 24],
|
| 85 |
+
skip_connections=[True, True, True],
|
| 86 |
+
window_size=7,
|
| 87 |
+
mlp_ratio=4.0,
|
| 88 |
+
qkv_bias=True,
|
| 89 |
+
hidden_dropout_prob=0.0,
|
| 90 |
+
attention_probs_dropout_prob=0.0,
|
| 91 |
+
drop_path_rate=0.1,
|
| 92 |
+
hidden_act="gelu",
|
| 93 |
+
use_absolute_embeddings=False,
|
| 94 |
+
initializer_range=0.02,
|
| 95 |
+
layer_norm_eps=1e-5,
|
| 96 |
+
p=1, # for loss: 1 for l1, 2 for l2
|
| 97 |
+
channel_slice_list_normalized_loss=None, # if None will fall back to absolute loss otherwise normalized loss with split channels
|
| 98 |
+
residual_model="convnext", # "convnext" or "resnet"
|
| 99 |
+
use_conditioning=False,
|
| 100 |
+
learn_residual=False, # learn the residual for time-dependent problems
|
| 101 |
+
**kwargs,
|
| 102 |
+
):
|
| 103 |
+
super().__init__(**kwargs)
|
| 104 |
+
|
| 105 |
+
self.image_size = image_size
|
| 106 |
+
self.patch_size = patch_size
|
| 107 |
+
self.num_channels = num_channels
|
| 108 |
+
self.embed_dim = embed_dim
|
| 109 |
+
self.depths = depths
|
| 110 |
+
self.num_layers = len(depths)
|
| 111 |
+
self.num_heads = num_heads
|
| 112 |
+
self.skip_connections = skip_connections
|
| 113 |
+
self.window_size = window_size
|
| 114 |
+
self.mlp_ratio = mlp_ratio
|
| 115 |
+
self.qkv_bias = qkv_bias
|
| 116 |
+
self.hidden_dropout_prob = hidden_dropout_prob
|
| 117 |
+
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
| 118 |
+
self.drop_path_rate = drop_path_rate
|
| 119 |
+
self.hidden_act = hidden_act
|
| 120 |
+
self.use_absolute_embeddings = use_absolute_embeddings
|
| 121 |
+
self.use_conditioning = use_conditioning
|
| 122 |
+
self.learn_residual = learn_residual if self.use_conditioning else False
|
| 123 |
+
self.layer_norm_eps = layer_norm_eps
|
| 124 |
+
self.initializer_range = initializer_range
|
| 125 |
+
# we set the hidden_size attribute in order to make Swinv2 work with VisionEncoderDecoderModel
|
| 126 |
+
# this indicates the channel dimension after the last stage of the model
|
| 127 |
+
self.hidden_size = int(embed_dim * 2 ** (len(depths) - 1))
|
| 128 |
+
self.pretrained_window_sizes = (0, 0, 0, 0)
|
| 129 |
+
self.num_out_channels = num_out_channels
|
| 130 |
+
self.p = p
|
| 131 |
+
self.channel_slice_list_normalized_loss = channel_slice_list_normalized_loss
|
| 132 |
+
self.residual_model = residual_model
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
class LayerNorm(nn.LayerNorm):
|
| 136 |
+
def __init__(self, *args, **kwargs):
|
| 137 |
+
super().__init__(*args, **kwargs)
|
| 138 |
+
|
| 139 |
+
def forward(self, x, time):
|
| 140 |
+
return super().forward(x)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
class ConditionalLayerNorm(nn.Module):
|
| 144 |
+
def __init__(self, dim, eps=1e-5):
|
| 145 |
+
super().__init__()
|
| 146 |
+
self.eps = eps
|
| 147 |
+
self.weight = nn.Linear(1, dim)
|
| 148 |
+
self.bias = nn.Linear(1, dim)
|
| 149 |
+
|
| 150 |
+
def forward(self, x, time):
|
| 151 |
+
mean = x.mean(dim=-1, keepdim=True)
|
| 152 |
+
var = (x**2).mean(dim=-1, keepdim=True) - mean**2
|
| 153 |
+
x = (x - mean) / (var + self.eps).sqrt()
|
| 154 |
+
time = time.reshape(-1, 1).type_as(x)
|
| 155 |
+
weight = self.weight(time).unsqueeze(1)
|
| 156 |
+
bias = self.bias(time).unsqueeze(1)
|
| 157 |
+
if x.dim() == 4:
|
| 158 |
+
weight = weight.unsqueeze(1)
|
| 159 |
+
bias = bias.unsqueeze(1)
|
| 160 |
+
return weight * x + bias
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
class ConvNeXtBlock(nn.Module):
|
| 164 |
+
r"""Taken from: https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py
|
| 165 |
+
ConvNeXt Block. There are two equivalent implementations:
|
| 166 |
+
(1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
|
| 167 |
+
(2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
|
| 168 |
+
We use (2) as we find it slightly faster in PyTorch
|
| 169 |
+
|
| 170 |
+
Args:
|
| 171 |
+
dim (int): Number of input channels.
|
| 172 |
+
drop_path (float): Stochastic depth rate. Default: 0.0
|
| 173 |
+
layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
|
| 174 |
+
"""
|
| 175 |
+
|
| 176 |
+
def __init__(self, config, dim, drop_path=0.0, layer_scale_init_value=1e-6):
|
| 177 |
+
super().__init__()
|
| 178 |
+
self.dwconv = nn.Conv2d(
|
| 179 |
+
dim, dim, kernel_size=7, padding=3, groups=dim
|
| 180 |
+
) # depthwise conv
|
| 181 |
+
if config.use_conditioning:
|
| 182 |
+
layer_norm = ConditionalLayerNorm
|
| 183 |
+
else:
|
| 184 |
+
layer_norm = LayerNorm
|
| 185 |
+
self.norm = layer_norm(dim, eps=config.layer_norm_eps)
|
| 186 |
+
self.pwconv1 = nn.Linear(
|
| 187 |
+
dim, 4 * dim
|
| 188 |
+
) # pointwise/1x1 convs, implemented with linear layers
|
| 189 |
+
self.act = nn.GELU()
|
| 190 |
+
self.pwconv2 = nn.Linear(4 * dim, dim)
|
| 191 |
+
self.weight = (
|
| 192 |
+
nn.Parameter(layer_scale_init_value * torch.ones((dim)), requires_grad=True)
|
| 193 |
+
if layer_scale_init_value > 0
|
| 194 |
+
else None
|
| 195 |
+
) # was gamma before
|
| 196 |
+
self.drop_path = Swinv2DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
|
| 197 |
+
|
| 198 |
+
def forward(self, x, time):
|
| 199 |
+
batch_size, sequence_length, hidden_size = x.shape
|
| 200 |
+
#! assumes square images
|
| 201 |
+
input_dim = math.floor(sequence_length**0.5)
|
| 202 |
+
|
| 203 |
+
input = x
|
| 204 |
+
x = x.reshape(batch_size, input_dim, input_dim, hidden_size)
|
| 205 |
+
x = x.permute(0, 3, 1, 2)
|
| 206 |
+
x = self.dwconv(x)
|
| 207 |
+
x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
|
| 208 |
+
x = self.norm(x, time)
|
| 209 |
+
x = self.pwconv1(x)
|
| 210 |
+
x = self.act(x)
|
| 211 |
+
x = self.pwconv2(x)
|
| 212 |
+
if self.weight is not None:
|
| 213 |
+
x = self.weight * x
|
| 214 |
+
x = x.reshape(batch_size, sequence_length, hidden_size)
|
| 215 |
+
|
| 216 |
+
x = input + self.drop_path(x)
|
| 217 |
+
return x
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
class ResNetBlock(nn.Module):
|
| 221 |
+
def __init__(self, config, dim):
|
| 222 |
+
super().__init__()
|
| 223 |
+
kernel_size = 3
|
| 224 |
+
pad = (kernel_size - 1) // 2
|
| 225 |
+
self.conv1 = nn.Conv2d(dim, dim, kernel_size=kernel_size, stride=1, padding=pad)
|
| 226 |
+
self.conv2 = nn.Conv2d(dim, dim, kernel_size=kernel_size, stride=1, padding=pad)
|
| 227 |
+
self.bn1 = nn.BatchNorm2d(dim)
|
| 228 |
+
self.bn2 = nn.BatchNorm2d(dim)
|
| 229 |
+
|
| 230 |
+
def forward(self, x, time):
|
| 231 |
+
batch_size, sequence_length, hidden_size = x.shape
|
| 232 |
+
#! assumes square images
|
| 233 |
+
input_dim = math.floor(sequence_length**0.5)
|
| 234 |
+
|
| 235 |
+
input = x
|
| 236 |
+
x = x.reshape(batch_size, input_dim, input_dim, hidden_size)
|
| 237 |
+
x = x.permute(0, 3, 1, 2)
|
| 238 |
+
x = self.conv1(x)
|
| 239 |
+
x = self.bn1(x)
|
| 240 |
+
x = nn.functional.leaky_relu(x)
|
| 241 |
+
x = self.conv2(x)
|
| 242 |
+
x = self.bn2(x)
|
| 243 |
+
x = x.permute(0, 2, 3, 1)
|
| 244 |
+
x = x.reshape(batch_size, sequence_length, hidden_size)
|
| 245 |
+
x = x + input
|
| 246 |
+
return x
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
class ScOTPatchEmbeddings(nn.Module):
|
| 250 |
+
"""
|
| 251 |
+
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
|
| 252 |
+
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
|
| 253 |
+
Transformer.
|
| 254 |
+
"""
|
| 255 |
+
|
| 256 |
+
def __init__(self, config):
|
| 257 |
+
super().__init__()
|
| 258 |
+
image_size, patch_size = config.image_size, config.patch_size
|
| 259 |
+
num_channels, hidden_size = config.num_channels, config.embed_dim
|
| 260 |
+
image_size = (
|
| 261 |
+
image_size
|
| 262 |
+
if isinstance(image_size, collections.abc.Iterable)
|
| 263 |
+
else (image_size, image_size)
|
| 264 |
+
)
|
| 265 |
+
patch_size = (
|
| 266 |
+
patch_size
|
| 267 |
+
if isinstance(patch_size, collections.abc.Iterable)
|
| 268 |
+
else (patch_size, patch_size)
|
| 269 |
+
)
|
| 270 |
+
num_patches = (image_size[1] // patch_size[1]) * (
|
| 271 |
+
image_size[0] // patch_size[0]
|
| 272 |
+
)
|
| 273 |
+
self.image_size = image_size
|
| 274 |
+
self.patch_size = patch_size
|
| 275 |
+
self.num_channels = num_channels
|
| 276 |
+
self.num_patches = num_patches
|
| 277 |
+
self.grid_size = (
|
| 278 |
+
image_size[0] // patch_size[0],
|
| 279 |
+
image_size[1] // patch_size[1],
|
| 280 |
+
)
|
| 281 |
+
|
| 282 |
+
self.projection = nn.Conv2d(
|
| 283 |
+
num_channels, hidden_size, kernel_size=patch_size, stride=patch_size
|
| 284 |
+
)
|
| 285 |
+
|
| 286 |
+
def maybe_pad(self, pixel_values, height, width):
|
| 287 |
+
if width % self.patch_size[1] != 0:
|
| 288 |
+
pad_values = (0, self.patch_size[1] - width % self.patch_size[1])
|
| 289 |
+
pixel_values = nn.functional.pad(pixel_values, pad_values)
|
| 290 |
+
if height % self.patch_size[0] != 0:
|
| 291 |
+
pad_values = (0, 0, 0, self.patch_size[0] - height % self.patch_size[0])
|
| 292 |
+
pixel_values = nn.functional.pad(pixel_values, pad_values)
|
| 293 |
+
return pixel_values
|
| 294 |
+
|
| 295 |
+
def forward(
|
| 296 |
+
self, pixel_values: Optional[torch.FloatTensor]
|
| 297 |
+
) -> Tuple[torch.Tensor, Tuple[int]]:
|
| 298 |
+
_, num_channels, height, width = pixel_values.shape
|
| 299 |
+
if num_channels != self.num_channels:
|
| 300 |
+
raise ValueError(
|
| 301 |
+
"Make sure that the channel dimension of the pixel values match with the one set in the configuration."
|
| 302 |
+
)
|
| 303 |
+
# pad the input to be divisible by self.patch_size, if needed
|
| 304 |
+
pixel_values = self.maybe_pad(pixel_values, height, width)
|
| 305 |
+
embeddings = self.projection(pixel_values)
|
| 306 |
+
_, _, height, width = embeddings.shape
|
| 307 |
+
output_dimensions = (height, width)
|
| 308 |
+
embeddings = embeddings.flatten(2).transpose(1, 2)
|
| 309 |
+
|
| 310 |
+
return embeddings, output_dimensions
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
class ScOTEmbeddings(nn.Module):
|
| 314 |
+
"""
|
| 315 |
+
Construct the patch and position embeddings. Optionally, also the mask token.
|
| 316 |
+
"""
|
| 317 |
+
|
| 318 |
+
def __init__(self, config, use_mask_token=False):
|
| 319 |
+
super().__init__()
|
| 320 |
+
|
| 321 |
+
self.patch_embeddings = ScOTPatchEmbeddings(config)
|
| 322 |
+
num_patches = self.patch_embeddings.num_patches
|
| 323 |
+
self.patch_grid = self.patch_embeddings.grid_size
|
| 324 |
+
self.mask_token = (
|
| 325 |
+
nn.Parameter(torch.zeros(1, 1, config.embed_dim))
|
| 326 |
+
if use_mask_token
|
| 327 |
+
else None
|
| 328 |
+
)
|
| 329 |
+
|
| 330 |
+
if config.use_absolute_embeddings:
|
| 331 |
+
self.position_embeddings = nn.Parameter(
|
| 332 |
+
torch.zeros(1, num_patches, config.embed_dim)
|
| 333 |
+
)
|
| 334 |
+
else:
|
| 335 |
+
self.position_embeddings = None
|
| 336 |
+
|
| 337 |
+
if config.use_conditioning:
|
| 338 |
+
layer_norm = ConditionalLayerNorm
|
| 339 |
+
else:
|
| 340 |
+
layer_norm = LayerNorm
|
| 341 |
+
|
| 342 |
+
self.norm = layer_norm(config.embed_dim)
|
| 343 |
+
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
| 344 |
+
|
| 345 |
+
def forward(
|
| 346 |
+
self,
|
| 347 |
+
pixel_values: Optional[torch.FloatTensor],
|
| 348 |
+
bool_masked_pos: Optional[torch.BoolTensor] = None,
|
| 349 |
+
time: Optional[torch.FloatTensor] = None,
|
| 350 |
+
) -> Tuple[torch.Tensor]:
|
| 351 |
+
embeddings, output_dimensions = self.patch_embeddings(pixel_values)
|
| 352 |
+
embeddings = self.norm(embeddings, time)
|
| 353 |
+
batch_size, seq_len, _ = embeddings.size()
|
| 354 |
+
|
| 355 |
+
if bool_masked_pos is not None:
|
| 356 |
+
mask_tokens = self.mask_token.expand(batch_size, seq_len, -1)
|
| 357 |
+
# replace the masked visual tokens by mask_tokens
|
| 358 |
+
mask = bool_masked_pos.unsqueeze(-1).type_as(mask_tokens)
|
| 359 |
+
embeddings = embeddings * (1.0 - mask) + mask_tokens * mask
|
| 360 |
+
|
| 361 |
+
if self.position_embeddings is not None:
|
| 362 |
+
embeddings = embeddings + self.position_embeddings
|
| 363 |
+
|
| 364 |
+
embeddings = self.dropout(embeddings)
|
| 365 |
+
|
| 366 |
+
return embeddings, output_dimensions
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
class ScOTLayer(nn.Module):
|
| 370 |
+
def __init__(
|
| 371 |
+
self,
|
| 372 |
+
config,
|
| 373 |
+
dim,
|
| 374 |
+
input_resolution,
|
| 375 |
+
num_heads,
|
| 376 |
+
drop_path=0.0,
|
| 377 |
+
shift_size=0,
|
| 378 |
+
pretrained_window_size=0,
|
| 379 |
+
):
|
| 380 |
+
super().__init__()
|
| 381 |
+
self.chunk_size_feed_forward = config.chunk_size_feed_forward
|
| 382 |
+
self.shift_size = shift_size
|
| 383 |
+
self.window_size = config.window_size
|
| 384 |
+
self.input_resolution = input_resolution
|
| 385 |
+
self.set_shift_and_window_size(input_resolution)
|
| 386 |
+
self.attention = Swinv2Attention(
|
| 387 |
+
config=config,
|
| 388 |
+
dim=dim,
|
| 389 |
+
num_heads=num_heads,
|
| 390 |
+
window_size=self.window_size,
|
| 391 |
+
pretrained_window_size=(
|
| 392 |
+
pretrained_window_size
|
| 393 |
+
if isinstance(pretrained_window_size, collections.abc.Iterable)
|
| 394 |
+
else (pretrained_window_size, pretrained_window_size)
|
| 395 |
+
),
|
| 396 |
+
)
|
| 397 |
+
if config.use_conditioning:
|
| 398 |
+
layer_norm = ConditionalLayerNorm
|
| 399 |
+
else:
|
| 400 |
+
layer_norm = LayerNorm
|
| 401 |
+
self.layernorm_before = layer_norm(dim, eps=config.layer_norm_eps)
|
| 402 |
+
self.drop_path = Swinv2DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
|
| 403 |
+
self.intermediate = Swinv2Intermediate(config, dim)
|
| 404 |
+
self.output = Swinv2Output(config, dim)
|
| 405 |
+
self.layernorm_after = layer_norm(dim, eps=config.layer_norm_eps)
|
| 406 |
+
|
| 407 |
+
def set_shift_and_window_size(self, input_resolution):
|
| 408 |
+
target_window_size = (
|
| 409 |
+
self.window_size
|
| 410 |
+
if isinstance(self.window_size, collections.abc.Iterable)
|
| 411 |
+
else (self.window_size, self.window_size)
|
| 412 |
+
)
|
| 413 |
+
target_shift_size = (
|
| 414 |
+
self.shift_size
|
| 415 |
+
if isinstance(self.shift_size, collections.abc.Iterable)
|
| 416 |
+
else (self.shift_size, self.shift_size)
|
| 417 |
+
)
|
| 418 |
+
window_dim = (
|
| 419 |
+
input_resolution[0].item()
|
| 420 |
+
if torch.is_tensor(input_resolution[0])
|
| 421 |
+
else input_resolution[0]
|
| 422 |
+
)
|
| 423 |
+
self.window_size = (
|
| 424 |
+
window_dim if window_dim <= target_window_size[0] else target_window_size[0]
|
| 425 |
+
)
|
| 426 |
+
self.shift_size = (
|
| 427 |
+
0
|
| 428 |
+
if input_resolution
|
| 429 |
+
<= (
|
| 430 |
+
self.window_size
|
| 431 |
+
if isinstance(self.window_size, collections.abc.Iterable)
|
| 432 |
+
else (self.window_size, self.window_size)
|
| 433 |
+
)
|
| 434 |
+
else target_shift_size[0]
|
| 435 |
+
)
|
| 436 |
+
|
| 437 |
+
def get_attn_mask(self, height, width, dtype):
|
| 438 |
+
if self.shift_size > 0:
|
| 439 |
+
# calculate attention mask for shifted window multihead self attention
|
| 440 |
+
img_mask = torch.zeros((1, height, width, 1), dtype=dtype)
|
| 441 |
+
height_slices = (
|
| 442 |
+
slice(0, -self.window_size),
|
| 443 |
+
slice(-self.window_size, -self.shift_size),
|
| 444 |
+
slice(-self.shift_size, None),
|
| 445 |
+
)
|
| 446 |
+
width_slices = (
|
| 447 |
+
slice(0, -self.window_size),
|
| 448 |
+
slice(-self.window_size, -self.shift_size),
|
| 449 |
+
slice(-self.shift_size, None),
|
| 450 |
+
)
|
| 451 |
+
count = 0
|
| 452 |
+
for height_slice in height_slices:
|
| 453 |
+
for width_slice in width_slices:
|
| 454 |
+
img_mask[:, height_slice, width_slice, :] = count
|
| 455 |
+
count += 1
|
| 456 |
+
|
| 457 |
+
mask_windows = window_partition(img_mask, self.window_size)
|
| 458 |
+
mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
|
| 459 |
+
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
|
| 460 |
+
attn_mask = attn_mask.masked_fill(
|
| 461 |
+
attn_mask != 0, float(-100.0)
|
| 462 |
+
).masked_fill(attn_mask == 0, float(0.0))
|
| 463 |
+
else:
|
| 464 |
+
attn_mask = None
|
| 465 |
+
return attn_mask
|
| 466 |
+
|
| 467 |
+
def maybe_pad(self, hidden_states, height, width):
|
| 468 |
+
pad_right = (self.window_size - width % self.window_size) % self.window_size
|
| 469 |
+
pad_bottom = (self.window_size - height % self.window_size) % self.window_size
|
| 470 |
+
pad_values = (0, 0, 0, pad_right, 0, pad_bottom)
|
| 471 |
+
hidden_states = nn.functional.pad(hidden_states, pad_values)
|
| 472 |
+
return hidden_states, pad_values
|
| 473 |
+
|
| 474 |
+
def forward(
|
| 475 |
+
self,
|
| 476 |
+
hidden_states: torch.Tensor,
|
| 477 |
+
input_dimensions: Tuple[int, int],
|
| 478 |
+
time: torch.Tensor,
|
| 479 |
+
head_mask: Optional[torch.FloatTensor] = None,
|
| 480 |
+
output_attentions: Optional[bool] = False,
|
| 481 |
+
always_partition: Optional[bool] = False,
|
| 482 |
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
| 483 |
+
if not always_partition:
|
| 484 |
+
self.set_shift_and_window_size(input_dimensions)
|
| 485 |
+
else:
|
| 486 |
+
pass
|
| 487 |
+
height, width = input_dimensions
|
| 488 |
+
batch_size, _, channels = hidden_states.size()
|
| 489 |
+
shortcut = hidden_states
|
| 490 |
+
|
| 491 |
+
# pad hidden_states to multiples of window size
|
| 492 |
+
hidden_states = hidden_states.view(batch_size, height, width, channels)
|
| 493 |
+
hidden_states, pad_values = self.maybe_pad(hidden_states, height, width)
|
| 494 |
+
_, height_pad, width_pad, _ = hidden_states.shape
|
| 495 |
+
# cyclic shift
|
| 496 |
+
if self.shift_size > 0:
|
| 497 |
+
shifted_hidden_states = torch.roll(
|
| 498 |
+
hidden_states, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)
|
| 499 |
+
)
|
| 500 |
+
else:
|
| 501 |
+
shifted_hidden_states = hidden_states
|
| 502 |
+
|
| 503 |
+
# partition windows
|
| 504 |
+
hidden_states_windows = window_partition(
|
| 505 |
+
shifted_hidden_states, self.window_size
|
| 506 |
+
)
|
| 507 |
+
hidden_states_windows = hidden_states_windows.view(
|
| 508 |
+
-1, self.window_size * self.window_size, channels
|
| 509 |
+
)
|
| 510 |
+
attn_mask = self.get_attn_mask(height_pad, width_pad, dtype=hidden_states.dtype)
|
| 511 |
+
if attn_mask is not None:
|
| 512 |
+
attn_mask = attn_mask.to(hidden_states_windows.device)
|
| 513 |
+
|
| 514 |
+
attention_outputs = self.attention(
|
| 515 |
+
hidden_states_windows,
|
| 516 |
+
attn_mask,
|
| 517 |
+
head_mask,
|
| 518 |
+
output_attentions=output_attentions,
|
| 519 |
+
)
|
| 520 |
+
|
| 521 |
+
attention_output = attention_outputs[0]
|
| 522 |
+
|
| 523 |
+
attention_windows = attention_output.view(
|
| 524 |
+
-1, self.window_size, self.window_size, channels
|
| 525 |
+
)
|
| 526 |
+
shifted_windows = window_reverse(
|
| 527 |
+
attention_windows, self.window_size, height_pad, width_pad
|
| 528 |
+
)
|
| 529 |
+
|
| 530 |
+
# reverse cyclic shift
|
| 531 |
+
if self.shift_size > 0:
|
| 532 |
+
attention_windows = torch.roll(
|
| 533 |
+
shifted_windows, shifts=(self.shift_size, self.shift_size), dims=(1, 2)
|
| 534 |
+
)
|
| 535 |
+
else:
|
| 536 |
+
attention_windows = shifted_windows
|
| 537 |
+
|
| 538 |
+
was_padded = pad_values[3] > 0 or pad_values[5] > 0
|
| 539 |
+
if was_padded:
|
| 540 |
+
attention_windows = attention_windows[:, :height, :width, :].contiguous()
|
| 541 |
+
|
| 542 |
+
attention_windows = attention_windows.view(batch_size, height * width, channels)
|
| 543 |
+
hidden_states = self.layernorm_before(attention_windows, time)
|
| 544 |
+
hidden_states = shortcut + self.drop_path(hidden_states)
|
| 545 |
+
|
| 546 |
+
layer_output = self.intermediate(hidden_states)
|
| 547 |
+
layer_output = self.output(layer_output)
|
| 548 |
+
layer_output = hidden_states + self.drop_path(
|
| 549 |
+
self.layernorm_after(layer_output, time)
|
| 550 |
+
)
|
| 551 |
+
|
| 552 |
+
layer_outputs = (
|
| 553 |
+
(layer_output, attention_outputs[1])
|
| 554 |
+
if output_attentions
|
| 555 |
+
else (layer_output,)
|
| 556 |
+
)
|
| 557 |
+
return layer_outputs
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
class ScOTPatchRecovery(nn.Module):
|
| 561 |
+
"""https://github.com/198808xc/Pangu-Weather/blob/main/pseudocode.py"""
|
| 562 |
+
|
| 563 |
+
def __init__(self, config):
|
| 564 |
+
super().__init__()
|
| 565 |
+
image_size, patch_size = config.image_size, config.patch_size
|
| 566 |
+
num_out_channels, hidden_size = (
|
| 567 |
+
config.num_out_channels,
|
| 568 |
+
config.embed_dim, # if not config.skip_connections[0] else 2 * config.embed_dim,
|
| 569 |
+
)
|
| 570 |
+
image_size = (
|
| 571 |
+
image_size
|
| 572 |
+
if isinstance(image_size, collections.abc.Iterable)
|
| 573 |
+
else (image_size, image_size)
|
| 574 |
+
)
|
| 575 |
+
patch_size = (
|
| 576 |
+
patch_size
|
| 577 |
+
if isinstance(patch_size, collections.abc.Iterable)
|
| 578 |
+
else (patch_size, patch_size)
|
| 579 |
+
)
|
| 580 |
+
num_patches = (image_size[0] // patch_size[0]) * (
|
| 581 |
+
image_size[1] // patch_size[1]
|
| 582 |
+
)
|
| 583 |
+
self.num_patches = num_patches
|
| 584 |
+
self.patch_size = patch_size
|
| 585 |
+
self.image_size = image_size
|
| 586 |
+
self.num_out_channels = num_out_channels
|
| 587 |
+
self.grid_size = (
|
| 588 |
+
image_size[0] // patch_size[0],
|
| 589 |
+
image_size[1] // patch_size[1],
|
| 590 |
+
)
|
| 591 |
+
|
| 592 |
+
self.projection = nn.ConvTranspose2d(
|
| 593 |
+
in_channels=hidden_size,
|
| 594 |
+
out_channels=num_out_channels,
|
| 595 |
+
kernel_size=patch_size,
|
| 596 |
+
stride=patch_size,
|
| 597 |
+
)
|
| 598 |
+
# the following is not done in Pangu
|
| 599 |
+
self.mixup = nn.Conv2d(
|
| 600 |
+
num_out_channels,
|
| 601 |
+
num_out_channels,
|
| 602 |
+
kernel_size=5,
|
| 603 |
+
stride=1,
|
| 604 |
+
padding=2,
|
| 605 |
+
bias=False,
|
| 606 |
+
)
|
| 607 |
+
|
| 608 |
+
def maybe_crop(self, pixel_values, height, width):
|
| 609 |
+
if pixel_values.shape[2] > height:
|
| 610 |
+
pixel_values = pixel_values[:, :, :height, :]
|
| 611 |
+
if pixel_values.shape[3] > width:
|
| 612 |
+
pixel_values = pixel_values[:, :, :, :width]
|
| 613 |
+
return pixel_values
|
| 614 |
+
|
| 615 |
+
def forward(self, hidden_states):
|
| 616 |
+
hidden_states = hidden_states.transpose(1, 2)
|
| 617 |
+
hidden_states = hidden_states.reshape(
|
| 618 |
+
hidden_states.shape[0], hidden_states.shape[1], *self.grid_size
|
| 619 |
+
)
|
| 620 |
+
|
| 621 |
+
output = self.projection(hidden_states)
|
| 622 |
+
output = self.maybe_crop(output, self.image_size[0], self.image_size[1])
|
| 623 |
+
return self.mixup(output)
|
| 624 |
+
|
| 625 |
+
|
| 626 |
+
class ScOTPatchMerging(nn.Module):
|
| 627 |
+
"""
|
| 628 |
+
Patch Merging Layer.
|
| 629 |
+
|
| 630 |
+
Args:
|
| 631 |
+
input_resolution (`Tuple[int]`):
|
| 632 |
+
Resolution of input feature.
|
| 633 |
+
dim (`int`):
|
| 634 |
+
Number of input channels.
|
| 635 |
+
norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
|
| 636 |
+
Normalization layer class.
|
| 637 |
+
"""
|
| 638 |
+
|
| 639 |
+
def __init__(
|
| 640 |
+
self, input_resolution: Tuple[int], dim: int, norm_layer: nn.Module = LayerNorm
|
| 641 |
+
) -> None:
|
| 642 |
+
super().__init__()
|
| 643 |
+
self.input_resolution = input_resolution
|
| 644 |
+
self.dim = dim
|
| 645 |
+
self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
|
| 646 |
+
self.norm = norm_layer(2 * dim)
|
| 647 |
+
|
| 648 |
+
def maybe_pad(self, input_feature, height, width):
|
| 649 |
+
should_pad = (height % 2 == 1) or (width % 2 == 1)
|
| 650 |
+
if should_pad:
|
| 651 |
+
pad_values = (0, 0, 0, width % 2, 0, height % 2)
|
| 652 |
+
input_feature = nn.functional.pad(input_feature, pad_values)
|
| 653 |
+
|
| 654 |
+
return input_feature
|
| 655 |
+
|
| 656 |
+
def forward(
|
| 657 |
+
self,
|
| 658 |
+
input_feature: torch.Tensor,
|
| 659 |
+
input_dimensions: Tuple[int, int],
|
| 660 |
+
time: torch.Tensor,
|
| 661 |
+
) -> torch.Tensor:
|
| 662 |
+
height, width = input_dimensions
|
| 663 |
+
# `dim` is height * width
|
| 664 |
+
batch_size, dim, num_channels = input_feature.shape
|
| 665 |
+
|
| 666 |
+
input_feature = input_feature.view(batch_size, height, width, num_channels)
|
| 667 |
+
# pad input to be disible by width and height, if needed
|
| 668 |
+
input_feature = self.maybe_pad(input_feature, height, width)
|
| 669 |
+
# [batch_size, height/2, width/2, num_channels]
|
| 670 |
+
input_feature_0 = input_feature[:, 0::2, 0::2, :]
|
| 671 |
+
# [batch_size, height/2, width/2, num_channels]
|
| 672 |
+
input_feature_1 = input_feature[:, 1::2, 0::2, :]
|
| 673 |
+
# [batch_size, height/2, width/2, num_channels]
|
| 674 |
+
input_feature_2 = input_feature[:, 0::2, 1::2, :]
|
| 675 |
+
# [batch_size, height/2, width/2, num_channels]
|
| 676 |
+
input_feature_3 = input_feature[:, 1::2, 1::2, :]
|
| 677 |
+
# [batch_size, height/2 * width/2, 4*num_channels]
|
| 678 |
+
input_feature = torch.cat(
|
| 679 |
+
[input_feature_0, input_feature_1, input_feature_2, input_feature_3], -1
|
| 680 |
+
)
|
| 681 |
+
input_feature = input_feature.view(
|
| 682 |
+
batch_size, -1, 4 * num_channels
|
| 683 |
+
) # [batch_size, height/2 * width/2, 4*C]
|
| 684 |
+
|
| 685 |
+
input_feature = self.reduction(input_feature)
|
| 686 |
+
input_feature = self.norm(input_feature, time)
|
| 687 |
+
|
| 688 |
+
return input_feature
|
| 689 |
+
|
| 690 |
+
|
| 691 |
+
+class ScOTPatchUnmerging(nn.Module):
+    def __init__(
+        self,
+        input_resolution: Tuple[int],
+        dim: int,
+        norm_layer: nn.Module = LayerNorm,
+    ) -> None:
+        super().__init__()
+        self.input_resolution = input_resolution
+        self.dim = dim
+        self.upsample = nn.Linear(dim, 2 * dim, bias=False)
+        self.mixup = nn.Linear(dim // 2, dim // 2, bias=False)
+        self.norm = norm_layer(dim // 2)
+
+    def maybe_crop(self, input_feature, height, width):
+        height_in, width_in = input_feature.shape[1], input_feature.shape[2]
+        if height_in > height:
+            input_feature = input_feature[:, :height, :, :]
+        if width_in > width:
+            input_feature = input_feature[:, :, :width, :]
+        return input_feature
+
+    def forward(
+        self,
+        input_feature: torch.Tensor,
+        output_dimensions: Tuple[int, int],
+        time: torch.Tensor,
+    ) -> torch.Tensor:
+        output_height, output_width = output_dimensions
+        batch_size, seq_len, hidden_size = input_feature.shape
+        #! assume square image
+        input_height = input_width = math.floor(seq_len**0.5)
+        input_feature = self.upsample(input_feature)
+        input_feature = input_feature.reshape(
+            batch_size, input_height, input_width, 2, 2, hidden_size // 2
+        )
+        input_feature = input_feature.permute(0, 1, 3, 2, 4, 5)
+        input_feature = input_feature.reshape(
+            batch_size, 2 * input_height, 2 * input_width, hidden_size // 2
+        )
+
+        input_feature = self.maybe_crop(input_feature, output_height, output_width)
+        input_feature = input_feature.reshape(batch_size, -1, hidden_size // 2)
+
+        input_feature = self.norm(input_feature, time)
+        return self.mixup(input_feature)
+
+
+class ScOTEncodeStage(nn.Module):
+    def __init__(
+        self,
+        config,
+        dim,
+        input_resolution,
+        depth,
+        num_heads,
+        drop_path,
+        downsample,
+        pretrained_window_size=0,
+    ):
+        super().__init__()
+        self.config = config
+        self.dim = dim
+        window_size = (
+            config.window_size
+            if isinstance(config.window_size, collections.abc.Iterable)
+            else (config.window_size, config.window_size)
+        )
+        self.blocks = nn.ModuleList(
+            [
+                ScOTLayer(
+                    config=config,
+                    dim=dim,
+                    input_resolution=input_resolution,
+                    num_heads=num_heads,
+                    shift_size=(
+                        [0, 0]
+                        if (i % 2 == 0)
+                        else [window_size[0] // 2, window_size[1] // 2]
+                    ),
+                    drop_path=drop_path[i],
+                    pretrained_window_size=pretrained_window_size,
+                )
+                for i in range(depth)
+            ]
+        )
+
+        # patch merging layer
+        if downsample is not None:
+            if config.use_conditioning:
+                layer_norm = ConditionalLayerNorm
+            else:
+                layer_norm = LayerNorm
+            self.downsample = downsample(
+                input_resolution, dim=dim, norm_layer=layer_norm
+            )
+        else:
+            self.downsample = None
+
+        self.pointing = False
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        input_dimensions: Tuple[int, int],
+        time: torch.Tensor,
+        head_mask: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = False,
+        always_partition: Optional[bool] = False,
+    ) -> Tuple[torch.Tensor]:
+        height, width = input_dimensions
+
+        inputs = hidden_states
+
+        for i, layer_module in enumerate(self.blocks):
+            layer_head_mask = head_mask[i] if head_mask is not None else None
+
+            layer_outputs = layer_module(
+                hidden_states,
+                input_dimensions,
+                time,
+                layer_head_mask,
+                output_attentions,
+                always_partition,
+            )
+
+            hidden_states = layer_outputs[0]
+
+        hidden_states_before_downsampling = hidden_states
+        if self.downsample is not None:
+            height_downsampled, width_downsampled = (height + 1) // 2, (width + 1) // 2
+            output_dimensions = (height, width, height_downsampled, width_downsampled)
+            hidden_states = self.downsample(
+                hidden_states_before_downsampling + inputs, input_dimensions, time
+            )
+        else:
+            output_dimensions = (height, width, height, width)
+
+        stage_outputs = (
+            hidden_states,
+            hidden_states_before_downsampling,
+            output_dimensions,
+        )
+
+        if output_attentions:
+            stage_outputs += layer_outputs[1:]
+        return stage_outputs
+
+
+class ScOTDecodeStage(nn.Module):
+    def __init__(
+        self,
+        config,
+        dim,
+        input_resolution,
+        depth,
+        num_heads,
+        drop_path,
+        upsample,
+        upsampled_size,
+        pretrained_window_size=0,
+    ):
+        super().__init__()
+        self.config = config
+        self.dim = dim
+        window_size = (
+            config.window_size
+            if isinstance(config.window_size, collections.abc.Iterable)
+            else (config.window_size, config.window_size)
+        )
+        self.blocks = nn.ModuleList(
+            [
+                ScOTLayer(
+                    config=config,
+                    dim=dim,
+                    input_resolution=input_resolution,
+                    num_heads=num_heads,
+                    shift_size=(
+                        [0, 0]
+                        if (i % 2 == 0)
+                        else [window_size[0] // 2, window_size[1] // 2]
+                    ),
+                    drop_path=drop_path[depth - 1 - i],  # TODO: reverse...
+                    pretrained_window_size=pretrained_window_size,
+                )
+                for i in reversed(range(depth))  # TODO: reverse here?
+            ]
+        )
+
+        if upsample is not None:
+            if config.use_conditioning:
+                layer_norm = ConditionalLayerNorm
+            else:
+                layer_norm = LayerNorm
+            self.upsample = upsample(input_resolution, dim=dim, norm_layer=layer_norm)
+            self.upsampled_size = upsampled_size
+        else:
+            self.upsample = None
+
+        self.pointing = False
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        input_dimensions: Tuple[int, int],
+        time: torch.Tensor,
+        head_mask: Optional[torch.Tensor] = None,
+        output_attentions: Optional[bool] = False,
+        always_partition: Optional[bool] = False,
+    ) -> Tuple[torch.Tensor]:
+        height, width = input_dimensions
+
+        for i, layer_module in enumerate(self.blocks):
+            layer_head_mask = head_mask[i] if head_mask is not None else None
+
+            layer_outputs = layer_module(
+                hidden_states,
+                input_dimensions,
+                time,
+                layer_head_mask,
+                output_attentions,
+                always_partition,
+            )
+
+            hidden_states = layer_outputs[0]
+
+        hidden_states_before_upsampling = hidden_states
+        if self.upsample is not None:
+            height_upsampled, width_upsampled = self.upsampled_size
+            output_dimensions = (height, width, height_upsampled, width_upsampled)
+            hidden_states = self.upsample(
+                hidden_states_before_upsampling,
+                (height_upsampled, width_upsampled),
+                time,
+            )
+        else:
+            output_dimensions = (height, width, height, width)
+
+        stage_outputs = (
+            hidden_states,
+            hidden_states_before_upsampling,
+            output_dimensions,
+        )
+
+        if output_attentions:
+            stage_outputs += layer_outputs[1:]
+        return stage_outputs
+
+
+class ScOTEncoder(nn.Module):
+    """
+    This is just a Swinv2Encoder with changed dpr.
+    We just have to change the drop path rate since we also have a decoder by default.
+    """
+
+    def __init__(self, config, grid_size, pretrained_window_sizes=(0, 0, 0, 0)):
+        super().__init__()
+        self.num_layers = len(config.depths)
+        self.config = config
+        if self.config.pretrained_window_sizes is not None:
+            pretrained_window_sizes = config.pretrained_window_sizes
+        drop_rates_encode_decode = torch.linspace(
+            0, config.drop_path_rate, 2 * sum(config.depths)
+        )
+        dpr = [
+            x.item()
+            for x in drop_rates_encode_decode[: drop_rates_encode_decode.shape[0] // 2]
+        ]
+        self.layers = nn.ModuleList(
+            [
+                ScOTEncodeStage(
+                    config=config,
+                    dim=int(config.embed_dim * 2**i_layer),
+                    input_resolution=(
+                        grid_size[0] // (2**i_layer),
+                        grid_size[1] // (2**i_layer),
+                    ),
+                    depth=config.depths[i_layer],
+                    num_heads=config.num_heads[i_layer],
+                    drop_path=dpr[
+                        sum(config.depths[:i_layer]) : sum(config.depths[: i_layer + 1])
+                    ],
+                    downsample=(
+                        ScOTPatchMerging if (i_layer < self.num_layers - 1) else None
+                    ),
+                    pretrained_window_size=pretrained_window_sizes[i_layer],
+                )
+                for i_layer in range(self.num_layers)
+            ]
+        )
+
+        self.gradient_checkpointing = False
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        input_dimensions: Tuple[int, int],
+        time: torch.Tensor,
+        head_mask: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = False,
+        output_hidden_states: Optional[bool] = False,
+        output_hidden_states_before_downsampling: Optional[bool] = False,
+        always_partition: Optional[bool] = False,
+        return_dict: Optional[bool] = True,
+    ) -> Union[Tuple, Swinv2EncoderOutput]:
+        all_hidden_states = () if output_hidden_states else None
+        all_reshaped_hidden_states = () if output_hidden_states else None
+        all_self_attentions = () if output_attentions else None
+
+        if output_hidden_states:
+            batch_size, _, hidden_size = hidden_states.shape
+            # rearrange b (h w) c -> b c h w
+            reshaped_hidden_state = hidden_states.view(
+                batch_size, *input_dimensions, hidden_size
+            )
+            reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+            all_hidden_states += (hidden_states,)
+            all_reshaped_hidden_states += (reshaped_hidden_state,)
+
+        for i, layer_module in enumerate(self.layers):
+            layer_head_mask = head_mask[i] if head_mask is not None else None
+
+            if self.gradient_checkpointing and self.training:
+                layer_outputs = self._gradient_checkpointing_func(
+                    layer_module.__call__,
+                    hidden_states,
+                    input_dimensions,
+                    time,
+                    layer_head_mask,
+                    output_attentions,
+                )
+            else:
+                layer_outputs = layer_module(
+                    hidden_states,
+                    input_dimensions,
+                    time,
+                    layer_head_mask,
+                    output_attentions,
+                    always_partition,
+                )
+
+            hidden_states = layer_outputs[0]
+            hidden_states_before_downsampling = layer_outputs[1]
+            output_dimensions = layer_outputs[2]
+
+            input_dimensions = (output_dimensions[-2], output_dimensions[-1])
+
+            if output_hidden_states and output_hidden_states_before_downsampling:
+                batch_size, _, hidden_size = hidden_states_before_downsampling.shape
+                # rearrange b (h w) c -> b c h w
+                # here we use the original (not downsampled) height and width
+                reshaped_hidden_state = hidden_states_before_downsampling.view(
+                    batch_size,
+                    *(output_dimensions[0], output_dimensions[1]),
+                    hidden_size,
+                )
+                reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+                all_hidden_states += (hidden_states_before_downsampling,)
+                all_reshaped_hidden_states += (reshaped_hidden_state,)
+            elif output_hidden_states and not output_hidden_states_before_downsampling:
+                batch_size, _, hidden_size = hidden_states.shape
+                # rearrange b (h w) c -> b c h w
+                reshaped_hidden_state = hidden_states.view(
+                    batch_size, *input_dimensions, hidden_size
+                )
+                reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+                all_hidden_states += (hidden_states,)
+                all_reshaped_hidden_states += (reshaped_hidden_state,)
+
+            if output_attentions:
+                all_self_attentions += layer_outputs[3:]
+
+        if not return_dict:
+            return tuple(
+                v
+                for v in [hidden_states, all_hidden_states, all_self_attentions]
+                if v is not None
+            )
+
+        return Swinv2EncoderOutput(
+            last_hidden_state=hidden_states,
+            hidden_states=all_hidden_states,
+            attentions=all_self_attentions,
+            reshaped_hidden_states=all_reshaped_hidden_states,
+        )
+
+
+class ScOTDecoder(nn.Module):
+    """Here we do reverse encoder."""
+
+    def __init__(self, config, grid_size, pretrained_window_sizes=(0, 0, 0, 0)):
+        super().__init__()
+        self.num_layers = len(config.depths)
+        self.config = config
+        if self.config.pretrained_window_sizes is not None:
+            pretrained_window_sizes = config.pretrained_window_sizes
+        drop_rates_encode_decode = torch.linspace(
+            0, config.drop_path_rate, 2 * sum(config.depths)
+        )
+        dpr = [
+            x.item()
+            for x in drop_rates_encode_decode[drop_rates_encode_decode.shape[0] // 2 :]
+        ]
+        self.layers = nn.ModuleList(
+            [
+                ScOTDecodeStage(
+                    config=config,
+                    dim=int(config.embed_dim * 2**i_layer),
+                    input_resolution=(
+                        grid_size[0] // (2**i_layer),
+                        grid_size[1] // (2**i_layer),
+                    ),
+                    depth=config.depths[i_layer],
+                    num_heads=config.num_heads[i_layer],
+                    drop_path=dpr[
+                        sum(config.depths[i_layer + 1 :]) : sum(config.depths[i_layer:])
+                    ],
+                    upsample=ScOTPatchUnmerging if i_layer > 0 else None,
+                    upsampled_size=(
+                        grid_size[0] // (2 ** (i_layer - 1)),
+                        grid_size[1] // (2 ** (i_layer - 1)),
+                    ),
+                    pretrained_window_size=pretrained_window_sizes[i_layer],
+                )
+                for i_layer in reversed(range(self.num_layers))
+            ]
+        )
+
+        self.gradient_checkpointing = False
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        input_dimensions: Tuple[int, int],
+        skip_states: List[torch.FloatTensor],
+        time: torch.Tensor,
+        head_mask: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = False,
+        output_hidden_states: Optional[bool] = False,
+        output_hidden_states_before_upsampling: Optional[bool] = False,
+        always_partition: Optional[bool] = False,
+        return_dict: Optional[bool] = True,
+    ) -> Union[Tuple, Swinv2EncoderOutput]:
+        all_hidden_states = () if output_hidden_states else None
+        all_reshaped_hidden_states = () if output_hidden_states else None
+        all_self_attentions = () if output_attentions else None
+
+        if output_hidden_states:
+            batch_size, _, hidden_size = hidden_states.shape
+            # rearrange b (h w) c -> b c h w
+            reshaped_hidden_state = hidden_states.view(
+                batch_size, *input_dimensions, hidden_size
+            )
+            reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+            all_hidden_states += (hidden_states,)
+            all_reshaped_hidden_states += (reshaped_hidden_state,)
+
+        for i, layer_module in enumerate(self.layers):
+            layer_head_mask = head_mask[i] if head_mask is not None else None
+
+            if i != 0 and skip_states[len(skip_states) - i] is not None:
+                # residual connection
+                hidden_states = hidden_states + skip_states[len(skip_states) - i]
+            if self.gradient_checkpointing and self.training:
+                layer_outputs = self._gradient_checkpointing_func(
+                    layer_module.__call__,
+                    hidden_states,
+                    input_dimensions,
+                    time,
+                    layer_head_mask,
+                    output_attentions,
+                )
+            else:
+                layer_outputs = layer_module(
+                    hidden_states,
+                    input_dimensions,
+                    time,
+                    layer_head_mask,
+                    output_attentions,
+                    always_partition,
+                )
+
+            hidden_states = layer_outputs[0]
+            hidden_states_before_upsampling = layer_outputs[1]
+            output_dimensions = layer_outputs[2]
+
+            input_dimensions = (output_dimensions[-2], output_dimensions[-1])
+
+            if output_hidden_states and output_hidden_states_before_upsampling:
+                batch_size, _, hidden_size = hidden_states_before_upsampling.shape
+                # rearrange b (h w) c -> b c h w
+                # here we use the original (not downsampled) height and width
+                reshaped_hidden_state = hidden_states_before_upsampling.view(
+                    batch_size,
+                    *(output_dimensions[0], output_dimensions[1]),
+                    hidden_size,
+                )
+                reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+                all_hidden_states += (hidden_states_before_upsampling,)
+                all_reshaped_hidden_states += (reshaped_hidden_state,)
+            elif output_hidden_states and not output_hidden_states_before_upsampling:
+                batch_size, _, hidden_size = hidden_states.shape
+                # rearrange b (h w) c -> b c h w
+                reshaped_hidden_state = hidden_states.view(
+                    batch_size, *input_dimensions, hidden_size
+                )
+                reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+                all_hidden_states += (hidden_states,)
+                all_reshaped_hidden_states += (reshaped_hidden_state,)
+
+            if output_attentions:
+                all_self_attentions += layer_outputs[3:]
+
+        if not return_dict:
+            return tuple(
+                v
+                for v in [hidden_states, all_hidden_states, all_self_attentions]
+                if v is not None
+            )
+
+        return Swinv2EncoderOutput(
+            last_hidden_state=hidden_states,
+            hidden_states=all_hidden_states,
+            attentions=all_self_attentions,
+            reshaped_hidden_states=all_reshaped_hidden_states,
+        )
+
+
+class ScOT(Swinv2PreTrainedModel):
+    """Inspired by https://github.com/huggingface/transformers/blob/v4.35.2/src/transformers/models/swinv2/modeling_swinv2.py#L1129"""
+
+    def __init__(self, config, use_mask_token=False):
+        super().__init__(config)
+
+        self.config = config
+        self.num_layers_encoder = len(config.depths)
+        self.num_layers_decoder = len(config.depths)
+        self.num_features = int(config.embed_dim * 2 ** (self.num_layers_encoder - 1))
+
+        self.embeddings = ScOTEmbeddings(config, use_mask_token=use_mask_token)
+        self.encoder = ScOTEncoder(config, self.embeddings.patch_grid)
+        self.decoder = ScOTDecoder(config, self.embeddings.patch_grid)
+        self.patch_recovery = ScOTPatchRecovery(config)
+
+        if config.residual_model == "convnext":
+            res_model = ConvNeXtBlock
+        elif config.residual_model == "resnet":
+            res_model = ResNetBlock
+        else:
+            raise ValueError("residual_model must be 'convnext' or 'resnet'")
+
+        self.residual_blocks = nn.ModuleList(
+            [
+                (
+                    nn.ModuleList(
+                        [
+                            res_model(config, config.embed_dim * 2**i)
+                            for _ in range(depth)
+                        ]
+                    )
+                    if depth > 0
+                    else nn.ModuleList([nn.Identity()])
+                )
+                for i, depth in enumerate(config.skip_connections)
+            ]
+        )
+
+        self.post_init()
+
+    def get_input_embeddings(self):
+        return self.embeddings.patch_embeddings
+
+    def _prune_heads(self, heads_to_prune):
+        for layer, heads in heads_to_prune.items():
+            self.encoder.layers[layer].attention.prune_heads(heads)
+        for layer, heads in reversed(heads_to_prune.items()):
+            self.decoder.layers[layer].attention.prune_heads(heads)
+
+    def _downsample(self, image, target_size):
+        # spectral downsampling: keep only the low-frequency modes
+        image_size = image.shape[-2]
+        freqs = torch.fft.fftfreq(image_size, d=1 / image_size)
+        sel = torch.logical_and(freqs >= -target_size / 2, freqs <= target_size / 2 - 1)
+        image_hat = torch.fft.fft2(image, norm="forward")
+        image_hat = image_hat[:, :, sel, :][:, :, :, sel]
+        image = torch.fft.ifft2(image_hat, norm="forward").real
+        return image
+
+    def _upsample(self, image, target_size):
+        # spectral upsampling by zero-padding in the frequency domain, see
+        # https://stackoverflow.com/questions/71143279/upsampling-images-in-frequency-domain-using-pytorch
+        image_size = image.shape[-2]
+        image_hat = torch.fft.fft2(image, norm="forward")
+        image_hat = torch.fft.fftshift(image_hat)
+        pad_size = (target_size - image_size) // 2
+        real = nn.functional.pad(
+            image_hat.real, (pad_size, pad_size, pad_size, pad_size), value=0.0
+        )
+        imag = nn.functional.pad(
+            image_hat.imag, (pad_size, pad_size, pad_size, pad_size), value=0.0
+        )
+        image_hat = torch.fft.ifftshift(torch.complex(real, imag))
+        image = torch.fft.ifft2(image_hat, norm="forward").real
+        return image
+
+    def forward(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+        time: Optional[torch.FloatTensor] = None,
+        bool_masked_pos: Optional[torch.BoolTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        pixel_mask: Optional[torch.BoolTensor] = None,
+        labels: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, ScOTOutput]:
+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
+        )
+
+        output_attentions = (
+            output_attentions
+            if output_attentions is not None
+            else self.config.output_attentions
+        )
+        output_hidden_states = (
+            output_hidden_states
+            if output_hidden_states is not None
+            else self.config.output_hidden_states
+        )
+
+        if pixel_values is None:
+            raise ValueError("pixel_values cannot be None")
+
+        head_mask = self.get_head_mask(
+            head_mask, self.num_layers_encoder + self.num_layers_decoder
+        )
+
+        if isinstance(head_mask, list):
+            head_mask_encoder = head_mask[: self.num_layers_encoder]
+            head_mask_decoder = head_mask[self.num_layers_encoder :]
+        else:
+            head_mask_encoder, head_mask_decoder = head_mask.split(
+                [self.num_layers_encoder, self.num_layers_decoder]
+            )
+
+        image_size = pixel_values.shape[2]
+        # image must be square; resample spectrally if the resolution differs
+        if image_size != self.config.image_size:
+            if image_size < self.config.image_size:
+                pixel_values = self._upsample(pixel_values, self.config.image_size)
+            else:
+                pixel_values = self._downsample(pixel_values, self.config.image_size)
+
+        embedding_output, input_dimensions = self.embeddings(
+            pixel_values, bool_masked_pos=bool_masked_pos, time=time
+        )
+
+        encoder_outputs = self.encoder(
+            embedding_output,
+            input_dimensions,
+            time,
+            head_mask=head_mask_encoder,
+            output_attentions=output_attentions,
+            output_hidden_states=True,
+            output_hidden_states_before_downsampling=True,
+            return_dict=return_dict,
+        )
+
+        if return_dict:
+            skip_states = list(encoder_outputs.hidden_states[1:])
+        else:
+            skip_states = list(encoder_outputs[1][1:])
+
+        # pass each skip connection through its residual blocks
+        for i in range(len(skip_states)):
+            for block in self.residual_blocks[i]:
+                if isinstance(block, nn.Identity):
+                    skip_states[i] = block(skip_states[i])
+                else:
+                    skip_states[i] = block(skip_states[i], time)
+
+        #! assumes square images
+        input_dim = math.floor(skip_states[-1].shape[1] ** 0.5)
+        decoder_output = self.decoder(
+            skip_states[-1],
+            (input_dim, input_dim),
+            time=time,
+            skip_states=skip_states[:-1],
+            head_mask=head_mask_decoder,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+
+        sequence_output = decoder_output[0]
+        prediction = self.patch_recovery(sequence_output)
+        # The following can be used for learning just the residual for time-dependent problems
+        if self.config.learn_residual:
+            if self.config.num_channels > self.config.num_out_channels:
+                pixel_values = pixel_values[:, 0 : self.config.num_out_channels]
+            prediction += pixel_values
+
+        if image_size != self.config.image_size:
+            if image_size > self.config.image_size:
+                prediction = self._upsample(prediction, image_size)
+            else:
+                prediction = self._downsample(prediction, image_size)
+
+        if pixel_mask is not None:
+            prediction[pixel_mask] = labels[pixel_mask].type_as(prediction)
+        loss = None
+        if labels is not None:
+            if self.config.p == 1:
+                loss_fn = nn.functional.l1_loss
+            elif self.config.p == 2:
+                loss_fn = nn.functional.mse_loss
+            else:
+                raise ValueError("p must be 1 or 2")
+            if self.config.channel_slice_list_normalized_loss is not None:
+                # relative loss per channel group, averaged over the groups
+                slices = self.config.channel_slice_list_normalized_loss
+                group_losses = []
+                for i in range(len(slices) - 1):
+                    pred_c = prediction[:, slices[i] : slices[i + 1]]
+                    label_c = labels[:, slices[i] : slices[i + 1]]
+                    group_losses.append(
+                        loss_fn(pred_c, label_c)
+                        / (loss_fn(label_c, torch.zeros_like(label_c)) + 1e-10)
+                    )
+                loss = torch.mean(torch.stack(group_losses))
+            else:
+                loss = loss_fn(prediction, labels)
+
+        if not return_dict:
+            output = (prediction,) + decoder_output[1:] + encoder_outputs[1:]
+            return ((loss,) + output) if loss is not None else output
+
+        return ScOTOutput(
+            loss=loss,
+            output=prediction,
+            hidden_states=(
+                decoder_output.hidden_states + encoder_outputs.hidden_states
+                if output_hidden_states
+                else None
+            ),
+            attentions=(
+                decoder_output.attentions + encoder_outputs.attentions
+                if output_attentions
+                else None
+            ),
+            reshaped_hidden_states=(
+                decoder_output.reshaped_hidden_states
+                + encoder_outputs.reshaped_hidden_states
+                if output_hidden_states
+                else None
+            ),
+        )
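Note (added for this demo): the sketch below shows how the `ScOT` class above is typically driven end to end. The `forward` arguments (`pixel_values`, `time`) and the FFT resampling for non-native resolutions come directly from the code above; the checkpoint identifier is a placeholder, not a confirmed model id.

```python
import torch
from scOT.model import ScOT

# Placeholder checkpoint id; substitute a real scOT/Poseidon checkpoint.
model = ScOT.from_pretrained("<org>/<scot-checkpoint>")
model.eval()

# One normalized snapshot: (batch, channels, height, width). If the spatial
# size differs from config.image_size, forward() resamples it spectrally.
x = torch.randn(
    1, model.config.num_channels, model.config.image_size, model.config.image_size
)
t = torch.tensor([0.5])  # lead time, normalized as in the dataset classes below

with torch.no_grad():
    out = model(pixel_values=x, time=t)
print(out.output.shape)  # (1, num_out_channels, image_size, image_size)
```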
external/poseidon/scOT/problems/__init__.py
ADDED
File without changes
external/poseidon/scOT/problems/base.py
ADDED
@@ -0,0 +1,395 @@
+"""
+This file contains the dataset selector get_dataset, as well as the base
+classes for all datasets.
+"""
+
+from torch.utils.data import Dataset, ConcatDataset
+from typing import Optional, List, Dict
+from abc import ABC
+import re
+import os
+import shutil
+from accelerate.utils import broadcast_object_list
+
+
+def get_dataset(dataset, **kwargs):
+    """
+    Get a dataset by name.
+    If you enter a list of str, will return a ConcatDataset of the datasets.
+
+    Available choices are:
+    - fluids.incompressible.BrownianBridge(.tracer)
+    - fluids.incompressible.Gaussians(.tracer)
+    - fluids.incompressible.ShearLayer
+    - fluids.incompressible.Sines(.tracer)
+    - fluids.incompressible.PiecewiseConstants(.tracer)
+    - fluids.incompressible.VortexSheet(.tracer)
+    - fluids.incompressible.forcing.KolmogorovFlow
+    - fluids.compressible.gravity.RayleighTaylor(.tracer)
+    - fluids.compressible.RiemannKelvinHelmholtz
+    - fluids.compressible.RiemannCurved
+    - fluids.compressible.Riemann
+    - fluids.compressible.KelvinHelmholtz
+    - fluids.compressible.Gaussians
+    - fluids.compressible.RichtmyerMeshkov(.tracer)
+    - fluids.compressible.steady.Airfoil(.time)
+    - elliptic.poisson.Gaussians(.time)
+    - elliptic.Helmholtz(.time)
+    - wave.Layer
+    - wave.Gaussians
+    - reaction_diffusion.AllenCahn
+
+    Adding .out at the end of the str returns a dataset with more time steps.
+    **kwargs overwrite the default settings.
+    .time is a time-wrapped time-independent dataset.
+    """
+    if isinstance(dataset, list):
+        return ConcatDataset([get_dataset(d, **kwargs) for d in dataset])
+    if "fluids" in dataset:
+        if "fluids.incompressible" in dataset:
+            if "BrownianBridge" in dataset:
+                from .fluids.incompressible import BrownianBridge as dset
+            elif "Gaussians" in dataset:
+                from .fluids.incompressible import Gaussians as dset
+            elif "ShearLayer" in dataset:
+                from .fluids.incompressible import ShearLayer as dset
+            elif "Sines" in dataset:
+                from .fluids.incompressible import Sines as dset
+            elif "PiecewiseConstants" in dataset:
+                from .fluids.incompressible import PiecewiseConstants as dset
+            elif "VortexSheet" in dataset:
+                from .fluids.incompressible import VortexSheet as dset
+            elif "forcing" in dataset:
+                if "KolmogorovFlow" in dataset:
+                    from .fluids.incompressible import KolmogorovFlow as dset
+                else:
+                    raise ValueError(f"Unknown dataset {dataset}")
+            else:
+                raise ValueError(f"Unknown dataset {dataset}")
+        elif "fluids.compressible" in dataset:
+            if "gravity" in dataset:
+                if "RayleighTaylor" in dataset:
+                    from .fluids.compressible import RayleighTaylor as dset
+
+                    if "out" in dataset:
+                        default_time_settings = {
+                            "max_num_time_steps": 10,
+                            "time_step_size": 1,
+                        }
+                    else:
+                        default_time_settings = {
+                            "max_num_time_steps": 7,
+                            "time_step_size": 1,
+                        }
+                    kwargs = {**default_time_settings, **kwargs}
+            elif "Blast" in dataset:
+                from .fluids.compressible import Blast as dset
+            elif "RiemannKelvinHelmholtz" in dataset:
+                from .fluids.compressible import RiemannKelvinHelmholtz as dset
+            elif "RiemannCurved" in dataset:
+                from .fluids.compressible import RiemannCurved as dset
+            elif "Riemann" in dataset:
+                from .fluids.compressible import Riemann as dset
+            elif "KelvinHelmholtz" in dataset:
+                from .fluids.compressible import KelvinHelmholtz as dset
+            elif "Gaussians" in dataset:
+                from .fluids.compressible import Gaussians as dset
+            elif "RichtmyerMeshkov" in dataset:
+                from .fluids.compressible import RichtmyerMeshkov as dset
+            elif "steady" in dataset:
+                if "steady.Airfoil" in dataset:
+                    from .fluids.compressible import Airfoil as dset
+
+                    if "out" in dataset:
+                        raise ValueError(f"Unknown dataset {dataset}")
+                else:
+                    raise ValueError(f"Unknown dataset {dataset}")
+            else:
+                raise ValueError(f"Unknown dataset {dataset}")
+        else:
+            raise ValueError(f"Unknown dataset {dataset}")
+        if "out" in dataset:
+            default_time_settings = {"max_num_time_steps": 10, "time_step_size": 2}
+        else:
+            default_time_settings = {"max_num_time_steps": 7, "time_step_size": 2}
+        if "tracer" in dataset:
+            tracer = True
+        else:
+            tracer = False
+        if "steady" not in dataset:
+            kwargs = {"tracer": tracer, **default_time_settings, **kwargs}
+    elif "elliptic" in dataset:
+        if ".out" in dataset:
+            raise NotImplementedError(f"Unknown dataset {dataset}")
+        if "elliptic.poisson" in dataset:
+            if "Gaussians" in dataset:
+                from .elliptic.poisson import Gaussians as dset
+            else:
+                raise ValueError(f"Unknown dataset {dataset}")
+        elif "elliptic.Helmholtz" in dataset:
+            from .elliptic.helmholtz import Helmholtz as dset
+        else:
+            raise ValueError(f"Unknown dataset {dataset}")
+    elif "wave" in dataset:
+        if "wave.Layer" in dataset:
+            if "out" in dataset:
+                default_time_settings = {"max_num_time_steps": 10, "time_step_size": 2}
+            else:
+                default_time_settings = {"max_num_time_steps": 7, "time_step_size": 2}
+            kwargs = {**default_time_settings, **kwargs}
+            from .wave.acoustic import Layer as dset
+        elif "wave.Gaussians" in dataset:
+            if "out" in dataset:
+                raise ValueError(f"Unknown dataset {dataset}")
+            else:
+                default_time_settings = {"max_num_time_steps": 7, "time_step_size": 2}
+            kwargs = {**default_time_settings, **kwargs}
+            from .wave.acoustic import Gaussians as dset
+        else:
+            raise ValueError(f"Unknown dataset {dataset}")
+    elif "reaction_diffusion" in dataset:
+        if "reaction_diffusion.AllenCahn" in dataset:
+            if "out" in dataset:
+                default_time_settings = {"max_num_time_steps": 9, "time_step_size": 2}
+            else:
+                default_time_settings = {"max_num_time_steps": 7, "time_step_size": 2}
+            kwargs = {**default_time_settings, **kwargs}
+            from .reaction_diffusion.allen_cahn import AllenCahn as dset
+        else:
+            raise ValueError(f"Unknown dataset {dataset}")
+
+    return dset(**kwargs) if ".time" not in dataset else TimeWrapper(dset(**kwargs))
+
+
+class BaseDataset(Dataset, ABC):
+    """A base class for all datasets. Can be directly derived from if you have a steady/non-time dependent problem."""
+
+    def __init__(
+        self,
+        which: Optional[str] = None,
+        num_trajectories: Optional[int] = None,
+        data_path: Optional[str] = "./data",
+        move_to_local_scratch: Optional[str] = None,
+    ) -> None:
+        """
+        Args:
+            which: Which dataset to use, i.e. train, val, or test.
+            num_trajectories: The number of trajectories to use for training.
+            data_path: The path to the data files.
+            move_to_local_scratch: If not None, move the data to this directory at dataset initialization and use it from there.
+        """
+        assert which in ["train", "val", "test"]
+        assert num_trajectories is not None and (
+            num_trajectories > 0 or num_trajectories in [-1, -2, -8]
+        )
+
+        self.num_trajectories = num_trajectories
+        self.data_path = data_path
+        self.which = which
+        self.move_to_local_scratch = move_to_local_scratch
+
+    def _move_to_local_scratch(self, file_path):
+        if self.move_to_local_scratch is not None:
+            data_dir = os.path.join(self.data_path, file_path)
+            file = file_path.split("/")[-1]
+            scratch_dir = self.move_to_local_scratch
+            dest_dir = os.path.join(scratch_dir, file)
+            RANK = int(os.environ.get("LOCAL_RANK", -1))
+            if not os.path.exists(dest_dir) and (RANK == 0 or RANK == -1):
+                print(f"Start copying {file} to {dest_dir}...")
+                shutil.copy(data_dir, dest_dir)
+                print("Finished data copy.")
+            # the broadcast below doubles as a barrier, so all ranks wait for the copy
+            ls = broadcast_object_list([dest_dir], from_process=0)
+            dest_dir = ls[0]
+            return dest_dir
+        else:
+            return file_path
+
+    def post_init(self) -> None:
+        """
+        Call after self.N_max, self.N_val, self.N_test, as well as the file_paths and normalization constants are set.
+        """
+        assert (
+            self.N_max is not None
+            and self.N_max > 0
+            and self.N_max >= self.N_val + self.N_test
+        )
+        if self.num_trajectories == -1:
+            self.num_trajectories = self.N_max - self.N_val - self.N_test
+        elif self.num_trajectories == -2:
+            self.num_trajectories = (self.N_max - self.N_val - self.N_test) // 2
+        elif self.num_trajectories == -8:
+            self.num_trajectories = (self.N_max - self.N_val - self.N_test) // 8
+        assert self.num_trajectories + self.N_val + self.N_test <= self.N_max
+        assert self.N_val is not None and self.N_val > 0
+        assert self.N_test is not None and self.N_test > 0
+        if self.which == "train":
+            self.length = self.num_trajectories
+            self.start = 0
+        elif self.which == "val":
+            self.length = self.N_val
+            self.start = self.N_max - self.N_val - self.N_test
+        else:
+            self.length = self.N_test
+            self.start = self.N_max - self.N_test
+
+        self.output_dim = self.label_description.count(",") + 1
+        descriptors, channel_slice_list = self.get_channel_lists(self.label_description)
+        self.printable_channel_description = descriptors
+        self.channel_slice_list = channel_slice_list
+
+    def __len__(self) -> int:
+        """
+        Returns: overall length of dataset.
+        """
+        return self.length
+
+    def __getitem__(self, idx) -> Dict:
+        """
+        Get an item. Overwrite in subclasses!
+
+        Args:
+            idx: The index of the sample to get.
+
+        Returns:
+            A dict of key-value pairs of data.
+        """
+        pass
+
+    @staticmethod
+    def get_channel_lists(label_description):
+        matches = re.findall(r"\[([^\[\]]+)\]", label_description)
+        channel_slice_list = [0]  # use as channel_slice_list[i]:channel_slice_list[i+1]
+        beautiful_descriptors = []
+        for match in matches:
+            channel_slice_list.append(channel_slice_list[-1] + 1 + match.count(","))
+            splt = match.split(",")
+            if len(splt) > 1:
+                beautiful_descriptors.append("".join(splt))
+            else:
+                beautiful_descriptors.append(match)
+        return beautiful_descriptors, channel_slice_list
+
+
+class BaseTimeDataset(BaseDataset, ABC):
+    """A base class for time dependent problems. Inherit time-dependent problems from here."""
+
+    def __init__(
+        self,
+        *args,
+        max_num_time_steps: Optional[int] = None,
+        time_step_size: Optional[int] = None,
+        fix_input_to_time_step: Optional[int] = None,
+        allowed_time_transitions: Optional[List[int]] = None,
+        **kwargs,
+    ) -> None:
+        """
+        Args:
+            max_num_time_steps: The maximum number of time steps to use.
+            time_step_size: The size of the time step.
+            fix_input_to_time_step: If not None, fix the input to this time step.
+            allowed_time_transitions: If not None, only allow these time transitions (time steps).
+        """
+        assert max_num_time_steps is not None and max_num_time_steps > 0
+        assert time_step_size is not None and time_step_size > 0
+        assert fix_input_to_time_step is None or fix_input_to_time_step >= 0
+
+        super().__init__(*args, **kwargs)
+        self.max_num_time_steps = max_num_time_steps
+        self.time_step_size = time_step_size
+        self.fix_input_to_time_step = fix_input_to_time_step
+        self.allowed_time_transitions = allowed_time_transitions
+
+    def _idx_map(self, idx):
+        i = idx // self.multiplier
+        _idx = idx - i * self.multiplier
+
+        if self.fix_input_to_time_step is None:
+            t1, t2 = self.time_indices[_idx]
+            assert t2 >= t1
+            t = t2 - t1
+        else:
+            t1 = self.fix_input_to_time_step
+            t2 = self.time_step_size * (_idx + 1) + self.fix_input_to_time_step
+            t = t2 - t1
+        return i, t, t1, t2
+
+    def post_init(self) -> None:
+        """
+        Call after self.N_max, self.N_val, self.N_test, as well as the file_paths and normalization constants are set.
+        self.max_time_step must have already been set.
+        """
+        assert (
+            self.N_max is not None
+            and self.N_max > 0
+            and self.N_max >= self.N_val + self.N_test
+        )
+        if self.num_trajectories == -1:
+            self.num_trajectories = self.N_max - self.N_val - self.N_test
+        elif self.num_trajectories == -2:
+            self.num_trajectories = (self.N_max - self.N_val - self.N_test) // 2
+        elif self.num_trajectories == -8:
+            self.num_trajectories = (self.N_max - self.N_val - self.N_test) // 8
+        assert self.num_trajectories + self.N_val + self.N_test <= self.N_max
+        assert self.N_val is not None and self.N_val > 0
+        assert self.N_test is not None and self.N_test > 0
+        assert self.max_num_time_steps is not None and self.max_num_time_steps > 0
+
+        if self.fix_input_to_time_step is not None:
+            self.multiplier = self.max_num_time_steps
+        else:
+            self.time_indices = []
+            for i in range(self.max_num_time_steps + 1):
+                for j in range(i, self.max_num_time_steps + 1):
+                    if (
+                        self.allowed_time_transitions is not None
+                        and (j - i) not in self.allowed_time_transitions
+                    ):
+                        continue
+                    self.time_indices.append(
+                        (self.time_step_size * i, self.time_step_size * j)
+                    )
+            self.multiplier = len(self.time_indices)
+
+        if self.which == "train":
+            self.length = self.num_trajectories * self.multiplier
+            self.start = 0
+        elif self.which == "val":
+            self.length = self.N_val * self.multiplier
+            self.start = self.N_max - self.N_val - self.N_test
+        else:
+            self.length = self.N_test * self.multiplier
+            self.start = self.N_max - self.N_test
+
+        self.output_dim = self.label_description.count(",") + 1
+        descriptors, channel_slice_list = self.get_channel_lists(self.label_description)
+        self.printable_channel_description = descriptors
+        self.channel_slice_list = channel_slice_list
+
+
+class TimeWrapper(BaseTimeDataset):
+    """For time-independent problems to be plugged into time-dependent models."""
+
+    def __init__(self, dataset):
+        super().__init__(
+            dataset.which,
+            dataset.num_trajectories,
+            dataset.data_path,
+            None,
+            max_num_time_steps=1,
+            time_step_size=1,
+        )
+        self.dataset = dataset
+        self.resolution = dataset.resolution
+        self.input_dim = dataset.input_dim
+        self.output_dim = dataset.output_dim
+        self.channel_slice_list = dataset.channel_slice_list
+        self.printable_channel_description = dataset.printable_channel_description
+
+    def __len__(self):
+        return len(self.dataset)
+
+    def __getitem__(self, idx):
+        return {**self.dataset[idx], "time": 1.0}
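Note (added for this demo): a minimal sketch of the selector above in practice, using only arguments defined on `BaseDataset`; the data directory is a placeholder and must contain the matching `.nc` file.

```python
from scOT.problems.base import get_dataset

# "./data" is a placeholder; it must contain CE-RM.nc for this dataset.
train_set = get_dataset(
    "fluids.compressible.RichtmyerMeshkov",
    which="train",
    num_trajectories=-1,  # -1 = use all trajectories not reserved for val/test
    data_path="./data",
)
sample = train_set[0]
# Time-dependent datasets return normalized tensors plus a scalar lead time.
print(sample["pixel_values"].shape, sample["labels"].shape, sample["time"])
```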
external/poseidon/scOT/problems/elliptic/__init__.py
ADDED
File without changes
external/poseidon/scOT/problems/elliptic/helmholtz.py
ADDED
@@ -0,0 +1,49 @@
+import torch
+import os
+import h5py
+import numpy as np
+from scOT.problems.base import BaseDataset
+
+
+class Helmholtz(BaseDataset):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.N_max = 19675
+        self.N_val = 128
+        self.N_test = 512
+        self.resolution = 128
+
+        self.file_path = os.path.join(
+            self.data_path,
+            "Helmholtz.h5",
+        )
+        self.file_path = self._move_to_local_scratch(self.file_path)
+        self.reader = h5py.File(self.file_path, "r")
+        self.mean = 0.11523915668552
+        self.std = 0.8279975746000605
+
+        self.input_dim = 2
+        self.label_description = "[u]"
+
+        self.post_init()
+
+    def __getitem__(self, idx):
+        inputs = (
+            torch.from_numpy(self.reader["Sample_" + str(idx + self.start)]["a"][:])
+            .type(torch.float32)
+            .reshape(1, self.resolution, self.resolution)
+        )
+        inputs = inputs - 1
+        b = float(np.array(self.reader["Sample_" + str(idx + self.start)]["bc"]))
+        bc = b * torch.ones_like(inputs)
+        inputs = torch.cat((inputs, bc), dim=0)
+
+        labels = (
+            torch.from_numpy(self.reader["Sample_" + str(idx + self.start)]["u"][:])
+            .type(torch.float32)
+            .reshape(1, self.resolution, self.resolution)
+        )
+        labels = (labels - self.mean) / self.std
+
+        return {"pixel_values": inputs, "labels": labels}
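Note (added for this demo): `Helmholtz` z-scores its labels with the hard-coded `mean`/`std` above, so a prediction in normalized space is mapped back to physical units by inverting that transform. A sketch; `prediction` stands in for a model output and the data path is a placeholder.

```python
import torch
from scOT.problems.elliptic.helmholtz import Helmholtz

dataset = Helmholtz(which="test", num_trajectories=-1, data_path="./data")
prediction = torch.zeros(1, 128, 128)  # stand-in for a normalized model output
u_physical = prediction * dataset.std + dataset.mean  # invert the z-score above
```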
external/poseidon/scOT/problems/elliptic/poisson.py
ADDED
@@ -0,0 +1,50 @@
+import torch
+import os
+import h5py
+from scOT.problems.base import BaseDataset
+
+CONSTANTS = {
+    "mean_source": 0.014822142414492256,
+    "std_source": 4.755138816607612,
+    "mean_solution": 0.0005603458434937093,
+    "std_solution": 0.02401226126952699,
+}
+
+
+class Gaussians(BaseDataset):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.N_max = 20000
+        self.N_val = 120
+        self.N_test = 240
+        self.resolution = 128
+
+        self.file_path = os.path.join(self.data_path, "Poisson-Gauss.nc")
+        self.file_path = self._move_to_local_scratch(self.file_path)
+        self.reader = h5py.File(self.file_path, "r")
+        self.constants = CONSTANTS
+
+        self.input_dim = 1
+        self.label_description = "[u]"
+
+        self.post_init()
+
+    def __getitem__(self, idx):
+        inputs = (
+            torch.from_numpy(self.reader["source"][idx + self.start])
+            .type(torch.float32)
+            .reshape(1, self.resolution, self.resolution)
+        )
+
+        labels = (
+            torch.from_numpy(self.reader["solution"][idx + self.start])
+            .type(torch.float32)
+            .reshape(1, self.resolution, self.resolution)
+        )
+
+        inputs = (inputs - self.constants["mean_source"]) / self.constants["std_source"]
+        labels = (labels - self.constants["mean_solution"]) / self.constants[
+            "std_solution"
+        ]
+
+        return {"pixel_values": inputs, "labels": labels}
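Note (added for this demo): per the `get_dataset` docstring in `base.py`, appending `.time` wraps a steady dataset such as the `Gaussians` class above in `TimeWrapper`, so time-conditioned models can consume it with a constant lead time of 1.0. A sketch, with a placeholder data path:

```python
from scOT.problems.base import get_dataset

# "./data" is a placeholder; it must contain Poisson-Gauss.nc.
poisson = get_dataset(
    "elliptic.poisson.Gaussians.time",
    which="val",
    num_trajectories=-1,
    data_path="./data",
)
print(poisson[0]["time"])  # always 1.0 for wrapped steady problems
```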
external/poseidon/scOT/problems/fluids/__init__.py
ADDED
File without changes
external/poseidon/scOT/problems/fluids/compressible.py
ADDED
@@ -0,0 +1,308 @@
import torch
import h5py
import copy
from scOT.problems.base import BaseTimeDataset, BaseDataset
from scOT.problems.fluids.normalization_constants import CONSTANTS


class Airfoil(BaseDataset):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.N_max = 10869
        self.N_val = 120
        self.N_test = 240
        self.resolution = 128

        data_path = self.data_path + "/SE-AF.nc"
        data_path = self._move_to_local_scratch(data_path)
        self.reader = h5py.File(data_path, "r")

        self.constants = {
            "mean": 0.92984116,
            "std": 0.10864315,
        }

        self.input_dim = 1
        self.label_description = "[rho]"

        self.post_init()

    def __getitem__(self, idx):
        i = idx
        inputs = (
            torch.from_numpy(self.reader["solution"][i + self.start, 0])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )
        labels = (
            torch.from_numpy(self.reader["solution"][i + self.start, 1])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )

        labels = (labels - self.constants["mean"]) / self.constants["std"]

        pixel_mask = inputs == 1
        labels[pixel_mask] = 1

        return {
            "pixel_values": inputs,
            "labels": labels,
            "pixel_mask": pixel_mask,
        }


class RichtmyerMeshkov(BaseTimeDataset):
    def __init__(self, *args, tracer=False, **kwargs):
        super().__init__(*args, **kwargs)
        assert self.max_num_time_steps * self.time_step_size <= 20

        self.N_max = 1260
        self.N_val = 100
        self.N_test = 130
        self.resolution = 128

        data_path = self.data_path + "/CE-RM.nc"
        data_path = self._move_to_local_scratch(data_path)
        self.reader = h5py.File(data_path, "r")

        self.constants = {
            "mean": torch.tensor([1.1964245, -7.164812e-06, 2.8968952e-06, 1.5648036])
            .unsqueeze(1)
            .unsqueeze(1),
            "std": torch.tensor([0.5543239, 0.24304213, 0.2430597, 0.89639103])
            .unsqueeze(1)
            .unsqueeze(1),
            "time": 20.0,
        }

        self.input_dim = 4
        self.label_description = "[rho],[u,v],[p]"

        self.pixel_mask = torch.tensor([False, False, False, False])

        self.post_init()

    def __getitem__(self, idx):
        i, t, t1, t2 = self._idx_map(idx)
        time = t / self.constants["time"]

        inputs = (
            torch.from_numpy(self.reader["solution"][i + self.start, t1, 0:4])
            .type(torch.float32)
            .reshape(4, self.resolution, self.resolution)
        )

        label = (
            torch.from_numpy(self.reader["solution"][i + self.start, t2, 0:4])
            .type(torch.float32)
            .reshape(4, self.resolution, self.resolution)
        )

        inputs = (inputs - self.constants["mean"]) / self.constants["std"]
        label = (label - self.constants["mean"]) / self.constants["std"]

        return {
            "pixel_values": inputs,
            "labels": label,
            "time": time,
            "pixel_mask": self.pixel_mask,
        }


class RayleighTaylor(BaseTimeDataset):
    def __init__(self, *args, tracer=False, **kwargs):
        super().__init__(*args, **kwargs)
        assert self.max_num_time_steps * self.time_step_size <= 10

        self.N_max = 1260
        self.N_val = 100
        self.N_test = 130
        self.resolution = 128

        data_path = self.data_path + "/GCE-RT.nc"
        data_path = self._move_to_local_scratch(data_path)
        self.reader = h5py.File(data_path, "r")

        self.constants = {
            "mean": torch.tensor(
                [0.8970493, 4.0316996e-13, -1.3858967e-13, 0.7133829, -1.7055787]
            )
            .unsqueeze(1)
            .unsqueeze(1),
            "std": torch.tensor(
                [0.12857835, 0.014896976, 0.014896975, 0.21293919, 0.40131348]
            )
            .unsqueeze(1)
            .unsqueeze(1),
            "time": 10.0,
        }

        self.input_dim = 5
        self.label_description = "[rho],[u,v],[p],[g]"

        self.pixel_mask = torch.tensor([False, False, False, False, False])

        self.post_init()

    def __getitem__(self, idx):
        i, t, t1, t2 = self._idx_map(idx)
        time = t / self.constants["time"]

        inputs = (
            torch.from_numpy(self.reader["solution"][i + self.start, t1, 0:4])
            .type(torch.float32)
            .reshape(4, self.resolution, self.resolution)
        )
        label = (
            torch.from_numpy(self.reader["solution"][i + self.start, t2, 0:4])
            .type(torch.float32)
            .reshape(4, self.resolution, self.resolution)
        )

        g_1 = (
            torch.from_numpy(self.reader["solution"][i + self.start, t1, 5:6])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )
        g_2 = (
            torch.from_numpy(self.reader["solution"][i + self.start, t2, 5:6])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )

        inputs = (inputs - self.constants["mean"][:4]) / self.constants["std"][:4]
        g_1 = (g_1 - self.constants["mean"][4]) / self.constants["std"][4]
        g_2 = (g_2 - self.constants["mean"][4]) / self.constants["std"][4]
        label = (label - self.constants["mean"][:4]) / self.constants["std"][:4]

        inputs = torch.cat([inputs, g_1], dim=0)
        label = torch.cat([label, g_2], dim=0)

        return {
            "pixel_values": inputs,
            "labels": label,
            "time": time,
            "pixel_mask": self.pixel_mask,
        }


class CompressibleBase(BaseTimeDataset):
    def __init__(self, file_path, *args, tracer=False, **kwargs):
        super().__init__(*args, **kwargs)
        assert self.max_num_time_steps * self.time_step_size <= 20

        self.N_max = 10000
        self.N_val = 120
        self.N_test = 240
        self.resolution = 128
        self.tracer = tracer

        data_path = self.data_path + file_path
        data_path = self._move_to_local_scratch(data_path)
        self.reader = h5py.File(data_path, "r")

        self.constants = copy.deepcopy(CONSTANTS)

        self.input_dim = 4 if not tracer else 5
        self.label_description = (
            "[rho],[u,v],[p]" if not tracer else "[rho],[u,v],[p],[tracer]"
        )

        self.pixel_mask = (
            torch.tensor([False, False, False, False])
            if not tracer
            else torch.tensor([False, False, False, False, False])
        )

        self.post_init()

    def __getitem__(self, idx):
        i, t, t1, t2 = self._idx_map(idx)
        time = t / self.constants["time"]

        inputs = (
            torch.from_numpy(self.reader["data"][i + self.start, t1, 0:4])
            .type(torch.float32)
            .reshape(4, self.resolution, self.resolution)
        )
        label = (
            torch.from_numpy(self.reader["data"][i + self.start, t2, 0:4])
            .type(torch.float32)
            .reshape(4, self.resolution, self.resolution)
        )

        inputs[3] = inputs[3] - self.mean_pressure
        label[3] = label[3] - self.mean_pressure

        inputs = (inputs - self.constants["mean"]) / self.constants["std"]
        label = (label - self.constants["mean"]) / self.constants["std"]

        if self.tracer:
            input_tracer = (
                torch.from_numpy(self.reader["data"][i + self.start, t1, 4:5])
                .type(torch.float32)
                .reshape(1, self.resolution, self.resolution)
            )
            output_tracer = (
                torch.from_numpy(self.reader["data"][i + self.start, t2, 4:5])
                .type(torch.float32)
                .reshape(1, self.resolution, self.resolution)
            )
            inputs = torch.cat([inputs, input_tracer], dim=0)
            label = torch.cat([label, output_tracer], dim=0)

        return {
            "pixel_values": inputs,
            "labels": label,
            "time": time,
            "pixel_mask": self.pixel_mask,
        }


class Gaussians(CompressibleBase):
    def __init__(self, *args, tracer=False, **kwargs):
        self.mean_pressure = 2.513
        file_path = "/CE-Gauss.nc"
        if tracer:
            raise NotImplementedError("Tracer not implemented for Gaussians")
        super().__init__(file_path, *args, tracer=tracer, **kwargs)


class KelvinHelmholtz(CompressibleBase):
    def __init__(self, *args, tracer=False, **kwargs):
        self.mean_pressure = 1.0
        file_path = "/CE-KH.nc"
        if tracer:
            raise NotImplementedError("Tracer not implemented for KelvinHelmholtz")
        super().__init__(file_path, *args, tracer=tracer, **kwargs)


class Riemann(CompressibleBase):
    def __init__(self, *args, tracer=False, **kwargs):
        self.mean_pressure = 0.215
        file_path = "/CE-RP.nc"
        if tracer:
            raise NotImplementedError("Tracer not implemented for Riemann")
        super().__init__(file_path, *args, tracer=tracer, **kwargs)


class RiemannCurved(CompressibleBase):
    def __init__(self, *args, tracer=False, **kwargs):
        self.mean_pressure = 0.553
        file_path = "/CE-CRP.nc"
        if tracer:
            raise NotImplementedError("Tracer not implemented for RiemannCurved")
        super().__init__(file_path, *args, tracer=tracer, **kwargs)


class RiemannKelvinHelmholtz(CompressibleBase):
    def __init__(self, *args, tracer=False, **kwargs):
        self.mean_pressure = 1.33
        file_path = "/CE-RPUI.nc"
        if tracer:
            raise NotImplementedError(
                "Tracer not implemented for RiemannKelvinHelmholtz"
            )
        super().__init__(file_path, *args, tracer=tracer, **kwargs)
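A small sketch of how to undo CompressibleBase's two-stage normalization for a Riemann ("CE-RP") prediction: first invert the z-score, then add back the dataset's mean pressure, which was subtracted from channel 3 before normalizing. The mean-pressure value below is the one hard-coded in the Riemann subclass above; CONSTANTS broadcasts thanks to its (4, 1, 1) shape.

import torch
from scOT.problems.fluids.normalization_constants import CONSTANTS

mean_pressure = 0.215                   # from the Riemann subclass above
prediction = torch.randn(4, 128, 128)   # a normalized [rho, u, v, p] field
physical = prediction * CONSTANTS["std"] + CONSTANTS["mean"]  # invert z-score
physical[3] = physical[3] + mean_pressure  # restore the subtracted mean pressure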
external/poseidon/scOT/problems/fluids/incompressible.py
ADDED
@@ -0,0 +1,331 @@
import torch
import h5py
import numpy as np
import copy
from scOT.problems.base import BaseTimeDataset
from scOT.problems.fluids.normalization_constants import CONSTANTS


class IncompressibleBase(BaseTimeDataset):
    def __init__(
        self,
        N_max,
        file_path,
        *args,
        tracer=False,
        just_velocities=False,
        transpose=False,
        resolution=None,
        **kwargs
    ):
        """
        just_velocities: If True, only the velocities are used as input and output.
        transpose: If True, the input and output are transposed.
        """
        super().__init__(*args, **kwargs)
        assert self.max_num_time_steps * self.time_step_size <= 20

        self.N_max = N_max
        self.N_val = 120
        self.N_test = 240
        self.resolution = 128
        self.tracer = tracer
        self.just_velocities = just_velocities
        self.transpose = transpose

        data_path = self.data_path + file_path
        data_path = self._move_to_local_scratch(data_path)
        self.reader = h5py.File(data_path, "r")

        self.constants = copy.deepcopy(CONSTANTS)
        if just_velocities:
            self.constants["mean"] = self.constants["mean"][1:3]
            self.constants["std"] = self.constants["std"][1:3]

        self.density = torch.ones(1, self.resolution, self.resolution)
        self.pressure = torch.zeros(1, self.resolution, self.resolution)

        self.input_dim = 4 if not tracer else 5
        if just_velocities:
            self.input_dim -= 2
        self.label_description = "[u,v]"
        if not self.just_velocities:
            self.label_description = "[rho],[u,v],[p]"
        if tracer:
            self.label_description += ",[tracer]"

        self.pixel_mask = torch.tensor([False, False])
        if not self.just_velocities:
            self.pixel_mask = torch.tensor([False, False, False, True])
        if tracer:
            self.pixel_mask = torch.cat(
                [self.pixel_mask, torch.tensor([False])],
                dim=0,
            )

        if resolution is None:
            self.res = None
        else:
            if resolution > 128:
                raise ValueError("Resolution must be <= 128")
            self.res = resolution

        self.post_init()

    def _downsample(self, image, target_size):
        image = image.unsqueeze(0)
        image_size = image.shape[-2]
        freqs = torch.fft.fftfreq(image_size, d=1 / image_size)
        sel = torch.logical_and(freqs >= -target_size / 2, freqs <= target_size / 2 - 1)
        image_hat = torch.fft.fft2(image, norm="forward")
        image_hat = image_hat[:, :, sel, :][:, :, :, sel]
        image = torch.fft.ifft2(image_hat, norm="forward").real
        return image.squeeze(0)

    def __getitem__(self, idx):
        i, t, t1, t2 = self._idx_map(idx)
        time = t / self.constants["time"]

        inputs_v = (
            torch.from_numpy(self.reader["velocity"][i + self.start, t1, 0:2])
            .type(torch.float32)
            .reshape(2, self.resolution, self.resolution)
        )
        label_v = (
            torch.from_numpy(self.reader["velocity"][i + self.start, t2, 0:2])
            .type(torch.float32)
            .reshape(2, self.resolution, self.resolution)
        )
        if self.transpose:
            inputs_v = inputs_v.transpose(-2, -1)
            label_v = label_v.transpose(-2, -1)

        if not self.just_velocities:
            inputs = torch.cat([self.density, inputs_v, self.pressure], dim=0)
            label = torch.cat([self.density, label_v, self.pressure], dim=0)
        else:
            inputs = inputs_v
            label = label_v

        inputs = (inputs - self.constants["mean"]) / self.constants["std"]
        label = (label - self.constants["mean"]) / self.constants["std"]

        if self.tracer:
            input_tracer = (
                torch.from_numpy(self.reader["velocity"][i + self.start, t1, 2:3])
                .type(torch.float32)
                .reshape(1, self.resolution, self.resolution)
            )
            output_tracer = (
                torch.from_numpy(self.reader["velocity"][i + self.start, t2, 2:3])
                .type(torch.float32)
                .reshape(1, self.resolution, self.resolution)
            )
            if self.transpose:
                input_tracer = input_tracer.transpose(-2, -1)
                output_tracer = output_tracer.transpose(-2, -1)
            input_tracer = (
                input_tracer - self.constants["tracer_mean"]
            ) / self.constants["tracer_std"]
            output_tracer = (
                output_tracer - self.constants["tracer_mean"]
            ) / self.constants["tracer_std"]

            inputs = torch.cat([inputs, input_tracer], dim=0)
            label = torch.cat([label, output_tracer], dim=0)

        if self.res is not None:
            inputs = self._downsample(inputs, self.res)
            label = self._downsample(label, self.res)

        return {
            "pixel_values": inputs,
            "labels": label,
            "time": time,
            "pixel_mask": self.pixel_mask,
        }


class KolmogorovFlow(BaseTimeDataset):
    def __init__(self, *args, tracer=False, just_velocities=False, **kwargs):
        super().__init__(*args, **kwargs)
        assert self.max_num_time_steps * self.time_step_size <= 20

        assert tracer == False

        self.N_max = 20000
        self.N_val = 120
        self.N_test = 240
        self.resolution = 128
        self.just_velocities = just_velocities

        data_path = self.data_path + "/FNS-KF.nc"
        data_path = self._move_to_local_scratch(data_path)
        self.reader = h5py.File(data_path, "r")

        self.constants = copy.deepcopy(CONSTANTS)
        self.constants["mean"][1] = -2.2424793e-13
        self.constants["mean"][2] = 4.1510376e-12
        self.constants["std"][1] = 0.22017328
        self.constants["std"][2] = 0.22078253
        if just_velocities:
            self.constants["mean"] = self.constants["mean"][1:3]
            self.constants["std"] = self.constants["std"][1:3]

        self.density = torch.ones(1, self.resolution, self.resolution)
        self.pressure = torch.zeros(1, self.resolution, self.resolution)
        X, Y = torch.meshgrid(
            torch.linspace(0, 1, self.resolution),
            torch.linspace(0, 1, self.resolution),
            indexing="ij",
        )
        f = lambda x, y: 0.1 * torch.sin(2.0 * np.pi * (x + y))
        self.forcing = f(X, Y).unsqueeze(0)
        self.constants["mean_forcing"] = -1.2996679288335145e-09
        self.constants["std_forcing"] = 0.0707106739282608
        self.forcing = (self.forcing - self.constants["mean_forcing"]) / self.constants[
            "std_forcing"
        ]

        self.input_dim = 5 if not tracer else 6
        if just_velocities:
            self.input_dim -= 2
        self.label_description = "[u,v],[g]"
        if not self.just_velocities:
            self.label_description = "[rho],[u,v],[p],[g]"
        if tracer:
            self.label_description += ",[tracer]"

        self.pixel_mask = torch.tensor([False, False, False])
        if not self.just_velocities:
            self.pixel_mask = torch.tensor([False, False, False, True, False])
        if tracer:
            self.pixel_mask = torch.cat(
                [self.pixel_mask, torch.tensor([False])],
                dim=0,
            )

        self.post_init()

    def __getitem__(self, idx):
        i, t, t1, t2 = self._idx_map(idx)
        time = t / self.constants["time"]

        inputs_v = (
            torch.from_numpy(self.reader["solution"][i + self.start, t1, 0:2])
            .type(torch.float32)
            .reshape(2, self.resolution, self.resolution)
        )
        label_v = (
            torch.from_numpy(self.reader["solution"][i + self.start, t2, 0:2])
            .type(torch.float32)
            .reshape(2, self.resolution, self.resolution)
        )

        if not self.just_velocities:
            inputs = torch.cat([self.density, inputs_v, self.pressure], dim=0)
            label = torch.cat([self.density, label_v, self.pressure], dim=0)
        else:
            inputs = inputs_v
            label = label_v

        inputs = (inputs - self.constants["mean"]) / self.constants["std"]
        label = (label - self.constants["mean"]) / self.constants["std"]

        inputs = torch.cat([inputs, self.forcing], dim=0)
        label = torch.cat([label, self.forcing], dim=0)

        return {
            "pixel_values": inputs,
            "labels": label,
            "time": time,
            "pixel_mask": self.pixel_mask,
        }


class BrownianBridge(IncompressibleBase):
    def __init__(self, *args, tracer=False, just_velocities=False, **kwargs):
        if tracer:
            raise ValueError("BrownianBridge does not have a tracer")
        file_path = "/NS-BB.nc"
        super().__init__(
            20000,
            file_path,
            *args,
            tracer=False,
            just_velocities=just_velocities,
            **kwargs
        )


class PiecewiseConstants(IncompressibleBase):
    def __init__(self, *args, tracer=False, just_velocities=False, **kwargs):
        file_path = "/NS-PwC.nc"
        super().__init__(
            20000,
            file_path,
            *args,
            tracer=tracer,
            just_velocities=just_velocities,
            **kwargs
        )


class Gaussians(IncompressibleBase):
    def __init__(self, *args, tracer=False, just_velocities=False, **kwargs):
        if tracer:
            raise ValueError("Gaussians does not have a tracer")
        file_path = "/NS-Gauss.nc"
        super().__init__(
            20000,
            file_path,
            *args,
            tracer=False,
            just_velocities=just_velocities,
            **kwargs
        )


class ShearLayer(IncompressibleBase):
    def __init__(self, *args, tracer=False, just_velocities=False, **kwargs):
        if tracer:
            raise ValueError("Shear layer does not have a tracer")
        super().__init__(
            40000,
            "/NS-SL.nc",
            *args,
            transpose=True,
            tracer=False,
            just_velocities=just_velocities,
            **kwargs
        )


class VortexSheet(IncompressibleBase):
    def __init__(self, *args, tracer=False, just_velocities=False, **kwargs):
        if tracer:
            raise ValueError("VortexSheet does not have a tracer")
        file_path = "/NS-SVS.nc"
        super().__init__(
            20000,
            file_path,
            *args,
            tracer=False,
            just_velocities=just_velocities,
            **kwargs
        )


class Sines(IncompressibleBase):
    def __init__(self, *args, tracer=False, just_velocities=False, **kwargs):
        if tracer:
            raise ValueError("Sines does not have a tracer")
        file_path = "/NS-Sines.nc"
        super().__init__(
            20000,
            file_path,
            *args,
            tracer=False,
            just_velocities=just_velocities,
            **kwargs
        )
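IncompressibleBase._downsample implements spectral downsampling: it keeps only the low Fourier modes of the periodic field and inverts the FFT, rather than interpolating in physical space. The same logic as a standalone, runnable sketch:

import torch

def downsample(image: torch.Tensor, target_size: int) -> torch.Tensor:
    # Mirrors IncompressibleBase._downsample: select the target_size lowest
    # frequencies along both spatial axes of a (C, H, W) field.
    image = image.unsqueeze(0)
    image_size = image.shape[-2]
    freqs = torch.fft.fftfreq(image_size, d=1 / image_size)
    sel = torch.logical_and(freqs >= -target_size / 2, freqs <= target_size / 2 - 1)
    image_hat = torch.fft.fft2(image, norm="forward")
    image_hat = image_hat[:, :, sel, :][:, :, :, sel]
    return torch.fft.ifft2(image_hat, norm="forward").real.squeeze(0)

x = torch.randn(2, 128, 128)
print(downsample(x, 64).shape)  # torch.Size([2, 64, 64])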
external/poseidon/scOT/problems/fluids/normalization_constants.py
ADDED
@@ -0,0 +1,9 @@
import torch

CONSTANTS = {
    "mean": torch.tensor([0.80, 0.0, 0.0, 0.0]).unsqueeze(1).unsqueeze(1),
    "std": torch.tensor([0.31, 0.391, 0.356, 0.185]).unsqueeze(1).unsqueeze(1),
    "time": 20.0,
    "tracer_mean": 0.19586183,
    "tracer_std": 0.37,
}
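The two trailing unsqueeze(1) calls matter: they give mean and std the shape (4, 1, 1), so they broadcast over per-channel (4, H, W) fields without any reshaping in the dataset classes. A quick check:

import torch
from scOT.problems.fluids.normalization_constants import CONSTANTS

field = torch.randn(4, 128, 128)  # [rho, u, v, p] on a 128x128 grid
normalized = (field - CONSTANTS["mean"]) / CONSTANTS["std"]
print(CONSTANTS["mean"].shape)  # torch.Size([4, 1, 1])
print(normalized.shape)         # torch.Size([4, 128, 128])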
external/poseidon/scOT/problems/reaction_diffusion/__init__.py
ADDED
File without changes
external/poseidon/scOT/problems/reaction_diffusion/allen_cahn.py
ADDED
@@ -0,0 +1,53 @@
import torch
import h5py
from scOT.problems.base import BaseTimeDataset


class AllenCahn(BaseTimeDataset):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        assert self.max_num_time_steps * self.time_step_size <= 19

        self.N_max = 15000
        self.N_val = 60
        self.N_test = 240
        self.resolution = 128

        data_path = self.data_path + "/ACE.nc"
        data_path = self._move_to_local_scratch(data_path)
        self.reader = h5py.File(data_path, "r")

        self.constants = {
            "mean": 0.002484262,
            "std": 0.65351176,
            "time": 19.0,
        }

        self.input_dim = 1
        self.label_description = "[u]"

        self.post_init()

    def __getitem__(self, idx):
        i, t, t1, t2 = self._idx_map(idx)
        time = t / self.constants["time"]

        inputs = (
            torch.from_numpy(self.reader["solution"][i + self.start, t1])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )
        labels = (
            torch.from_numpy(self.reader["solution"][i + self.start, t2])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )

        inputs = (inputs - self.constants["mean"]) / self.constants["std"]
        labels = (labels - self.constants["mean"]) / self.constants["std"]

        return {
            "pixel_values": inputs,
            "labels": labels,
            "time": time,
        }
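Note how time conditioning works in these time datasets: _idx_map (defined in scOT/problems/base.py, not shown in this excerpt) picks a trajectory i and a step pair (t1, t2) with lead time t, and the model only ever sees t rescaled by constants["time"] to [0, 1]. For example:

t, t_max = 7, 19.0     # e.g. a 7-step transition; 19.0 is constants["time"] above
time = t / t_max       # the scalar stored under batch["time"]
print(round(time, 3))  # 0.368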
external/poseidon/scOT/problems/wave/__init__.py
ADDED
File without changes
external/poseidon/scOT/problems/wave/acoustic.py
ADDED
@@ -0,0 +1,125 @@
import torch
import h5py
from scOT.problems.base import BaseTimeDataset


class Layer(BaseTimeDataset):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        assert self.max_num_time_steps * self.time_step_size <= 20

        self.N_max = 10512
        self.N_val = 60
        self.N_test = 240
        self.resolution = 128

        data_path = self.data_path + "/Wave-Layer.nc"
        data_path = self._move_to_local_scratch(data_path)
        self.reader = h5py.File(data_path, "r")

        self.constants = {
            "mean": 0.03467443221585092,
            "std": 0.10442421752963911,
            "mean_c": 3498.5644380917424,
            "std_c": 647.843958567462,
            "time": 20.0,
        }

        self.input_dim = 2
        self.label_description = "[u],[c]"

        self.post_init()

    def __getitem__(self, idx):
        i, t, t1, t2 = self._idx_map(idx)
        time = t / self.constants["time"]

        inputs = (
            torch.from_numpy(self.reader["solution"][i + self.start, t1])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )
        inputs_c = (
            torch.from_numpy(self.reader["c"][i + self.start])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )
        labels = (
            torch.from_numpy(self.reader["solution"][i + self.start, t2])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )

        inputs = (inputs - self.constants["mean"]) / self.constants["std"]
        inputs_c = (inputs_c - self.constants["mean_c"]) / self.constants["std_c"]
        labels = (labels - self.constants["mean"]) / self.constants["std"]

        inputs = torch.cat([inputs, inputs_c], dim=0)
        labels = torch.cat([labels, inputs_c], dim=0)

        return {
            "pixel_values": inputs,
            "labels": labels,
            "time": time,
        }


class Gaussians(BaseTimeDataset):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        assert self.max_num_time_steps * self.time_step_size <= 15

        self.N_max = 10512
        self.N_val = 60
        self.N_test = 240
        self.resolution = 128

        data_path = self.data_path + "/Wave-Gauss.nc"
        data_path = self._move_to_local_scratch(data_path)
        self.reader = h5py.File(data_path, "r")

        self.constants = {
            "mean": 0.0334376316,
            "std": 0.1171879068,
            "mean_c": 2618.4593933,
            "std_c": 601.51658913,
            "time": 15.0,
        }

        self.input_dim = 2
        self.label_description = "[u],[c]"

        self.post_init()

    def __getitem__(self, idx):
        i, t, t1, t2 = self._idx_map(idx)
        time = t / self.constants["time"]

        inputs = (
            torch.from_numpy(self.reader["solution"][i + self.start, t1])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )
        inputs_c = (
            torch.from_numpy(self.reader["c"][i + self.start])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )
        labels = (
            torch.from_numpy(self.reader["solution"][i + self.start, t2])
            .type(torch.float32)
            .reshape(1, self.resolution, self.resolution)
        )

        inputs = (inputs - self.constants["mean"]) / self.constants["std"]
        inputs_c = (inputs_c - self.constants["mean_c"]) / self.constants["std_c"]
        labels = (labels - self.constants["mean"]) / self.constants["std"]

        inputs = torch.cat([inputs, inputs_c], dim=0)
        labels = torch.cat([labels, inputs_c], dim=0)

        return {
            "pixel_values": inputs,
            "labels": labels,
            "time": time,
        }
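Both wave datasets stack the static propagation-speed map c onto the wave field u, in the inputs and in the labels alike, so the model is trained to pass c through unchanged. A minimal check of that channel layout:

import torch

u_t1 = torch.randn(1, 128, 128)  # wave field at the input time
u_t2 = torch.randn(1, 128, 128)  # wave field at the target time
c = torch.randn(1, 128, 128)     # normalized propagation speed (time-independent)

inputs = torch.cat([u_t1, c], dim=0)   # shape (2, 128, 128), like pixel_values
labels = torch.cat([u_t2, c], dim=0)   # shape (2, 128, 128), like labels
assert torch.equal(inputs[1], labels[1])  # the c channel is identical in both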
external/poseidon/scOT/train.py
ADDED
@@ -0,0 +1,537 @@
| 1 |
+
"""
|
| 2 |
+
This script trains a scOT or pretrains Poseidon on a PDE dataset.
|
| 3 |
+
Can be also used for finetuning Poseidon.
|
| 4 |
+
Can be used in a single config or sweep setup.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import argparse
|
| 8 |
+
import torch
|
| 9 |
+
import wandb
|
| 10 |
+
import numpy as np
|
| 11 |
+
import random
|
| 12 |
+
import json
|
| 13 |
+
import psutil
|
| 14 |
+
import os
|
| 15 |
+
|
| 16 |
+
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
|
| 17 |
+
import yaml
|
| 18 |
+
import matplotlib.pyplot as plt
|
| 19 |
+
import transformers
|
| 20 |
+
from accelerate.utils import broadcast_object_list
|
| 21 |
+
from scOT.trainer import TrainingArguments, Trainer
|
| 22 |
+
from transformers import EarlyStoppingCallback
|
| 23 |
+
from scOT.model import ScOT, ScOTConfig
|
| 24 |
+
from mpl_toolkits.axes_grid1 import ImageGrid
|
| 25 |
+
from scOT.problems.base import get_dataset, BaseTimeDataset
|
| 26 |
+
from scOT.utils import get_num_parameters, read_cli, get_num_parameters_no_embed
|
| 27 |
+
from scOT.metrics import relative_lp_error
|
| 28 |
+
|
| 29 |
+
SEED = 0
|
| 30 |
+
torch.manual_seed(SEED)
|
| 31 |
+
np.random.seed(SEED)
|
| 32 |
+
random.seed(SEED)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
MODEL_MAP = {
|
| 36 |
+
"T": {
|
| 37 |
+
"num_heads": [3, 6, 12, 24],
|
| 38 |
+
"skip_connections": [2, 2, 2, 0],
|
| 39 |
+
"window_size": 16,
|
| 40 |
+
"patch_size": 4,
|
| 41 |
+
"mlp_ratio": 4.0,
|
| 42 |
+
"depths": [4, 4, 4, 4],
|
| 43 |
+
"embed_dim": 48,
|
| 44 |
+
},
|
| 45 |
+
"S": {
|
| 46 |
+
"num_heads": [3, 6, 12, 24],
|
| 47 |
+
"skip_connections": [2, 2, 2, 0],
|
| 48 |
+
"window_size": 16,
|
| 49 |
+
"patch_size": 4,
|
| 50 |
+
"mlp_ratio": 4.0,
|
| 51 |
+
"depths": [8, 8, 8, 8],
|
| 52 |
+
"embed_dim": 48,
|
| 53 |
+
},
|
| 54 |
+
"B": {
|
| 55 |
+
"num_heads": [3, 6, 12, 24],
|
| 56 |
+
"skip_connections": [2, 2, 2, 0],
|
| 57 |
+
"window_size": 16,
|
| 58 |
+
"patch_size": 4,
|
| 59 |
+
"mlp_ratio": 4.0,
|
| 60 |
+
"depths": [8, 8, 8, 8],
|
| 61 |
+
"embed_dim": 96,
|
| 62 |
+
},
|
| 63 |
+
"L": {
|
| 64 |
+
"num_heads": [3, 6, 12, 24],
|
| 65 |
+
"skip_connections": [2, 2, 2, 0],
|
| 66 |
+
"window_size": 16,
|
| 67 |
+
"patch_size": 4,
|
| 68 |
+
"mlp_ratio": 4.0,
|
| 69 |
+
"depths": [8, 8, 8, 8],
|
| 70 |
+
"embed_dim": 192,
|
| 71 |
+
},
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def create_predictions_plot(predictions, labels, wandb_prefix):
|
| 76 |
+
assert predictions.shape[0] >= 4
|
| 77 |
+
|
| 78 |
+
indices = random.sample(range(predictions.shape[0]), 4)
|
| 79 |
+
|
| 80 |
+
predictions = predictions[indices]
|
| 81 |
+
labels = labels[indices]
|
| 82 |
+
|
| 83 |
+
fig = plt.figure()
|
| 84 |
+
grid = ImageGrid(
|
| 85 |
+
fig, 111, nrows_ncols=(predictions.shape[1] + labels.shape[1], 4), axes_pad=0.1
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
vmax, vmin = max(predictions.max(), labels.max()), min(
|
| 89 |
+
predictions.min(), labels.min()
|
| 90 |
+
)
|
| 91 |
+
|
| 92 |
+
for _i, ax in enumerate(grid):
|
| 93 |
+
i = _i // 4
|
| 94 |
+
j = _i % 4
|
| 95 |
+
|
| 96 |
+
if i % 2 == 0:
|
| 97 |
+
ax.imshow(
|
| 98 |
+
predictions[j, i // 2, :, :],
|
| 99 |
+
cmap="gist_ncar",
|
| 100 |
+
origin="lower",
|
| 101 |
+
vmin=vmin,
|
| 102 |
+
vmax=vmax,
|
| 103 |
+
)
|
| 104 |
+
else:
|
| 105 |
+
ax.imshow(
|
| 106 |
+
labels[j, i // 2, :, :],
|
| 107 |
+
cmap="gist_ncar",
|
| 108 |
+
origin="lower",
|
| 109 |
+
vmin=vmin,
|
| 110 |
+
vmax=vmax,
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
ax.set_xticks([])
|
| 114 |
+
ax.set_yticks([])
|
| 115 |
+
|
| 116 |
+
wandb.log({wandb_prefix + "/predictions": wandb.Image(fig)})
|
| 117 |
+
plt.close()
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def setup(params, model_map=True):
|
| 121 |
+
config = None
|
| 122 |
+
RANK = int(os.environ.get("LOCAL_RANK", -1))
|
| 123 |
+
CPU_CORES = len(psutil.Process().cpu_affinity())
|
| 124 |
+
CPU_CORES = min(CPU_CORES, 16)
|
| 125 |
+
print(f"Detected {CPU_CORES} CPU cores, will use {CPU_CORES} workers.")
|
| 126 |
+
if params.disable_tqdm:
|
| 127 |
+
transformers.utils.logging.disable_progress_bar()
|
| 128 |
+
if params.json_config:
|
| 129 |
+
config = json.loads(params.config)
|
| 130 |
+
else:
|
| 131 |
+
config = params.config
|
| 132 |
+
|
| 133 |
+
if RANK == 0 or RANK == -1:
|
| 134 |
+
run = wandb.init(
|
| 135 |
+
project=params.wandb_project_name, name=params.wandb_run_name, config=config
|
| 136 |
+
)
|
| 137 |
+
config = wandb.config
|
| 138 |
+
else:
|
| 139 |
+
|
| 140 |
+
def clean_yaml(config):
|
| 141 |
+
d = {}
|
| 142 |
+
for key, inner_dict in config.items():
|
| 143 |
+
d[key] = inner_dict["value"]
|
| 144 |
+
return d
|
| 145 |
+
|
| 146 |
+
if not params.json_config:
|
| 147 |
+
with open(params.config, "r") as s:
|
| 148 |
+
config = yaml.safe_load(s)
|
| 149 |
+
config = clean_yaml(config)
|
| 150 |
+
run = None
|
| 151 |
+
|
| 152 |
+
ckpt_dir = "./"
|
| 153 |
+
if RANK == 0 or RANK == -1:
|
| 154 |
+
if run.sweep_id is not None:
|
| 155 |
+
ckpt_dir = (
|
| 156 |
+
params.checkpoint_path
|
| 157 |
+
+ "/"
|
| 158 |
+
+ run.project
|
| 159 |
+
+ "/"
|
| 160 |
+
+ run.sweep_id
|
| 161 |
+
+ "/"
|
| 162 |
+
+ run.name
|
| 163 |
+
)
|
| 164 |
+
else:
|
| 165 |
+
ckpt_dir = params.checkpoint_path + "/" + run.project + "/" + run.name
|
| 166 |
+
if (RANK == 0 or RANK == -1) and not os.path.exists(ckpt_dir):
|
| 167 |
+
os.makedirs(ckpt_dir)
|
| 168 |
+
ls = broadcast_object_list([ckpt_dir], from_process=0)
|
| 169 |
+
ckpt_dir = ls[0]
|
| 170 |
+
|
| 171 |
+
if model_map and (
|
| 172 |
+
type(config["model_name"]) == str and config["model_name"] in MODEL_MAP.keys()
|
| 173 |
+
):
|
| 174 |
+
config = {**config, **MODEL_MAP[config["model_name"]]}
|
| 175 |
+
if RANK == 0 or RANK == -1:
|
| 176 |
+
wandb.config.update(MODEL_MAP[config["model_name"]], allow_val_change=True)
|
| 177 |
+
|
| 178 |
+
return run, config, ckpt_dir, RANK, CPU_CORES
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
if __name__ == "__main__":
|
| 182 |
+
parser = argparse.ArgumentParser(description="Train scOT or pretrain Poseidon.")
|
| 183 |
+
parser.add_argument("--resume_training", action="store_true")
|
| 184 |
+
parser.add_argument(
|
| 185 |
+
"--finetune_from",
|
| 186 |
+
type=str,
|
| 187 |
+
default=None,
|
| 188 |
+
help="Set this to a str pointing to a HF Hub model checkpoint or a directory with a scOT checkpoint if you want to finetune.",
|
| 189 |
+
)
|
| 190 |
+
parser.add_argument(
|
| 191 |
+
"--replace_embedding_recovery",
|
| 192 |
+
action="store_true",
|
| 193 |
+
help="Set this if you have to replace the embeddings and recovery layers because you are not just using the density, velocity and pressure channels. Only relevant for finetuning.",
|
| 194 |
+
)
|
| 195 |
+
params = read_cli(parser).parse_args()
|
| 196 |
+
run, config, ckpt_dir, RANK, CPU_CORES = setup(params)
|
| 197 |
+
|
| 198 |
+
train_eval_set_kwargs = (
|
| 199 |
+
{"just_velocities": True}
|
| 200 |
+
if ("incompressible" in config["dataset"]) and params.just_velocities
|
| 201 |
+
else {}
|
| 202 |
+
)
|
| 203 |
+
if params.move_data is not None:
|
| 204 |
+
train_eval_set_kwargs["move_to_local_scratch"] = params.move_data
|
| 205 |
+
if params.max_num_train_time_steps is not None:
|
| 206 |
+
train_eval_set_kwargs["max_num_time_steps"] = params.max_num_train_time_steps
|
| 207 |
+
if params.train_time_step_size is not None:
|
| 208 |
+
train_eval_set_kwargs["time_step_size"] = params.train_time_step_size
|
| 209 |
+
if params.train_small_time_transition:
|
| 210 |
+
train_eval_set_kwargs["allowed_time_transitions"] = [1]
|
| 211 |
+
train_dataset = get_dataset(
|
| 212 |
+
dataset=config["dataset"],
|
| 213 |
+
which="train",
|
| 214 |
+
num_trajectories=config["num_trajectories"],
|
| 215 |
+
data_path=params.data_path,
|
| 216 |
+
**train_eval_set_kwargs,
|
| 217 |
+
)
|
| 218 |
+
eval_dataset = get_dataset(
|
| 219 |
+
dataset=config["dataset"],
|
| 220 |
+
which="val",
|
| 221 |
+
num_trajectories=config["num_trajectories"],
|
| 222 |
+
data_path=params.data_path,
|
| 223 |
+
**train_eval_set_kwargs,
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
config["effective_train_set_size"] = len(train_dataset)
|
| 227 |
+
time_involved = isinstance(train_dataset, BaseTimeDataset) or (
|
| 228 |
+
isinstance(train_dataset, torch.utils.data.ConcatDataset)
|
| 229 |
+
and isinstance(train_dataset.datasets[0], BaseTimeDataset)
|
| 230 |
+
)
|
| 231 |
+
|
| 232 |
+
if not isinstance(train_dataset, torch.utils.data.ConcatDataset):
|
| 233 |
+
resolution = train_dataset.resolution
|
| 234 |
+
input_dim = train_dataset.input_dim
|
| 235 |
+
output_dim = train_dataset.output_dim
|
| 236 |
+
channel_slice_list = train_dataset.channel_slice_list
|
| 237 |
+
printable_channel_description = train_dataset.printable_channel_description
|
| 238 |
+
else:
|
| 239 |
+
resolution = train_dataset.datasets[0].resolution
|
| 240 |
+
input_dim = train_dataset.datasets[0].input_dim
|
| 241 |
+
output_dim = train_dataset.datasets[0].output_dim
|
| 242 |
+
channel_slice_list = train_dataset.datasets[0].channel_slice_list
|
| 243 |
+
printable_channel_description = train_dataset.datasets[
|
| 244 |
+
0
|
| 245 |
+
].printable_channel_description
|
| 246 |
+
|
| 247 |
+
model_config = (
|
| 248 |
+
ScOTConfig(
|
| 249 |
+
image_size=resolution,
|
| 250 |
+
patch_size=config["patch_size"],
|
| 251 |
+
num_channels=input_dim,
|
| 252 |
+
num_out_channels=output_dim,
|
| 253 |
+
embed_dim=config["embed_dim"],
|
| 254 |
+
depths=config["depths"],
|
| 255 |
+
num_heads=config["num_heads"],
|
| 256 |
+
skip_connections=config["skip_connections"],
|
| 257 |
+
window_size=config["window_size"],
|
| 258 |
+
mlp_ratio=config["mlp_ratio"],
|
| 259 |
+
qkv_bias=True,
|
| 260 |
+
hidden_dropout_prob=0.0, # default
|
| 261 |
+
attention_probs_dropout_prob=0.0, # default
|
| 262 |
+
drop_path_rate=0.0,
|
| 263 |
+
hidden_act="gelu",
|
| 264 |
+
use_absolute_embeddings=False,
|
| 265 |
+
initializer_range=0.02,
|
| 266 |
+
layer_norm_eps=1e-5,
|
| 267 |
+
p=1,
|
| 268 |
+
channel_slice_list_normalized_loss=channel_slice_list,
|
| 269 |
+
residual_model="convnext",
|
| 270 |
+
use_conditioning=time_involved,
|
| 271 |
+
learn_residual=False,
|
| 272 |
+
)
|
| 273 |
+
if params.finetune_from is None or params.replace_embedding_recovery
|
| 274 |
+
else None
|
| 275 |
+
)
|
| 276 |
+
|
| 277 |
+
train_config = TrainingArguments(
|
| 278 |
+
output_dir=ckpt_dir,
|
| 279 |
+
overwrite_output_dir=True, #! OVERWRITE THIS DIRECTORY IN CASE, also for resuming training
|
| 280 |
+
evaluation_strategy="epoch",
|
| 281 |
+
per_device_train_batch_size=config["batch_size"],
|
| 282 |
+
per_device_eval_batch_size=config["batch_size"],
|
| 283 |
+
eval_accumulation_steps=16,
|
| 284 |
+
max_grad_norm=config["max_grad_norm"],
|
| 285 |
+
num_train_epochs=config["num_epochs"],
|
| 286 |
+
optim="adamw_torch",
|
| 287 |
+
learning_rate=config["lr"],
|
| 288 |
+
learning_rate_embedding_recovery=(
|
| 289 |
+
None
|
| 290 |
+
if (params.finetune_from is None or "lr_embedding_recovery" not in config)
|
| 291 |
+
else config["lr_embedding_recovery"]
|
| 292 |
+
),
|
| 293 |
+
learning_rate_time_embedding=(
|
| 294 |
+
None
|
| 295 |
+
if (params.finetune_from is None or "lr_time_embedding" not in config)
|
| 296 |
+
else config["lr_time_embedding"]
|
| 297 |
+
),
|
| 298 |
+
weight_decay=config["weight_decay"],
|
| 299 |
+
adam_beta1=0.9, # default
|
| 300 |
+
adam_beta2=0.999, # default
|
| 301 |
+
adam_epsilon=1e-8, # default
|
| 302 |
+
lr_scheduler_type=config["lr_scheduler"],
|
| 303 |
+
warmup_ratio=config["warmup_ratio"],
|
| 304 |
+
log_level="passive",
|
| 305 |
+
logging_strategy="steps",
|
| 306 |
+
logging_steps=5,
|
| 307 |
+
logging_nan_inf_filter=False,
|
| 308 |
+
save_strategy="epoch",
|
| 309 |
+
save_total_limit=1,
|
| 310 |
+
seed=SEED,
|
| 311 |
+
fp16=False,
|
| 312 |
+
dataloader_num_workers=CPU_CORES,
|
| 313 |
+
load_best_model_at_end=True,
|
| 314 |
+
metric_for_best_model="loss",
|
| 315 |
+
greater_is_better=False,
|
| 316 |
+
dataloader_pin_memory=True,
|
| 317 |
+
gradient_checkpointing=False,
|
| 318 |
+
auto_find_batch_size=False,
|
| 319 |
+
full_determinism=False,
|
| 320 |
+
torch_compile=False,
|
| 321 |
+
report_to="wandb",
|
| 322 |
+
run_name=params.wandb_run_name,
|
| 323 |
+
)
|
| 324 |
+
|
| 325 |
+
early_stopping = EarlyStoppingCallback(
|
| 326 |
+
early_stopping_patience=config["early_stopping_patience"],
|
| 327 |
+
early_stopping_threshold=0.0, # set no threshold for now
|
| 328 |
+
)
|
| 329 |
+
|
| 330 |
+
if params.finetune_from is not None:
|
| 331 |
+
model = ScOT.from_pretrained(
|
| 332 |
+
params.finetune_from, config=model_config, ignore_mismatched_sizes=True
|
| 333 |
+
)
|
| 334 |
+
else:
|
| 335 |
+
model = ScOT(model_config)
|
| 336 |
+
num_params = get_num_parameters(model)
|
| 337 |
+
config["num_params"] = num_params
|
| 338 |
+
num_params_no_embed = get_num_parameters_no_embed(model)
|
| 339 |
+
config["num_params_wout_embed"] = num_params_no_embed
|
| 340 |
+
if RANK == 0 or RANK == -1:
|
| 341 |
+
print(f"Model size: {num_params}")
|
| 342 |
+
print(f"Model size without embeddings: {num_params_no_embed}")
|
| 343 |
+
|
| 344 |
+
def compute_metrics(eval_preds):
|
| 345 |
+
channel_list = channel_slice_list
|
| 346 |
+
|
| 347 |
+
def get_statistics(errors):
|
| 348 |
+
median_error = np.median(errors, axis=0)
|
| 349 |
+
mean_error = np.mean(errors, axis=0)
|
| 350 |
+
std_error = np.std(errors, axis=0)
|
| 351 |
+
min_error = np.min(errors, axis=0)
|
| 352 |
+
max_error = np.max(errors, axis=0)
|
| 353 |
+
return {
|
| 354 |
+
"median_relative_l1_error": median_error,
|
| 355 |
+
"mean_relative_l1_error": mean_error,
|
| 356 |
+
"std_relative_l1_error": std_error,
|
| 357 |
+
"min_relative_l1_error": min_error,
|
| 358 |
+
"max_relative_l1_error": max_error,
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
error_statistics = [
|
| 362 |
+
get_statistics(
|
| 363 |
+
relative_lp_error(
|
| 364 |
+
eval_preds.predictions[:, channel_list[i] : channel_list[i + 1]],
|
| 365 |
+
eval_preds.label_ids[:, channel_list[i] : channel_list[i + 1]],
|
| 366 |
+
p=1,
|
| 367 |
+
return_percent=True,
|
| 368 |
+
)
|
| 369 |
+
)
|
| 370 |
+
for i in range(len(channel_list) - 1)
|
| 371 |
+
]
|
| 372 |
+
|
| 373 |
+
if output_dim == 1:
|
| 374 |
+
error_statistics = error_statistics[0]
|
| 375 |
+
return error_statistics
|
| 376 |
+
else:
|
| 377 |
+
mean_over_means = np.mean(
|
| 378 |
+
np.array(
|
| 379 |
+
[stats["mean_relative_l1_error"] for stats in error_statistics]
|
| 380 |
+
),
|
| 381 |
+
axis=0,
|
| 382 |
+
)
|
| 383 |
+
mean_over_medians = np.mean(
|
| 384 |
+
np.array(
|
| 385 |
+
[stats["median_relative_l1_error"] for stats in error_statistics]
|
| 386 |
+
),
|
| 387 |
+
axis=0,
|
| 388 |
+
)
|
| 389 |
+
error_statistics_ = {
|
| 390 |
+
"mean_relative_l1_error": mean_over_means,
|
| 391 |
+
"mean_over_median_relative_l1_error": mean_over_medians,
|
| 392 |
+
}
|
| 393 |
+
for i, stats in enumerate(error_statistics):
|
| 394 |
+
for key, value in stats.items():
|
| 395 |
+
error_statistics_[printable_channel_description[i] + "/" + key] = (
|
| 396 |
+
value
|
| 397 |
+
)
|
| 398 |
+
return error_statistics_
|
| 399 |
+
|
| 400 |
+
trainer = Trainer(
|
| 401 |
+
model=model,
|
| 402 |
+
args=train_config,
|
| 403 |
+
train_dataset=train_dataset,
|
| 404 |
+
eval_dataset=eval_dataset,
|
| 405 |
+
compute_metrics=compute_metrics,
|
| 406 |
+
callbacks=[early_stopping],
|
| 407 |
+
)
|
| 408 |
+
|
| 409 |
+
trainer.train(resume_from_checkpoint=params.resume_training)
|
| 410 |
+
trainer.save_model(train_config.output_dir)
|
| 411 |
+
|
| 412 |
+
if (RANK == 0 or RANK == -1) and params.push_to_hf_hub is not None:
|
| 413 |
+
model.push_to_hub(params.push_to_hf_hub)
|
| 414 |
+
|
| 415 |
+
do_test = (
|
| 416 |
+
True
|
| 417 |
+
if params.max_num_train_time_steps is None
|
| 418 |
+
and params.train_time_step_size is None
|
| 419 |
+
and not params.train_small_time_transition
|
| 420 |
+
and not ".time" in config["dataset"]
|
| 421 |
+
else False
|
| 422 |
+
)
|
| 423 |
+
if do_test:
|
| 424 |
+
print("Testing...")
|
| 425 |
+
test_set_kwargs = (
|
| 426 |
+
{"just_velocities": True}
|
| 427 |
+
if ("incompressible" in config["dataset"]) and params.just_velocities
|
| 428 |
+
else {}
|
| 429 |
+
)
|
| 430 |
+
out_test_set_kwargs = (
|
| 431 |
+
{"just_velocities": True}
|
| 432 |
+
if ("incompressible" in config["dataset"]) and params.just_velocities
|
| 433 |
+
else {}
|
| 434 |
+
)
|
| 435 |
+
if params.move_data is not None:
|
| 436 |
+
test_set_kwargs["move_to_local_scratch"] = params.move_data
|
| 437 |
+
out_test_set_kwargs["move_to_local_scratch"] = params.move_data
|
| 438 |
+
if time_involved:
|
| 439 |
+
test_set_kwargs = {
|
| 440 |
+
**test_set_kwargs,
|
```python
            "max_num_time_steps": 1,
            "time_step_size": 14,
            "allowed_time_transitions": [1],
        }
        out_test_set_kwargs = {
            **out_test_set_kwargs,
            "max_num_time_steps": 1,
            "time_step_size": 20,
            "allowed_time_transitions": [1],
        }
    if "RayleighTaylor" in config["dataset"]:
        test_set_kwargs = {
            **test_set_kwargs,
            "max_num_time_steps": 1,
            "time_step_size": 7,
            "allowed_time_transitions": [1],
        }
        out_test_set_kwargs = {
            **out_test_set_kwargs,
            "max_num_time_steps": 1,
            "time_step_size": 10,
            "allowed_time_transitions": [1],
        }

    test_dataset = get_dataset(
        dataset=config["dataset"],
        which="test",
        num_trajectories=config["num_trajectories"],
        data_path=params.data_path,
        **test_set_kwargs,
    )
    try:
        out_dist_test_dataset = get_dataset(
            dataset=config["dataset"] + ".out",
            which="test",
            num_trajectories=config["num_trajectories"],
            data_path=params.data_path,
            **out_test_set_kwargs,
        )
    except Exception:
        # no out-of-distribution variant of this dataset is available
        out_dist_test_dataset = None
    predictions = trainer.predict(test_dataset, metric_key_prefix="")
    if RANK == 0 or RANK == -1:
        metrics = {}
        for key, value in predictions.metrics.items():
            metrics["test/" + key[1:]] = value
        wandb.log(metrics)
        create_predictions_plot(
            predictions.predictions,
            predictions.label_ids,
            wandb_prefix="test",
        )

    # evaluate on out-of-distribution test set
    if out_dist_test_dataset is not None:
        predictions = trainer.predict(out_dist_test_dataset, metric_key_prefix="")
        if RANK == 0 or RANK == -1:
            metrics = {}
            for key, value in predictions.metrics.items():
                metrics["test_out_dist/" + key[1:]] = value
            wandb.log(metrics)
            create_predictions_plot(
                predictions.predictions,
                predictions.label_ids,
                wandb_prefix="test_out_dist",
            )

    if time_involved and (test_set_kwargs["time_step_size"] // 2 > 0):
        trainer.set_ar_steps(test_set_kwargs["time_step_size"] // 2)
        predictions = trainer.predict(test_dataset, metric_key_prefix="")
        if RANK == 0 or RANK == -1:
            metrics = {}
            for key, value in predictions.metrics.items():
                metrics["test/ar/" + key[1:]] = value
            wandb.log(metrics)
            create_predictions_plot(
                predictions.predictions,
                predictions.label_ids,
                wandb_prefix="test/ar",
            )

        # evaluate on out-of-distribution test set
        if out_dist_test_dataset is not None:
            trainer.set_ar_steps(out_test_set_kwargs["time_step_size"] // 2)
            predictions = trainer.predict(
                out_dist_test_dataset, metric_key_prefix=""
            )
            if RANK == 0 or RANK == -1:
                metrics = {}
                for key, value in predictions.metrics.items():
                    metrics["test_out_dist/ar/" + key[1:]] = value
                wandb.log(metrics)
                create_predictions_plot(
                    predictions.predictions,
                    predictions.label_ids,
                    wandb_prefix="test_out_dist/ar",
                )
```
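One detail worth making explicit in the evaluation block above: `trainer.set_ar_steps(k)` with an integer `k` chains `k` autoregressive model calls, each conditioned on `time / k` (see `_model_forward` in `trainer.py` below), so passing `time_step_size // 2` trades one big jump for several smaller ones over the same lead time. A sketch with the numbers visible above:

```python
# Illustrative only: rollout length chosen by the AR evaluation above.
# For "RayleighTaylor", test_set_kwargs["time_step_size"] is 7, hence
# 7 // 2 = 3 chained model calls per test sample.
time_step_size = 7
ar_steps = time_step_size // 2   # -> 3
lead_time = 1.0                  # assumed normalized lead time of one test pair
print(lead_time / ar_steps)      # per-call conditioning time, ~0.333
```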
external/poseidon/scOT/trainer.py
ADDED
@@ -0,0 +1,762 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Our version of the Huggingface Trainer class.
|
| 3 |
+
It adds learning_rate_time_embedding, learning_rate_embedding_recovery as
|
| 4 |
+
additional learning rates and groups parameters for the optimizer.
|
| 5 |
+
It also allows for autoregressive rollouts by using
|
| 6 |
+
trainer.set_ar_steps(AR_STEPS) where AR_STEPS is either a an integer for a
|
| 7 |
+
homogeneous rollout of AR_STEPS steps or a list of integers for a heterogeneous
|
| 8 |
+
rollout where each element is the timestep.
|
| 9 |
+
If, additionally, output_all_steps is also set, the predict function will
|
| 10 |
+
output all intermediate steps as well.
|
| 11 |
+
|
| 12 |
+
We sublass a Huggingface Trainer to allow for autoregressive rollouts and multiple parameter groups in the optimizer.
|
| 13 |
+
It is specifically subclassed for our purpose.
|
| 14 |
+
|
| 15 |
+
A lot of code is copied over because only slight changes have been made.
|
| 16 |
+
|
| 17 |
+
The original code of Huggingface Transformers is distributed under the Apache 2.0 license. See below:
|
| 18 |
+
|
| 19 |
+
Copyright 2018- The Hugging Face team. All rights reserved.
|
| 20 |
+
|
| 21 |
+
Apache License
|
| 22 |
+
Version 2.0, January 2004
|
| 23 |
+
http://www.apache.org/licenses/
|
| 24 |
+
|
| 25 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 26 |
+
|
| 27 |
+
1. Definitions.
|
| 28 |
+
|
| 29 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 30 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 31 |
+
|
| 32 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 33 |
+
the copyright owner that is granting the License.
|
| 34 |
+
|
| 35 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 36 |
+
other entities that control, are controlled by, or are under common
|
| 37 |
+
control with that entity. For the purposes of this definition,
|
| 38 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 39 |
+
direction or management of such entity, whether by contract or
|
| 40 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 41 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 42 |
+
|
| 43 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 44 |
+
exercising permissions granted by this License.
|
| 45 |
+
|
| 46 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 47 |
+
including but not limited to software source code, documentation
|
| 48 |
+
source, and configuration files.
|
| 49 |
+
|
| 50 |
+
"Object" form shall mean any form resulting from mechanical
|
| 51 |
+
transformation or translation of a Source form, including but
|
| 52 |
+
not limited to compiled object code, generated documentation,
|
| 53 |
+
and conversions to other media types.
|
| 54 |
+
|
| 55 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 56 |
+
Object form, made available under the License, as indicated by a
|
| 57 |
+
copyright notice that is included in or attached to the work
|
| 58 |
+
(an example is provided in the Appendix below).
|
| 59 |
+
|
| 60 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 61 |
+
form, that is based on (or derived from) the Work and for which the
|
| 62 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 63 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 64 |
+
of this License, Derivative Works shall not include works that remain
|
| 65 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 66 |
+
the Work and Derivative Works thereof.
|
| 67 |
+
|
| 68 |
+
"Contribution" shall mean any work of authorship, including
|
| 69 |
+
the original version of the Work and any modifications or additions
|
| 70 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 71 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 72 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 73 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 74 |
+
means any form of electronic, verbal, or written communication sent
|
| 75 |
+
to the Licensor or its representatives, including but not limited to
|
| 76 |
+
communication on electronic mailing lists, source code control systems,
|
| 77 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 78 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 79 |
+
excluding communication that is conspicuously marked or otherwise
|
| 80 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 81 |
+
|
| 82 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 83 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 84 |
+
subsequently incorporated within the Work.
|
| 85 |
+
|
| 86 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 87 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 88 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 89 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 90 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 91 |
+
Work and such Derivative Works in Source or Object form.
|
| 92 |
+
|
| 93 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 94 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 95 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 96 |
+
(except as stated in this section) patent license to make, have made,
|
| 97 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 98 |
+
where such license applies only to those patent claims licensable
|
| 99 |
+
by such Contributor that are necessarily infringed by their
|
| 100 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 101 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 102 |
+
institute patent litigation against any entity (including a
|
| 103 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 104 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 105 |
+
or contributory patent infringement, then any patent licenses
|
| 106 |
+
granted to You under this License for that Work shall terminate
|
| 107 |
+
as of the date such litigation is filed.
|
| 108 |
+
|
| 109 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 110 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 111 |
+
modifications, and in Source or Object form, provided that You
|
| 112 |
+
meet the following conditions:
|
| 113 |
+
|
| 114 |
+
(a) You must give any other recipients of the Work or
|
| 115 |
+
Derivative Works a copy of this License; and
|
| 116 |
+
|
| 117 |
+
(b) You must cause any modified files to carry prominent notices
|
| 118 |
+
stating that You changed the files; and
|
| 119 |
+
|
| 120 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 121 |
+
that You distribute, all copyright, patent, trademark, and
|
| 122 |
+
attribution notices from the Source form of the Work,
|
| 123 |
+
excluding those notices that do not pertain to any part of
|
| 124 |
+
the Derivative Works; and
|
| 125 |
+
|
| 126 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 127 |
+
distribution, then any Derivative Works that You distribute must
|
| 128 |
+
include a readable copy of the attribution notices contained
|
| 129 |
+
within such NOTICE file, excluding those notices that do not
|
| 130 |
+
pertain to any part of the Derivative Works, in at least one
|
| 131 |
+
of the following places: within a NOTICE text file distributed
|
| 132 |
+
as part of the Derivative Works; within the Source form or
|
| 133 |
+
documentation, if provided along with the Derivative Works; or,
|
| 134 |
+
within a display generated by the Derivative Works, if and
|
| 135 |
+
wherever such third-party notices normally appear. The contents
|
| 136 |
+
of the NOTICE file are for informational purposes only and
|
| 137 |
+
do not modify the License. You may add Your own attribution
|
| 138 |
+
notices within Derivative Works that You distribute, alongside
|
| 139 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 140 |
+
that such additional attribution notices cannot be construed
|
| 141 |
+
as modifying the License.
|
| 142 |
+
|
| 143 |
+
You may add Your own copyright statement to Your modifications and
|
| 144 |
+
may provide additional or different license terms and conditions
|
| 145 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 146 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 147 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 148 |
+
the conditions stated in this License.
|
| 149 |
+
|
| 150 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 151 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 152 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 153 |
+
this License, without any additional terms or conditions.
|
| 154 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 155 |
+
the terms of any separate license agreement you may have executed
|
| 156 |
+
with Licensor regarding such Contributions.
|
| 157 |
+
|
| 158 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 159 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 160 |
+
except as required for reasonable and customary use in describing the
|
| 161 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 162 |
+
|
| 163 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 164 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 165 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 166 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 167 |
+
implied, including, without limitation, any warranties or conditions
|
| 168 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 169 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 170 |
+
appropriateness of using or redistributing the Work and assume any
|
| 171 |
+
risks associated with Your exercise of permissions under this License.
|
| 172 |
+
|
| 173 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 174 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 175 |
+
unless required by applicable law (such as deliberate and grossly
|
| 176 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 177 |
+
liable to You for damages, including any direct, indirect, special,
|
| 178 |
+
incidental, or consequential damages of any character arising as a
|
| 179 |
+
result of this License or out of the use or inability to use the
|
| 180 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 181 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 182 |
+
other commercial damages or losses), even if such Contributor
|
| 183 |
+
has been advised of the possibility of such damages.
|
| 184 |
+
|
| 185 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 186 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 187 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 188 |
+
or other liability obligations and/or rights consistent with this
|
| 189 |
+
License. However, in accepting such obligations, You may act only
|
| 190 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 191 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 192 |
+
defend, and hold each Contributor harmless for any liability
|
| 193 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 194 |
+
of your accepting any such warranty or additional liability.
|
| 195 |
+
|
| 196 |
+
END OF TERMS AND CONDITIONS
|
| 197 |
+
|
| 198 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 199 |
+
|
| 200 |
+
To apply the Apache License to your work, attach the following
|
| 201 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 202 |
+
replaced with your own identifying information. (Don't include
|
| 203 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 204 |
+
comment syntax for the file format. We also recommend that a
|
| 205 |
+
file or class name and description of purpose be included on the
|
| 206 |
+
same "printed page" as the copyright notice for easier
|
| 207 |
+
identification within third-party archives.
|
| 208 |
+
|
| 209 |
+
Copyright [yyyy] [name of copyright owner]
|
| 210 |
+
|
| 211 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 212 |
+
you may not use this file except in compliance with the License.
|
| 213 |
+
You may obtain a copy of the License at
|
| 214 |
+
|
| 215 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 216 |
+
|
| 217 |
+
Unless required by applicable law or agreed to in writing, software
|
| 218 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 219 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 220 |
+
See the License for the specific language governing permissions and
|
| 221 |
+
limitations under the License.
|
| 222 |
+
"""
|
| 223 |
+
|
| 224 |
+
import torch
|
| 225 |
+
from torch import nn
|
| 226 |
+
from typing import List, Optional, Dict, Tuple, Union, Any
|
| 227 |
+
from transformers.trainer import *
|
| 228 |
+
from transformers import Trainer as Trainer_
|
| 229 |
+
from transformers import TrainingArguments as TrainingArguments_
|
| 230 |
+
from scOT.model import LayerNorm, ConditionalLayerNorm
|
| 231 |
+
from dataclasses import dataclass, field
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
@dataclass
|
| 235 |
+
class TrainingArguments(TrainingArguments_):
|
| 236 |
+
learning_rate_embedding_recovery: Optional[float] = field(
|
| 237 |
+
default=None,
|
| 238 |
+
metadata={
|
| 239 |
+
"help": "The initial learning rate for the embedding/recovery. When not provided, falls back to `learning_rate`."
|
| 240 |
+
},
|
| 241 |
+
)
|
| 242 |
+
|
| 243 |
+
learning_rate_time_embedding: Optional[float] = field(
|
| 244 |
+
default=None,
|
| 245 |
+
metadata={
|
| 246 |
+
"help": "The initial learning rate for the time embedding. When not provided, falls back to `learning_rate`. Only used when embedding and recovery are also fine-tuned with different lr."
|
| 247 |
+
},
|
| 248 |
+
)
|
| 249 |
+
|
| 250 |
+
def set_training(
|
| 251 |
+
self,
|
| 252 |
+
*args,
|
| 253 |
+
learning_rate_embedding_recovery: Optional[float] = None,
|
| 254 |
+
learning_rate_time_embedding: Optional[float] = None,
|
| 255 |
+
**kwargs,
|
| 256 |
+
):
|
| 257 |
+
self = super().set_training(*args, **kwargs)
|
| 258 |
+
self.learning_rate_embedding_recovery = learning_rate_embedding_recovery
|
| 259 |
+
self.learning_rate_time_embedding = learning_rate_time_embedding
|
| 260 |
+
return self
|
| 261 |
+
|
| 262 |
+
def set_optimizer(
|
| 263 |
+
self,
|
| 264 |
+
*args,
|
| 265 |
+
learning_rate_embedding_recovery: Optional[float] = None,
|
| 266 |
+
learning_rate_time_embedding: Optional[float] = None,
|
| 267 |
+
**kwargs,
|
| 268 |
+
):
|
| 269 |
+
self = super().set_optimizer(*args, **kwargs)
|
| 270 |
+
self.learning_rate_embedding_recovery = learning_rate_embedding_recovery
|
| 271 |
+
self.learning_rate_time_embedding = learning_rate_time_embedding
|
| 272 |
+
return self
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
class Trainer(Trainer_):
|
| 276 |
+
def __init__(self, *args, **kwargs):
|
| 277 |
+
super().__init__(*args, **kwargs)
|
| 278 |
+
self.ar_steps = None
|
| 279 |
+
self.output_all_steps = False
|
| 280 |
+
|
| 281 |
+
def get_decay_parameter_names(self, model) -> List[str]:
|
| 282 |
+
ALL_LAYERNORM_LAYERS = [torch.nn.LayerNorm, LayerNorm, ConditionalLayerNorm]
|
| 283 |
+
decay_parameters = get_parameter_names(model, ALL_LAYERNORM_LAYERS)
|
| 284 |
+
decay_parameters = [name for name in decay_parameters if "bias" not in name]
|
| 285 |
+
return decay_parameters
|
| 286 |
+
|
| 287 |
+
def get_conditional_norm_params(self, model):
|
| 288 |
+
params = []
|
| 289 |
+
for name, module in model.named_modules():
|
| 290 |
+
if isinstance(module, ConditionalLayerNorm):
|
| 291 |
+
for param_name, _ in module.named_parameters():
|
| 292 |
+
params.append(f"{name}.{param_name}")
|
| 293 |
+
return params
|
| 294 |
+
|
| 295 |
+
def create_optimizer(self):
|
| 296 |
+
"""This is the same as in the standard trainer, except param groups"""
|
| 297 |
+
opt_model = self.model_wrapped if is_sagemaker_mp_enabled() else self.model
|
| 298 |
+
if self.optimizer is None:
|
| 299 |
+
decay_parameters = self.get_decay_parameter_names(self.model)
|
| 300 |
+
if self.args.learning_rate_embedding_recovery is not None:
|
| 301 |
+
if self.args.learning_rate_time_embedding is not None:
|
| 302 |
+
time_embedding_params = self.get_conditional_norm_params(self.model)
|
| 303 |
+
params = {
|
| 304 |
+
"standard": [],
|
| 305 |
+
"no_weight_decay": [],
|
| 306 |
+
"embeddings": [],
|
| 307 |
+
"time_embedding": [],
|
| 308 |
+
}
|
| 309 |
+
for n, p in opt_model.named_parameters():
|
| 310 |
+
if (
|
| 311 |
+
"embeddings" in n or "patch_recovery" in n
|
| 312 |
+
) and p.requires_grad:
|
| 313 |
+
params["embeddings"].append(p)
|
| 314 |
+
elif n in decay_parameters and p.requires_grad:
|
| 315 |
+
params["standard"].append(p)
|
| 316 |
+
elif p.requires_grad:
|
| 317 |
+
if n in time_embedding_params:
|
| 318 |
+
params["time_embedding"].append(p)
|
| 319 |
+
else:
|
| 320 |
+
params["no_weight_decay"].append(p)
|
| 321 |
+
optimizer_grouped_parameters = [
|
| 322 |
+
{
|
| 323 |
+
"params": params["standard"],
|
| 324 |
+
"weight_decay": self.args.weight_decay,
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"params": params["no_weight_decay"],
|
| 328 |
+
"weight_decay": 0.0,
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"params": params["embeddings"],
|
| 332 |
+
"lr": self.args.learning_rate_embedding_recovery,
|
| 333 |
+
"weight_decay": self.args.weight_decay,
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"params": params["time_embedding"],
|
| 337 |
+
"lr": self.args.learning_rate_time_embedding,
|
| 338 |
+
"weight_decay": 0.0,
|
| 339 |
+
},
|
| 340 |
+
]
|
| 341 |
+
else:
|
| 342 |
+
params = {"standard": [], "no_weight_decay": [], "embeddings": []}
|
| 343 |
+
for n, p in opt_model.named_parameters():
|
| 344 |
+
if (
|
| 345 |
+
"embeddings" in n or "patch_recovery" in n
|
| 346 |
+
) and p.requires_grad:
|
| 347 |
+
params["embeddings"].append(p)
|
| 348 |
+
elif n in decay_parameters and p.requires_grad:
|
| 349 |
+
params["standard"].append(p)
|
| 350 |
+
elif p.requires_grad:
|
| 351 |
+
params["no_weight_decay"].append(p)
|
| 352 |
+
optimizer_grouped_parameters = [
|
| 353 |
+
{
|
| 354 |
+
"params": params["standard"],
|
| 355 |
+
"weight_decay": self.args.weight_decay,
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"params": params["no_weight_decay"],
|
| 359 |
+
"weight_decay": 0.0,
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"params": params["embeddings"],
|
| 363 |
+
"lr": self.args.learning_rate_embedding_recovery,
|
| 364 |
+
"weight_decay": self.args.weight_decay,
|
| 365 |
+
},
|
| 366 |
+
]
|
| 367 |
+
elif self.args.learning_rate_time_embedding is not None:
|
| 368 |
+
time_embedding_params = self.get_conditional_norm_params(self.model)
|
| 369 |
+
params = {"standard": [], "no_weight_decay": [], "time_embedding": []}
|
| 370 |
+
for n, p in opt_model.named_parameters():
|
| 371 |
+
if n in decay_parameters and p.requires_grad:
|
| 372 |
+
params["standard"].append(p)
|
| 373 |
+
elif p.requires_grad:
|
| 374 |
+
if n in time_embedding_params:
|
| 375 |
+
params["time_embedding"].append(p)
|
| 376 |
+
else:
|
| 377 |
+
params["no_weight_decay"].append(p)
|
| 378 |
+
optimizer_grouped_parameters = [
|
| 379 |
+
{
|
| 380 |
+
"params": params["standard"],
|
| 381 |
+
"weight_decay": self.args.weight_decay,
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"params": params["no_weight_decay"],
|
| 385 |
+
"weight_decay": 0.0,
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"params": params["time_embedding"],
|
| 389 |
+
"lr": self.args.learning_rate_time_embedding,
|
| 390 |
+
"weight_decay": 0.0,
|
| 391 |
+
},
|
| 392 |
+
]
|
| 393 |
+
else:
|
| 394 |
+
optimizer_grouped_parameters = [
|
| 395 |
+
{
|
| 396 |
+
"params": [
|
| 397 |
+
p
|
| 398 |
+
for n, p in opt_model.named_parameters()
|
| 399 |
+
if (n in decay_parameters and p.requires_grad)
|
| 400 |
+
],
|
| 401 |
+
"weight_decay": self.args.weight_decay,
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"params": [
|
| 405 |
+
p
|
| 406 |
+
for n, p in opt_model.named_parameters()
|
| 407 |
+
if (n not in decay_parameters and p.requires_grad)
|
| 408 |
+
],
|
| 409 |
+
"weight_decay": 0.0,
|
| 410 |
+
},
|
| 411 |
+
]
|
| 412 |
+
|
| 413 |
+
optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs(
|
| 414 |
+
self.args
|
| 415 |
+
)
|
| 416 |
+
|
| 417 |
+
self.optimizer = optimizer_cls(
|
| 418 |
+
optimizer_grouped_parameters, **optimizer_kwargs
|
| 419 |
+
)
|
| 420 |
+
if optimizer_cls.__name__ == "Adam8bit":
|
| 421 |
+
import bitsandbytes
|
| 422 |
+
|
| 423 |
+
manager = bitsandbytes.optim.GlobalOptimManager.get_instance()
|
| 424 |
+
|
| 425 |
+
skipped = 0
|
| 426 |
+
for module in opt_model.modules():
|
| 427 |
+
if isinstance(module, nn.Embedding):
|
| 428 |
+
skipped += sum(
|
| 429 |
+
{
|
| 430 |
+
p.data_ptr(): p.numel() for p in module.parameters()
|
| 431 |
+
}.values()
|
| 432 |
+
)
|
| 433 |
+
print(f"skipped {module}: {skipped/2**20}M params")
|
| 434 |
+
manager.register_module_override(
|
| 435 |
+
module, "weight", {"optim_bits": 32}
|
| 436 |
+
)
|
| 437 |
+
logger.debug(
|
| 438 |
+
f"bitsandbytes: will optimize {module} in fp32"
|
| 439 |
+
)
|
| 440 |
+
print(f"skipped: {skipped/2**20}M params")
|
| 441 |
+
|
| 442 |
+
if is_sagemaker_mp_enabled():
|
| 443 |
+
self.optimizer = smp.DistributedOptimizer(self.optimizer)
|
| 444 |
+
|
| 445 |
+
return self.optimizer
|
| 446 |
+
|
| 447 |
+
def set_ar_steps(self, ar_steps=None, output_all_steps=False):
|
| 448 |
+
self.ar_steps = ar_steps
|
| 449 |
+
if self.ar_steps is not None and output_all_steps:
|
| 450 |
+
self.output_all_steps = True
|
| 451 |
+
|
| 452 |
+
def _model_forward(self, model, inputs):
|
| 453 |
+
if self.ar_steps is not None and model.config.use_conditioning:
|
| 454 |
+
channel_difference = (
|
| 455 |
+
model.config.num_channels > model.config.num_out_channels
|
| 456 |
+
)
|
| 457 |
+
# TODO: if outputs is not a dataclass this will break
|
| 458 |
+
if isinstance(self.ar_steps, int):
|
| 459 |
+
inputs = {**inputs, **{"time": inputs["time"] / self.ar_steps}}
|
| 460 |
+
if self.output_all_steps:
|
| 461 |
+
loss_ = []
|
| 462 |
+
outputs_ = []
|
| 463 |
+
hidden_states_ = []
|
| 464 |
+
attentions_ = []
|
| 465 |
+
reshaped_hidden_states_ = []
|
| 466 |
+
else:
|
| 467 |
+
loss = 0
|
| 468 |
+
for i in range(self.ar_steps):
|
| 469 |
+
outputs = model(**inputs)
|
| 470 |
+
if self.output_all_steps:
|
| 471 |
+
outputs_.append(outputs.output.detach())
|
| 472 |
+
if outputs.hidden_states is not None:
|
| 473 |
+
hidden_states_.append(outputs.hidden_states)
|
| 474 |
+
if outputs.attentions is not None:
|
| 475 |
+
attentions_.append(outputs.attentions)
|
| 476 |
+
if outputs.reshaped_hidden_states is not None:
|
| 477 |
+
reshaped_hidden_states_.append(
|
| 478 |
+
outputs.reshaped_hidden_states
|
| 479 |
+
)
|
| 480 |
+
if outputs.loss is not None:
|
| 481 |
+
loss_.append(outputs.loss)
|
| 482 |
+
else:
|
| 483 |
+
if outputs.loss is not None:
|
| 484 |
+
loss += outputs.loss
|
| 485 |
+
inputs = {
|
| 486 |
+
**inputs,
|
| 487 |
+
**{
|
| 488 |
+
"pixel_values": (
|
| 489 |
+
outputs.output.detach()
|
| 490 |
+
if not channel_difference
|
| 491 |
+
else torch.cat(
|
| 492 |
+
[
|
| 493 |
+
outputs.output.detach(),
|
| 494 |
+
inputs["pixel_values"][
|
| 495 |
+
:,
|
| 496 |
+
model.config.num_out_channels :,
|
| 497 |
+
],
|
| 498 |
+
],
|
| 499 |
+
dim=1,
|
| 500 |
+
)
|
| 501 |
+
)
|
| 502 |
+
},
|
| 503 |
+
}
|
| 504 |
+
if self.output_all_steps:
|
| 505 |
+
outputs.output = torch.stack(outputs_, dim=1)
|
| 506 |
+
if len(loss_) > 0:
|
| 507 |
+
outputs.loss = torch.stack(loss_, dim=0)
|
| 508 |
+
if len(hidden_states_) > 0:
|
| 509 |
+
outputs.hidden_states = [
|
| 510 |
+
torch.stack(hs, dim=1) for hs in zip(*hidden_states_)
|
| 511 |
+
]
|
| 512 |
+
if len(attentions_) > 0:
|
| 513 |
+
outputs.attentions = [
|
| 514 |
+
torch.stack(att, dim=1) for att in zip(*attentions_)
|
| 515 |
+
]
|
| 516 |
+
if len(reshaped_hidden_states_) > 0:
|
| 517 |
+
outputs.reshaped_hidden_states = [
|
| 518 |
+
torch.stack(rhs, dim=1)
|
| 519 |
+
for rhs in zip(*reshaped_hidden_states_)
|
| 520 |
+
]
|
| 521 |
+
else:
|
| 522 |
+
loss /= self.ar_steps
|
| 523 |
+
outputs.loss = loss
|
| 524 |
+
elif isinstance(self.ar_steps, list):
|
| 525 |
+
if self.output_all_steps:
|
| 526 |
+
loss_ = []
|
| 527 |
+
outputs_ = []
|
| 528 |
+
hidden_states_ = []
|
| 529 |
+
attentions_ = []
|
| 530 |
+
reshaped_hidden_states_ = []
|
| 531 |
+
else:
|
| 532 |
+
loss = 0
|
| 533 |
+
lead_time = inputs["time"]
|
| 534 |
+
for i in self.ar_steps:
|
| 535 |
+
inputs = {
|
| 536 |
+
**inputs,
|
| 537 |
+
**{"time": lead_time * i},
|
| 538 |
+
}
|
| 539 |
+
outputs = model(**inputs)
|
| 540 |
+
if self.output_all_steps:
|
| 541 |
+
outputs_.append(outputs.output.detach())
|
| 542 |
+
if self.output_all_steps:
|
| 543 |
+
outputs_.append(outputs.output.detach())
|
| 544 |
+
if outputs.hidden_states is not None:
|
| 545 |
+
hidden_states_.append(outputs.hidden_states)
|
| 546 |
+
if outputs.attentions is not None:
|
| 547 |
+
attentions_.append(outputs.attentions)
|
| 548 |
+
if outputs.reshaped_hidden_states is not None:
|
| 549 |
+
reshaped_hidden_states_.append(
|
| 550 |
+
outputs.reshaped_hidden_states
|
| 551 |
+
)
|
| 552 |
+
if outputs.loss is not None:
|
| 553 |
+
loss_.append(outputs.loss)
|
| 554 |
+
else:
|
| 555 |
+
if outputs.loss is not None:
|
| 556 |
+
loss += outputs.loss
|
| 557 |
+
inputs = {
|
| 558 |
+
**inputs,
|
| 559 |
+
**{
|
| 560 |
+
"pixel_values": (
|
| 561 |
+
outputs.output.detach()
|
| 562 |
+
if not channel_difference
|
| 563 |
+
else torch.cat(
|
| 564 |
+
[
|
| 565 |
+
outputs.output.detach(),
|
| 566 |
+
inputs["pixel_values"][
|
| 567 |
+
:,
|
| 568 |
+
model.config.num_out_channels :,
|
| 569 |
+
],
|
| 570 |
+
],
|
| 571 |
+
dim=1,
|
| 572 |
+
)
|
| 573 |
+
)
|
| 574 |
+
},
|
| 575 |
+
}
|
| 576 |
+
if self.output_all_steps:
|
| 577 |
+
outputs.output = torch.stack(outputs_, dim=1)
|
| 578 |
+
if len(loss_) > 0:
|
| 579 |
+
outputs.loss = torch.stack(loss_, dim=1)
|
| 580 |
+
if len(hidden_states_) > 0:
|
| 581 |
+
outputs.hidden_states = [
|
| 582 |
+
torch.stack(hs, dim=1) for hs in zip(*hidden_states_)
|
| 583 |
+
]
|
| 584 |
+
if len(attentions_) > 0:
|
| 585 |
+
outputs.attentions = [
|
| 586 |
+
torch.stack(att, dim=1) for att in zip(*attentions_)
|
| 587 |
+
]
|
| 588 |
+
if len(reshaped_hidden_states_) > 0:
|
| 589 |
+
outputs.reshaped_hidden_states = [
|
| 590 |
+
torch.stack(rhs, dim=1)
|
| 591 |
+
for rhs in zip(*reshaped_hidden_states_)
|
| 592 |
+
]
|
| 593 |
+
else:
|
| 594 |
+
loss /= len(self.ar_steps)
|
| 595 |
+
outputs.loss = loss
|
| 596 |
+
else:
|
| 597 |
+
raise ValueError(
|
| 598 |
+
"num_ar_steps must be an integer or a list of integers."
|
| 599 |
+
)
|
| 600 |
+
else:
|
| 601 |
+
outputs = model(**inputs)
|
| 602 |
+
|
| 603 |
+
return outputs
|
| 604 |
+
|
| 605 |
+
def compute_loss(self, model, inputs, return_outputs=False):
|
| 606 |
+
if self.label_smoother is not None and "labels" in inputs:
|
| 607 |
+
labels = inputs.pop("labels")
|
| 608 |
+
else:
|
| 609 |
+
labels = None
|
| 610 |
+
outputs = self._model_forward(model, inputs)
|
| 611 |
+
# Save past state if it exists
|
| 612 |
+
# TODO: this needs to be fixed and made cleaner later.
|
| 613 |
+
if self.args.past_index >= 0:
|
| 614 |
+
self._past = outputs[self.args.past_index]
|
| 615 |
+
|
| 616 |
+
if labels is not None:
|
| 617 |
+
unwrapped_model = unwrap_model(model)
|
| 618 |
+
if _is_peft_model(unwrapped_model):
|
| 619 |
+
model_name = unwrapped_model.base_model.model._get_name()
|
| 620 |
+
else:
|
| 621 |
+
model_name = unwrapped_model._get_name()
|
| 622 |
+
if model_name in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.values():
|
| 623 |
+
loss = self.label_smoother(outputs, labels, shift_labels=True)
|
| 624 |
+
else:
|
| 625 |
+
loss = self.label_smoother(outputs, labels)
|
| 626 |
+
else:
|
| 627 |
+
if isinstance(outputs, dict) and "loss" not in outputs:
|
| 628 |
+
raise ValueError(
|
| 629 |
+
"The model did not return a loss from the inputs, only the following keys: "
|
| 630 |
+
f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
|
| 631 |
+
)
|
| 632 |
+
# We don't use .loss here since the model may return tuples instead of ModelOutput.
|
| 633 |
+
loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]
|
| 634 |
+
|
| 635 |
+
return (loss, outputs) if return_outputs else loss
|
| 636 |
+
|
| 637 |
+
def prediction_step(
|
| 638 |
+
self,
|
| 639 |
+
model: nn.Module,
|
| 640 |
+
inputs: Dict[str, Union[torch.Tensor, Any]],
|
| 641 |
+
prediction_loss_only: bool,
|
| 642 |
+
ignore_keys: Optional[List[str]] = None,
|
| 643 |
+
) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
|
| 644 |
+
"""
|
| 645 |
+
Perform an evaluation step on `model` using `inputs`.
|
| 646 |
+
|
| 647 |
+
Subclass and override to inject custom behavior.
|
| 648 |
+
|
| 649 |
+
Args:
|
| 650 |
+
model (`nn.Module`):
|
| 651 |
+
The model to evaluate.
|
| 652 |
+
inputs (`Dict[str, Union[torch.Tensor, Any]]`):
|
| 653 |
+
The inputs and targets of the model.
|
| 654 |
+
|
| 655 |
+
The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
|
| 656 |
+
argument `labels`. Check your model's documentation for all accepted arguments.
|
| 657 |
+
prediction_loss_only (`bool`):
|
| 658 |
+
Whether or not to return the loss only.
|
| 659 |
+
ignore_keys (`List[str]`, *optional*):
|
| 660 |
+
A list of keys in the output of your model (if it is a dictionary) that should be ignored when
|
| 661 |
+
gathering predictions.
|
| 662 |
+
|
| 663 |
+
Return:
|
| 664 |
+
Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss,
|
| 665 |
+
logits and labels (each being optional).
|
| 666 |
+
"""
|
| 667 |
+
has_labels = (
|
| 668 |
+
False
|
| 669 |
+
if len(self.label_names) == 0
|
| 670 |
+
else all(inputs.get(k) is not None for k in self.label_names)
|
| 671 |
+
)
|
| 672 |
+
# For CLIP-like models capable of returning loss values.
|
| 673 |
+
# If `return_loss` is not specified or being `None` in `inputs`, we check if the default value of `return_loss`
|
| 674 |
+
# is `True` in `model.forward`.
|
| 675 |
+
return_loss = inputs.get("return_loss", None)
|
| 676 |
+
if return_loss is None:
|
| 677 |
+
return_loss = self.can_return_loss
|
| 678 |
+
loss_without_labels = (
|
| 679 |
+
True if len(self.label_names) == 0 and return_loss else False
|
| 680 |
+
)
|
| 681 |
+
|
| 682 |
+
inputs = self._prepare_inputs(inputs)
|
| 683 |
+
if ignore_keys is None:
|
| 684 |
+
if hasattr(self.model, "config"):
|
| 685 |
+
ignore_keys = getattr(
|
| 686 |
+
self.model.config, "keys_to_ignore_at_inference", []
|
| 687 |
+
)
|
| 688 |
+
else:
|
| 689 |
+
ignore_keys = []
|
| 690 |
+
|
| 691 |
+
# labels may be popped when computing the loss (label smoothing for instance) so we grab them first.
|
| 692 |
+
if has_labels or loss_without_labels:
|
| 693 |
+
labels = nested_detach(tuple(inputs.get(name) for name in self.label_names))
|
| 694 |
+
if len(labels) == 1:
|
| 695 |
+
labels = labels[0]
|
| 696 |
+
else:
|
| 697 |
+
labels = None
|
| 698 |
+
|
| 699 |
+
with torch.no_grad():
|
| 700 |
+
if is_sagemaker_mp_enabled():
|
| 701 |
+
raw_outputs = smp_forward_only(model, inputs)
|
| 702 |
+
if has_labels or loss_without_labels:
|
| 703 |
+
if isinstance(raw_outputs, dict):
|
| 704 |
+
loss_mb = raw_outputs["loss"]
|
| 705 |
+
logits_mb = tuple(
|
| 706 |
+
v
|
| 707 |
+
for k, v in raw_outputs.items()
|
| 708 |
+
if k not in ignore_keys + ["loss"]
|
| 709 |
+
)
|
| 710 |
+
else:
|
| 711 |
+
loss_mb = raw_outputs[0]
|
| 712 |
+
logits_mb = raw_outputs[1:]
|
| 713 |
+
|
| 714 |
+
loss = loss_mb.reduce_mean().detach().cpu()
|
| 715 |
+
logits = smp_nested_concat(logits_mb)
|
| 716 |
+
else:
|
| 717 |
+
loss = None
|
| 718 |
+
if isinstance(raw_outputs, dict):
|
| 719 |
+
logits_mb = tuple(
|
| 720 |
+
v for k, v in raw_outputs.items() if k not in ignore_keys
|
| 721 |
+
)
|
| 722 |
+
else:
|
| 723 |
+
logits_mb = raw_outputs
|
| 724 |
+
logits = smp_nested_concat(logits_mb)
|
| 725 |
+
else:
|
| 726 |
+
if has_labels or loss_without_labels:
|
| 727 |
+
with self.compute_loss_context_manager():
|
| 728 |
+
loss, outputs = self.compute_loss(
|
| 729 |
+
model, inputs, return_outputs=True
|
| 730 |
+
)
|
| 731 |
+
loss = loss.mean().detach()
|
| 732 |
+
|
| 733 |
+
if isinstance(outputs, dict):
|
| 734 |
+
logits = tuple(
|
| 735 |
+
v
|
| 736 |
+
for k, v in outputs.items()
|
| 737 |
+
if k not in ignore_keys + ["loss"]
|
| 738 |
+
)
|
| 739 |
+
else:
|
| 740 |
+
logits = outputs[1:]
|
| 741 |
+
else:
|
| 742 |
+
loss = None
|
| 743 |
+
with self.compute_loss_context_manager():
|
| 744 |
+
outputs = self._model_forward(model, inputs)
|
| 745 |
+
if isinstance(outputs, dict):
|
| 746 |
+
logits = tuple(
|
| 747 |
+
v for k, v in outputs.items() if k not in ignore_keys
|
| 748 |
+
)
|
| 749 |
+
else:
|
| 750 |
+
logits = outputs
|
| 751 |
+
# TODO: this needs to be fixed and made cleaner later.
|
| 752 |
+
if self.args.past_index >= 0:
|
| 753 |
+
self._past = outputs[self.args.past_index - 1]
|
| 754 |
+
|
| 755 |
+
if prediction_loss_only:
|
| 756 |
+
return (loss, None, None)
|
| 757 |
+
|
| 758 |
+
logits = nested_detach(logits)
|
| 759 |
+
if len(logits) == 1:
|
| 760 |
+
logits = logits[0]
|
| 761 |
+
|
| 762 |
+
return (loss, logits, labels)
|
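For orientation, here is a minimal, hypothetical sketch of how this subclassed `Trainer` is driven; `my_model`, `train_ds`, and `eval_ds` are placeholders and the hyperparameter values are illustrative, not taken from the repo:

```python
# Hypothetical usage sketch of the Trainer above (names and values are placeholders).
from scOT.trainer import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="./ckpts",
    learning_rate=1e-4,
    # extra parameter groups handled by create_optimizer():
    learning_rate_embedding_recovery=1e-3,  # embedding / patch_recovery params
    learning_rate_time_embedding=1e-3,      # ConditionalLayerNorm params
)
trainer = Trainer(model=my_model, args=args, train_dataset=train_ds)

# Homogeneous rollout: 4 equal sub-steps, each conditioned on time / 4.
trainer.set_ar_steps(4)
# Heterogeneous rollout: each entry rescales the lead time of that step;
# output_all_steps=True makes predict() also return every intermediate step.
trainer.set_ar_steps([1, 1, 2], output_all_steps=True)
preds = trainer.predict(eval_ds)
```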
external/poseidon/scOT/utils.py
ADDED
@@ -0,0 +1,97 @@
```python
"""Utility functions."""


def read_cli(parser):
    """Reads command line arguments."""

    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="Path to config file or JSON string",
    )
    parser.add_argument(
        "--json_config",
        action="store_true",
        help="Whether the config is a JSON string",
    )
    parser.add_argument(
        "--wandb_run_name",
        type=str,
        required=False,
        default=None,
        help="Name of the run in wandb",
    )
    parser.add_argument(
        "--wandb_project_name",
        type=str,
        default="scOT",
        help="Name of the wandb project",
    )
    parser.add_argument(
        "--max_num_train_time_steps",
        type=int,
        default=None,
        help="Maximum number of time steps to use for training and validation.",
    )
    parser.add_argument(
        "--train_time_step_size",
        type=int,
        default=None,
        help="Time step size to use for training and validation.",
    )
    parser.add_argument(
        "--train_small_time_transition",
        action="store_true",
        help="Whether to train only for next step prediction.",
    )
    parser.add_argument(
        "--data_path",
        type=str,
        required=True,
        help="Base path to data.",
    )
    parser.add_argument(
        "--checkpoint_path",
        type=str,
        required=True,
        help="Path to checkpoint directory. Will be prepended by wandb project and run name.",
    )
    parser.add_argument(
        "--disable_tqdm",
        action="store_true",
        help="Whether to disable tqdm progress bar",
    )
    parser.add_argument(
        "--push_to_hf_hub",
        type=str,
        default=None,
        help="Whether to push the model to Huggingface Hub. Specify the model repository name.",
    )
    parser.add_argument(
        "--just_velocities",
        action="store_true",
        help="Whether to only use velocities as input. Only relevant for incompressible flow datasets.",
    )
    parser.add_argument(
        "--move_data",
        type=str,
        default=None,
        help="If set, moves the data to this directory and trains from there.",
    )
    return parser


def get_num_parameters(model):
    """Returns the number of trainable parameters in a model."""

    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def get_num_parameters_no_embed(model):
    """Returns the number of trainable parameters in a scOT model without embedding and recovery."""
    out = 0
    for name, p in model.named_parameters():
        if not ("embeddings" in name or "patch_recovery" in name) and p.requires_grad:
            out += p.numel()
    return out
```
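`read_cli` simply mutates and returns the parser it is given; a minimal sketch of the intended wiring (the argument values here are illustrative placeholders):

```python
# Minimal sketch: wiring read_cli into a script's argument parsing.
import argparse
from scOT.utils import read_cli

parser = read_cli(argparse.ArgumentParser())
params = parser.parse_args(
    ["--config", "configs/run.yaml",
     "--data_path", "/path/to/data",
     "--checkpoint_path", "/path/to/ckpts"]
)
print(params.wandb_project_name)  # "scOT" unless overridden
```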
poseidon_model.py
ADDED
@@ -0,0 +1,211 @@
```python
import sys
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xarray as xr
from huggingface_hub import hf_hub_download
from torchvision.transforms.functional import resize

sys.path.append(os.path.abspath("poseidon_demo/external/poseidon"))
from scOT.model import ScOT, ScOTConfig


def load_model():
    """
    Initializes and loads a POSEIDON model with fixed configuration.

    Returns:
        model (ScOT): An instance of the POSEIDON model in evaluation mode.
    """
    config = ScOTConfig(
        num_channels=4,
        skip_connections=[True, True, True, True],
    )
    model = ScOT(config)
    model.eval()
    return model


def run_inference_by_domain(model, domain):
    """
    Runs the model on a synthetic input based on the chosen domain.

    Args:
        model (ScOT): The POSEIDON model.
        domain (str): Domain to simulate input for. One of: 'Fluid Dynamics', 'Finance', 'Quantum', 'Biology / Medicine'.

    Returns:
        np.ndarray: The predicted model output.
    """
    if domain == "Fluid Dynamics":
        # centered Gaussian blob, replicated across the four input channels
        x = torch.linspace(-1, 1, 224)
        y = torch.linspace(-1, 1, 224)
        X, Y = torch.meshgrid(x, y, indexing="ij")
        blob = torch.exp(-(X**2 + Y**2) * 10)
        input_tensor = blob.expand(4, 224, 224).unsqueeze(0)

    elif domain == "Finance":
        # horizontal price-like gradient plus small Gaussian noise
        base = torch.linspace(0, 1, 224).reshape(1, -1).repeat(224, 1)
        noise = torch.randn(4, 224, 224) * 0.05
        input_tensor = (base + noise).unsqueeze(0)

    elif domain == "Quantum":
        # standing-wave interference pattern
        x = torch.linspace(0, 4 * torch.pi, 224)
        y = torch.linspace(0, 4 * torch.pi, 224)
        X, Y = torch.meshgrid(x, y, indexing="ij")
        sin_grid = torch.sin(X) * torch.sin(Y)
        input_tensor = sin_grid.expand(4, 224, 224).unsqueeze(0)

    elif domain == "Biology / Medicine":
        # noisy Gaussian blob mimicking a morphogen concentration
        x = torch.linspace(-1, 1, 224)
        y = torch.linspace(-1, 1, 224)
        X, Y = torch.meshgrid(x, y, indexing="ij")
        base_blob = torch.exp(-(X**2 + Y**2) * 5)
        blob = torch.randn(4, 224, 224) * 0.2 + base_blob
        input_tensor = blob.unsqueeze(0)

    else:
        input_tensor = torch.randn(1, 4, 224, 224)

    time_tensor = torch.tensor([0.0])

    with torch.no_grad():
        output = model(pixel_values=input_tensor, time=time_tensor).output
    return output.squeeze().numpy()


def run_inference_on_dataset(model, dataset_name):
    """
    Downloads and runs inference on a real scientific dataset using POSEIDON.

    Args:
        model (ScOT): The POSEIDON model.
        dataset_name (str): Identifier for the dataset.

    Returns:
        tuple: (input_array, output_array) as numpy arrays.
    """
    dataset_mapping = {
        "fluids.incompressible.Sines": {
            "repo_id": "camlab-ethz/NS-Sines",
            "filename": "velocity_0.nc",
            "variable": "velocity",
        },
        "fluids.compressible.Riemann": {
            "repo_id": "camlab-ethz/CE-RP",
            "filename": "data_0.nc",
            "variable": "data",
        },
        "reaction_diffusion.AllenCahn": {
            "repo_id": "camlab-ethz/ACE",
            "filename": "solution_0.nc",
            "variable": "solution",
        },
    }

    entry = dataset_mapping.get(dataset_name)
    if entry is None:
        raise ValueError(f"Unknown dataset name: {dataset_name}")

    file_path = hf_hub_download(
        repo_id=entry["repo_id"],
        filename=entry["filename"],
        repo_type="dataset",
    )

    ds = xr.open_dataset(file_path)
    var = ds[entry["variable"]]
    print(f"Loaded shape: {var.shape}, dims: {var.dims}")

    if "sample" in var.dims:
        sample = var.isel(sample=0, time=0).values.astype(np.float32)
    else:
        sample = var.isel(time=0).values.astype(np.float32)

    # coerce to a (channels, H, W) layout
    if sample.ndim > 3:
        sample = np.squeeze(sample)
    while sample.ndim < 3:
        sample = np.expand_dims(sample, 0)

    tensor = torch.tensor(sample)
    if tensor.shape[-1] != 224 or tensor.shape[-2] != 224:
        tensor = resize(tensor, size=[224, 224])

    # pad or truncate to the model's four input channels
    if tensor.shape[0] < 4:
        pad = 4 - tensor.shape[0]
        extra = torch.zeros((pad, 224, 224))
        tensor = torch.cat([tensor, extra], dim=0)
    elif tensor.shape[0] > 4:
        tensor = tensor[:4]

    input_tensor = tensor.unsqueeze(0)
    time_tensor = torch.tensor([0.0])

    with torch.no_grad():
        output = model(pixel_values=input_tensor, time=time_tensor).output

    return tensor.squeeze().numpy(), output.squeeze().numpy()


def plot_output(output_array, cmap="inferno", contrast=2.0):
    """
    Plots the output array from the model using a heatmap.

    Args:
        output_array (np.ndarray): Output from the model.
        cmap (str): Colormap used for visualization.
        contrast (float): Contrast scaling factor.

    Returns:
        matplotlib.figure.Figure: The heatmap figure.
    """
    # normalize to [0, 1], then apply a power-law contrast boost
    output_array = output_array - output_array.min()
    output_array = output_array / output_array.max()
    output_array = output_array**contrast

    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(
        output_array,
        ax=ax,
        cmap=cmap,
        cbar=True,
        square=True,
        xticklabels=False,
        yticklabels=False,
        linewidths=0,
    )
    ax.set_title("POSEIDON Output")
    ax.axis("off")
    return fig


def plot_comparison(input_array, output_array, cmap="inferno"):
    """
    Plots a side-by-side comparison of the input and the model output.

    Args:
        input_array (np.ndarray): Ground truth or input data.
        output_array (np.ndarray): Output predicted by the model.
        cmap (str): Colormap used for both plots.

    Returns:
        matplotlib.figure.Figure: Figure showing input vs output.
    """
    fig, axs = plt.subplots(1, 2, figsize=(10, 4))
    axs[0].imshow(input_array[0], cmap=cmap)
    axs[0].set_title("Ground Truth")
    axs[0].axis("off")

    axs[1].imshow(output_array, cmap=cmap)
    axs[1].set_title("POSEIDON Prediction")
    axs[1].axis("off")

    plt.tight_layout()
    return fig
```
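A minimal, hypothetical end-to-end use of the helpers above (the model carries untrained weights here, so the result only illustrates shapes and plumbing, not a meaningful physical field):

```python
# Sketch: synthetic-input inference with the demo helpers. Assumes the model
# output squeezes to (4, 224, 224); plot_output is given one 2D channel.
from poseidon_model import load_model, run_inference_by_domain, plot_output

model = load_model()
pred = run_inference_by_domain(model, "Fluid Dynamics")  # -> (4, 224, 224)
fig = plot_output(pred[0])   # first output channel as a heatmap
fig.savefig("poseidon_fluid.png")
```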
requirements.txt
ADDED
@@ -0,0 +1,10 @@
```text
gradio
matplotlib
numpy
torch
torchvision
scipy
plotly
seaborn
huggingface_hub
xarray
```
simulations.py
ADDED
@@ -0,0 +1,84 @@
```python
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm


def finance_demo():
    """
    Simulates a Black-Scholes pricing scenario for European call options.

    Returns:
        matplotlib.figure.Figure: A plot of option price vs stock price using the Black-Scholes formula.
    """
    fig, ax = plt.subplots()
    S = np.linspace(1, 100, 100)
    K = 50  # strike price
    T = 1  # time to maturity
    r = 0.05  # risk-free rate
    sigma = 0.2  # volatility
    d1 = (np.log(S / K) + (r + sigma**2 / 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    call_price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    ax.plot(S, call_price)
    ax.set_title("Black-Scholes Call Option Price")
    ax.set_xlabel("Stock Price")
    ax.set_ylabel("Option Price")
    return fig


def quantum_demo():
    """
    Simulates a 1D quantum wavefunction as a product of a Gaussian envelope and a cosine wave.

    Returns:
        matplotlib.figure.Figure: A plot representing a wavefunction in space.
    """
    x = np.linspace(-5, 5, 500)
    t = 0.1
    psi = np.exp(-x**2) * np.cos(5 * x - t)
    fig, ax = plt.subplots()
    ax.plot(x, psi)
    ax.set_title("Wavefunction: Particle in a Potential")
    ax.set_xlabel("Position")
    ax.set_ylabel("Amplitude")
    return fig


def fluid_demo():
    """
    Simulates a 1D velocity field representing wave-like fluid behavior.

    Returns:
        matplotlib.figure.Figure: A sine wave representing fluid velocity over space.
    """
    x = np.linspace(0, 2 * np.pi, 100)
    t = 1.0
    u = np.sin(x - t)
    fig, ax = plt.subplots()
    ax.plot(x, u)
    ax.set_title("1D Fluid Velocity Field")
    ax.set_xlabel("x")
    ax.set_ylabel("u(x, t)")
    return fig


def bio_demo():
    """
    Simulates a reaction-diffusion pattern, commonly seen in developmental biology.

    Returns:
        matplotlib.figure.Figure: A morphogen concentration gradient over space.
    """
    x = np.linspace(0, 1, 100)
    t = 0.1
    u = np.exp(-10 * (x - 0.5) ** 2) * np.exp(-t)
    fig, ax = plt.subplots()
    ax.plot(x, u)
    ax.set_title("Reaction-Diffusion: Morphogen Gradient")
    ax.set_xlabel("Position")
    ax.set_ylabel("Concentration")
    return fig
```
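For reference, `finance_demo` above evaluates the standard closed-form Black-Scholes price of a European call; restated as math (same symbols as the code):

$$
d_1 = \frac{\ln(S/K) + \left(r + \sigma^2/2\right)T}{\sigma\sqrt{T}}, \qquad
d_2 = d_1 - \sigma\sqrt{T}, \qquad
C = S\,N(d_1) - K\,e^{-rT}\,N(d_2)
$$

where $N(\cdot)$ is the standard normal CDF (`scipy.stats.norm.cdf` in the code).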