% SC '25 conference paper: stability-preserving lossy compression for
% checkpoint/restart of PDE simulations.
% NOTE(review): the exported `location` field was blank and has been dropped;
% add the conference venue there if the target style needs it.
@inproceedings{Gong:StabilityPreservingCompression:2025,
  author    = {Gong, Qian and Ainsworth, Mark and Chen, Jieyang and Liang, Xin and Zhu, Liangji and Klasky, Ethan and Athawale, Tushar and Liu, Qing and Rangarajan, Anand and Ranka, Sanjay and Klasky, Scott},
  title     = {Stability-preserving Lossy Compression for Large-scale Partial Differential Equations},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  series    = {SC '25},
  year      = {2025},
  pages     = {1992--2005},
  numpages  = {14},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9798400714665},
  doi       = {10.1145/3712285.3759878},
  url       = {https://doi.org/10.1145/3712285.3759878},
  keywords  = {Checkpoint-restart, lossy compression, stability preservation, large-scale PDEs},
  abstract  = {Checkpoint/Restart (C/R) strategies are vital for fault tolerance in PDE-based scientific simulations, yet traditional checkpointing incurs significant I/O overhead. Lossy compression offers a scalable solution by reducing checkpoint data size, but conventional methods often lack control over physical invariants (e.g., energy), leading to instability such as oscillations or divergence in Partial Differential Equations (PDE) systems. This paper introduces a stability-preserving compression approach tailored for PDE simulations by explicitly controlling kinetic and potential energy perturbations to ensure stable restarts. Extensive experiments conducted across diverse PDE configurations demonstrate that our method maintains numerical stability with minimal error magnification---even across multiple checkpoint-restart cycles---outperforming state-of-the-art lossy compressors. Parallel evaluations on the Frontier supercomputer show up to 8.4\texttimes{} improvement in checkpoint write performance and 6.3\texttimes{} in read performance, while maintaining relative L2 errors $\sim$ 2e-6 throughout continued simulation. These results provide practical guidance for balancing compression accuracy, stability, and computational efficiency in large-scale PDE applications.},
}