Issue with checkpoints

Hello everybody !

I’m running a simulation and I want to get data at certain time, so I tried using checkpoints.
Getting to high duration, I felt like my simulation was starting over everytime instead of starting from the checkpoint.
Here is a simplified version of my code :

import basix
import festim as F
import ufl
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt
from mpi4py import MPI
import scipy as scp
import adios4dolfinx
import sys

import dolfinx
from dolfinx import fem
from dolfinx.io import VTXWriter

assert MPI.COMM_WORLD.size == 1

temps = [1.5, 3, 4.5, 6]
data_dir = "data_simu_test/"

for i in range(len(temps)):

    model = F.HydrogenTransportProblem()
    n = 1000
    vertice = []
    L = 10e-5

    vertice = np.concatenate(
        [
            np.linspace(0, 100e-9, num=100, endpoint=False),
            np.linspace(1e-7, 1e-5, num=198, endpoint=False),
            np.linspace(1e-5, L, num=200),
        ]
    )

    model.mesh = F.Mesh1D(vertice)  


    difW = 1.9e-7
    E_diff = 0.2
    tungst = F.Material(name="W", D_0=difW, E_D=E_diff)
    vol = F.VolumeSubdomain1D(id=1, borders=[0, L], material=tungst)
    surf_left = F.SurfaceSubdomain1D(id=1, x=0)
    surf_right = F.SurfaceSubdomain1D(id=2, x=L)

    W_atom_density = 19.25e3 / (
        183.84 * 1.66e-27
    )

    model.subdomains = [vol, surf_left, surf_right]

    mobile_H = F.Species("H")
    mobile_D = F.Species("D")

    trapH = F.Species("trapH", mobile=False)
    trapD = F.Species("trapD", mobile=False)
    def density_trap1(mod=np) : return lambda x: 0.002 * W_atom_density
    density_trap1_ufl = density_trap1(mod=ufl)
    trapA = F.ImplicitSpecies(n=density_trap1_ufl, others=[trapD, trapH], name="trapA")


    model.species = [
        mobile_D,
        mobile_H,
        trapD,
        trapH,
    ]

    if i != 0:
        #if it's not the first iteration of the loop, use the checkpoint to start from where the previous simulation ended
        model.initial_conditions = [
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="H",
                    timestamp=temps[i-1]*3600,
                ),
                species=mobile_H,
            ),
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="D",
                    timestamp=temps[i-1]*3600,
                ),
                species=mobile_D,
            ),
        ]


    model.reactions = [
        F.Reaction(  # trap A D
            k_0=1e13 / (6*W_atom_density),
            E_k=E_diff,
            p_0=1e13,
            E_p=1.5,
            reactant=[mobile_D, trapA],
            product=trapD,
            volume=vol,
        ),
        F.Reaction(  # trap A H
            k_0=1e13 / W_atom_density,
            E_k=E_diff,
            p_0=1e13,
            E_p=1.5,
            reactant=[mobile_H, trapA],
            product=trapH,
            volume=vol,
        ),
    ]

    model.boundary_conditions = [
        F.DirichletBC(subdomain=surf_left, value=2e16, species=mobile_D),
        F.DirichletBC(subdomain=surf_left, value=5e16, species=mobile_H),
        F.DirichletBC(subdomain=surf_right, value=0, species=mobile_D),
        F.DirichletBC(subdomain=surf_right, value=0, species=mobile_H),
    ]

    model.temperature = 600

    invent_mobD = F.TotalVolume(field=mobile_D, volume=vol)
    invent_trapAD = F.TotalVolume(field=trapD, volume=vol)
    checkpoint1 = F.VTXSpeciesExport(filename=data_dir+"checkpoint.bp", field=[
        mobile_D,
        mobile_H], checkpoint=True )
    

    model.exports = [
        invent_mobD,
        invent_trapAD,
        checkpoint1,
    ]

    phase_s=temps[i]*3600
    model.settings = F.Settings(atol=1e8, rtol=1e-12, max_iterations=500, final_time=phase_s)
    model.settings.stepsize = F.Stepsize(
        initial_value=0.5,
        growth_factor=1.05,
        target_nb_iterations=20,
        cutback_factor=0.95,
        max_stepsize=100,
    )

    model.settings.stepsize.milestones= [phase_s*0.5, phase_s*0.75, phase_s*0.95, phase_s]

    model.initialise()
    model.run()

    np.savetxt(data_dir+"trap_D_"+str(temps[i])+".txt", np.array(trapD.post_processing_solution.x.array))

When running the code, I get the following in the terminal :
4_checkpoints

If I understand well, we can see the 4 step getting increasingly longer (5.4k, 10.8k, 16.2k and 21.6k) where (if I understood well) they should all be of the same length (all four being 1.5h simulated).

Thank you in advance for your help, hope it’s not just me getting everything wrong with checkpoints.

what version of FESTIM are you using?

I am using the fenicsx branch and I am up to date with all the recent changes.

Here’s a MWE running on v2.0-alpha-4

import festim as F
import numpy as np

temps = [1.5, 3, 4.5, 6]
data_dir = "data_simu_test/"

for i in range(len(temps)):

    model = F.HydrogenTransportProblem()
    L = 10e-5
    model.mesh = F.Mesh1D(np.linspace(0, L, num=200))

    tungst = F.Material(name="W", D_0=1, E_D=0)
    vol = F.VolumeSubdomain1D(id=1, borders=[0, L], material=tungst)
    surf_left = F.SurfaceSubdomain1D(id=1, x=0)
    surf_right = F.SurfaceSubdomain1D(id=2, x=L)

    model.subdomains = [vol, surf_left, surf_right]

    mobile_H = F.Species("H")
    mobile_D = F.Species("D")

    model.species = [mobile_D, mobile_H]

    if i != 0:
        # if it's not the first iteration of the loop, use the checkpoint to start from where the previous simulation ended
        model.initial_conditions = [
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="H",
                    timestamp=temps[i - 1] * 3600,
                ),
                species=mobile_H,
            ),
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="D",
                    timestamp=temps[i - 1] * 3600,
                ),
                species=mobile_D,
            ),
        ]

    model.temperature = 600

    checkpoint1 = F.VTXSpeciesExport(
        filename=data_dir + "checkpoint.bp", field=[mobile_D, mobile_H], checkpoint=True
    )

    model.exports = [checkpoint1]

    phase_s = temps[i] * 3600
    model.settings = F.Settings(atol=1e8, rtol=1e-12, final_time=phase_s)
    model.settings.stepsize = F.Stepsize(initial_value=100)

    model.settings.stepsize.milestones = [
        phase_s * 0.5,
        phase_s * 0.75,
        phase_s * 0.95,
        phase_s,
    ]

    model.initialise()

    print(f"Running simulation for time: {temps[i]} hours")
    print(f"Initial time: {float(model.t)} s")
    print(f"Final time: {float(model.settings.final_time)} s")

    model.run()

Produces:

Running simulation for time: 1.5 hours
Initial time: 0.0 s
Final time: 5400.0 s
Solving HydrogenTransportProblem: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.40k/5.40k [00:00<00:00, 96.8kit/s]
Running simulation for time: 3 hours
Initial time: 0.0 s
Final time: 10800.0 s
Solving HydrogenTransportProblem: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 10.8k/10.8k [00:00<00:00, 90.5kit/s]
Running simulation for time: 4.5 hours
Initial time: 0.0 s
Final time: 16200.0 s
Solving HydrogenTransportProblem: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 16.2k/16.2k [00:00<00:00, 107kit/s]
Running simulation for time: 6 hours
Initial time: 0.0 s
Final time: 21600.0 s
Solving HydrogenTransportProblem: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 21.6k/21.6k [00:00<00:00, 109kit/s]

The β€œissue” here is that by default, the starting time of a new model is t=0. You are just telling FESTIM β€œuse this timestamp from this other VTX file as an initial condition for t=0”. Therefore the simulated time increases.

This means there are two solutions to your issue:

  1. simply accept that simulation times are relative and handle that in post-process. Set the final simulation time to a fixed number
  2. after initialise() change the value of model.t. This isn’t ideal as it messes up the progress bar (minor) and could potentially mess with any time dependent boundary condition, source, etc (major).
model.t.value = temps[i - 1] * 3600 if i != 0 else 0

Okay thank you very much for the help and the explanation.
I will go back to my code and adapt what nedd to be and get what I’m looking for.

1 Like