Issue with checkpoints

JonathanGSDUFOUR · June 11, 2025, 1:31pm

Hello everybody !

I’m running a simulation and I want to get data at certain time, so I tried using checkpoints.
Getting to high duration, I felt like my simulation was starting over everytime instead of starting from the checkpoint.
Here is a simplified version of my code :

import basix
import festim as F
import ufl
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt
from mpi4py import MPI
import scipy as scp
import adios4dolfinx
import sys

import dolfinx
from dolfinx import fem
from dolfinx.io import VTXWriter

assert MPI.COMM_WORLD.size == 1

temps = [1.5, 3, 4.5, 6]
data_dir = "data_simu_test/"

for i in range(len(temps)):

    model = F.HydrogenTransportProblem()
    n = 1000
    vertice = []
    L = 10e-5

    vertice = np.concatenate(
        [
            np.linspace(0, 100e-9, num=100, endpoint=False),
            np.linspace(1e-7, 1e-5, num=198, endpoint=False),
            np.linspace(1e-5, L, num=200),
        ]
    )

    model.mesh = F.Mesh1D(vertice)  


    difW = 1.9e-7
    E_diff = 0.2
    tungst = F.Material(name="W", D_0=difW, E_D=E_diff)
    vol = F.VolumeSubdomain1D(id=1, borders=[0, L], material=tungst)
    surf_left = F.SurfaceSubdomain1D(id=1, x=0)
    surf_right = F.SurfaceSubdomain1D(id=2, x=L)

    W_atom_density = 19.25e3 / (
        183.84 * 1.66e-27
    )

    model.subdomains = [vol, surf_left, surf_right]

    mobile_H = F.Species("H")
    mobile_D = F.Species("D")

    trapH = F.Species("trapH", mobile=False)
    trapD = F.Species("trapD", mobile=False)
    def density_trap1(mod=np) : return lambda x: 0.002 * W_atom_density
    density_trap1_ufl = density_trap1(mod=ufl)
    trapA = F.ImplicitSpecies(n=density_trap1_ufl, others=[trapD, trapH], name="trapA")


    model.species = [
        mobile_D,
        mobile_H,
        trapD,
        trapH,
    ]

    if i != 0:
        #if it's not the first iteration of the loop, use the checkpoint to start from where the previous simulation ended
        model.initial_conditions = [
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="H",
                    timestamp=temps[i-1]*3600,
                ),
                species=mobile_H,
            ),
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="D",
                    timestamp=temps[i-1]*3600,
                ),
                species=mobile_D,
            ),
        ]


    model.reactions = [
        F.Reaction(  # trap A D
            k_0=1e13 / (6*W_atom_density),
            E_k=E_diff,
            p_0=1e13,
            E_p=1.5,
            reactant=[mobile_D, trapA],
            product=trapD,
            volume=vol,
        ),
        F.Reaction(  # trap A H
            k_0=1e13 / W_atom_density,
            E_k=E_diff,
            p_0=1e13,
            E_p=1.5,
            reactant=[mobile_H, trapA],
            product=trapH,
            volume=vol,
        ),
    ]

    model.boundary_conditions = [
        F.DirichletBC(subdomain=surf_left, value=2e16, species=mobile_D),
        F.DirichletBC(subdomain=surf_left, value=5e16, species=mobile_H),
        F.DirichletBC(subdomain=surf_right, value=0, species=mobile_D),
        F.DirichletBC(subdomain=surf_right, value=0, species=mobile_H),
    ]

    model.temperature = 600

    invent_mobD = F.TotalVolume(field=mobile_D, volume=vol)
    invent_trapAD = F.TotalVolume(field=trapD, volume=vol)
    checkpoint1 = F.VTXSpeciesExport(filename=data_dir+"checkpoint.bp", field=[
        mobile_D,
        mobile_H], checkpoint=True )
    

    model.exports = [
        invent_mobD,
        invent_trapAD,
        checkpoint1,
    ]

    phase_s=temps[i]*3600
    model.settings = F.Settings(atol=1e8, rtol=1e-12, max_iterations=500, final_time=phase_s)
    model.settings.stepsize = F.Stepsize(
        initial_value=0.5,
        growth_factor=1.05,
        target_nb_iterations=20,
        cutback_factor=0.95,
        max_stepsize=100,
    )

    model.settings.stepsize.milestones= [phase_s*0.5, phase_s*0.75, phase_s*0.95, phase_s]

    model.initialise()
    model.run()

    np.savetxt(data_dir+"trap_D_"+str(temps[i])+".txt", np.array(trapD.post_processing_solution.x.array))

When running the code, I get the following in the terminal :

If I understand well, we can see the 4 step getting increasingly longer (5.4k, 10.8k, 16.2k and 21.6k) where (if I understood well) they should all be of the same length (all four being 1.5h simulated).

Thank you in advance for your help, hope it’s not just me getting everything wrong with checkpoints.

remidm · June 11, 2025, 1:43pm

what version of FESTIM are you using?

JonathanGSDUFOUR · June 11, 2025, 1:52pm

I am using the fenicsx branch and I am up to date with all the recent changes.

remidm · June 11, 2025, 1:54pm

Here’s a MWE running on v2.0-alpha-4

import festim as F
import numpy as np

temps = [1.5, 3, 4.5, 6]
data_dir = "data_simu_test/"

for i in range(len(temps)):

    model = F.HydrogenTransportProblem()
    L = 10e-5
    model.mesh = F.Mesh1D(np.linspace(0, L, num=200))

    tungst = F.Material(name="W", D_0=1, E_D=0)
    vol = F.VolumeSubdomain1D(id=1, borders=[0, L], material=tungst)
    surf_left = F.SurfaceSubdomain1D(id=1, x=0)
    surf_right = F.SurfaceSubdomain1D(id=2, x=L)

    model.subdomains = [vol, surf_left, surf_right]

    mobile_H = F.Species("H")
    mobile_D = F.Species("D")

    model.species = [mobile_D, mobile_H]

    if i != 0:
        # if it's not the first iteration of the loop, use the checkpoint to start from where the previous simulation ended
        model.initial_conditions = [
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="H",
                    timestamp=temps[i - 1] * 3600,
                ),
                species=mobile_H,
            ),
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="D",
                    timestamp=temps[i - 1] * 3600,
                ),
                species=mobile_D,
            ),
        ]

    model.temperature = 600

    checkpoint1 = F.VTXSpeciesExport(
        filename=data_dir + "checkpoint.bp", field=[mobile_D, mobile_H], checkpoint=True
    )

    model.exports = [checkpoint1]

    phase_s = temps[i] * 3600
    model.settings = F.Settings(atol=1e8, rtol=1e-12, final_time=phase_s)
    model.settings.stepsize = F.Stepsize(initial_value=100)

    model.settings.stepsize.milestones = [
        phase_s * 0.5,
        phase_s * 0.75,
        phase_s * 0.95,
        phase_s,
    ]

    model.initialise()

    print(f"Running simulation for time: {temps[i]} hours")
    print(f"Initial time: {float(model.t)} s")
    print(f"Final time: {float(model.settings.final_time)} s")

    model.run()

Produces:

Running simulation for time: 1.5 hours
Initial time: 0.0 s
Final time: 5400.0 s
Solving HydrogenTransportProblem: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5.40k/5.40k [00:00<00:00, 96.8kit/s]
Running simulation for time: 3 hours
Initial time: 0.0 s
Final time: 10800.0 s
Solving HydrogenTransportProblem: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10.8k/10.8k [00:00<00:00, 90.5kit/s]
Running simulation for time: 4.5 hours
Initial time: 0.0 s
Final time: 16200.0 s
Solving HydrogenTransportProblem: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16.2k/16.2k [00:00<00:00, 107kit/s]
Running simulation for time: 6 hours
Initial time: 0.0 s
Final time: 21600.0 s
Solving HydrogenTransportProblem: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21.6k/21.6k [00:00<00:00, 109kit/s]

The “issue” here is that by default, the starting time of a new model is t=0. You are just telling FESTIM “use this timestamp from this other VTX file as an initial condition for t=0”. Therefore the simulated time increases.

remidm · June 11, 2025, 1:58pm

This means there are two solutions to your issue:

simply accept that simulation times are relative and handle that in post-process. Set the final simulation time to a fixed number
after initialise() change the value of model.t. This isn’t ideal as it messes up the progress bar (minor) and could potentially mess with any time dependent boundary condition, source, etc (major).

model.t.value = temps[i - 1] * 3600 if i != 0 else 0

JonathanGSDUFOUR · June 11, 2025, 2:09pm

Okay thank you very much for the help and the explanation.
I will go back to my code and adapt what nedd to be and get what I’m looking for.

JonathanGSDUFOUR · June 16, 2025, 1:57pm

Hello again, I solved my previous issue with the time of simulation, but I’ve realized that when I try to add the trapH or trapD species in the field argument of the checkpoint, the first simulation using a checkpoint encounters an error and stops immediatley with teh following message :
File "/Home/JD270099/.conda/envs/hisp-env/lib/python3.13/site-packages/adios4dolfinx/checkpointing.py", line 358, in read_function
dof_owner = index_owner(comm, input_dofmap.array, num_dofs_global)
File "/Home/JD270099/.conda/envs/hisp-env/lib/python3.13/site-packages/adios4dolfinx/utils.py", line 127, in index_owner
assert (indices < N).all()
~~~~~~~~~~~~~^^
AssertionError

Is there a specific way to use the checkpoints with non-mobile species ?
Thanks in advance for your answers !

remidm · June 16, 2025, 4:15pm

@JonathanGSDUFOUR can you produce a MWE reproducing the error you shared please?

JonathanGSDUFOUR · June 17, 2025, 8:12am

Here is the code which gives the error. If I remove tradH and trapD from the checkpoint and the InitialCondition everything is working correctly.

import basix
import festim as F
import ufl
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt
from mpi4py import MPI
import scipy as scp
import adios4dolfinx
import sys

import dolfinx
from dolfinx import fem
from dolfinx.io import VTXWriter

assert MPI.COMM_WORLD.size == 1

temps = [2, 4, 7 ,10, 15]
data_dir = "data_simu_test/"
duree = 0
duree_prec = 0

for i in range(len(temps)):

    model = F.HydrogenTransportProblem()
    duree_prec = duree
    if i == 0:
        duree= temps[i]*3600
    else:
        duree=(temps[i]-temps[i-1])*3600
    n = 1000
    vertice = []
    L = 10e-5
    model.mesh = F.Mesh1D(np.linspace(0, L, num=200))
    print("duree is"+str(duree))
    print("duree_prec is"+str(duree_prec))


    tungst = F.Material(name="W", D_0=1, E_D=0)
    vol = F.VolumeSubdomain1D(id=1, borders=[0, L], material=tungst)
    surf_left = F.SurfaceSubdomain1D(id=1, x=0)
    surf_right = F.SurfaceSubdomain1D(id=2, x=L)

    W_atom_density = 19.25e3 / (
        183.84 * 1.66e-27
    )

    model.subdomains = [vol, surf_left, surf_right]

    mobile_H = F.Species("H")
    mobile_D = F.Species("D")

    trapH = F.Species("trapH", mobile=False)
    trapD = F.Species("trapD", mobile=False)
    def density_trap1(mod=np) : return lambda x: 0.002 * W_atom_density
    density_trap1_ufl = density_trap1(mod=ufl)
    trapA = F.ImplicitSpecies(n=density_trap1_ufl, others=[trapD, trapH], name="trapA")


    model.species = [
        mobile_D,
        mobile_H,
        trapD,
        trapH,
    ]

    if i != 0:
        #if it's not the first iteration of the loop, use the checkpoint to start from where the previous simulation ended
        model.initial_conditions = [
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="H",
                    timestamp=duree_prec,
                ),
                species=mobile_H,
            ),
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="D",
                    timestamp=duree_prec,
                ),
                species=trapH,
            ),
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="trapD",
                    timestamp=duree_prec,
                ),
                species=trapD,
            ),
            F.InitialCondition(
                value=F.read_function_from_file(
                    filename=data_dir + "checkpoint.bp",
                    name="trapH",
                    timestamp=duree_prec,
                ),
                species=trapH,
            ),
        ]


    model.reactions = [
        F.Reaction( 
            k_0=1e13 / (6*W_atom_density),
            E_k=1,
            p_0=1e13,
            E_p=1.5,
            reactant=[mobile_D, trapA],
            product=trapD,
            volume=vol,
        ),
        F.Reaction( 
            k_0=1e13 / W_atom_density,
            E_k=1,
            p_0=1e13,
            E_p=1.5,
            reactant=[mobile_H, trapA],
            product=trapH,
            volume=vol,
        ),
    ]

    model.boundary_conditions = [
        F.DirichletBC(subdomain=surf_left, value=2e16, species=mobile_D),
        F.DirichletBC(subdomain=surf_left, value=5e16, species=mobile_H),
        F.DirichletBC(subdomain=surf_right, value=0, species=mobile_D),
        F.DirichletBC(subdomain=surf_right, value=0, species=mobile_H),
    ]

    model.temperature = 600

    invent_mobD = F.TotalVolume(field=mobile_D, volume=vol)
    invent_trapAD = F.TotalVolume(field=trapD, volume=vol)
    checkpoint1 = F.VTXSpeciesExport(filename=data_dir+"checkpoint.bp", field=[
        mobile_D,
        mobile_H,
        trapD, 
        trapH
        ], checkpoint=True )
       

    model.exports = [
        checkpoint1,
    ]

    
    
    model.settings = F.Settings(atol=1e8, rtol=1e-12, max_iterations=500, final_time=duree)
    model.settings.stepsize = F.Stepsize(
        initial_value=0.5,
        growth_factor=1.05,
        target_nb_iterations=20,
        cutback_factor=0.95,
        max_stepsize=100,
    )
    if i!= 0:
       print(temps[i-1]*3600)
       print(duree_prec)
       print(duree)
     

    model.settings.stepsize.milestones= [duree*0.5, duree*0.75, duree*0.95, duree]

    model.initialise()
    model.run()

    np.savetxt(data_dir+"trap_D_"+str(temps[i])+".txt", np.array(trapD.post_processing_solution.x.array))
    np.savetxt(data_dir+"time_trap"+str(temps[i])+".txt", np.array(invent_trapAD.t))
    np.savetxt(data_dir+"invent_D"+str(temps[i])+".txt", invent_trapAD.data)

remidm · June 17, 2025, 1:10pm

@JonathanGSDUFOUR this is not really a MWE and can be quite hard to parse:

most of the imports are not needed
the reactions are not needed
the BCs are not needed
the last np.savetxt calls are not needed
the TotalVolume isn’t needed

Here’s a MWE that reproduces your issue:

import festim as F
import numpy as np

model = F.HydrogenTransportProblem()
model.mesh = F.Mesh1D(np.linspace(0, 1, num=200))

tungst = F.Material(name="W", D_0=1, E_D=0)
vol = F.VolumeSubdomain1D(id=1, borders=[0, 1], material=tungst)
surf_left = F.SurfaceSubdomain1D(id=1, x=0)
surf_right = F.SurfaceSubdomain1D(id=2, x=1)

model.subdomains = [vol, surf_left, surf_right]

mobile_H = F.Species("H")
trapH = F.Species("trapH", mobile=False)

model.species = [mobile_H, trapH]

model.settings = F.Settings(atol=1e8, rtol=1e-12, final_time=100)
model.settings.stepsize = F.Stepsize(10)
model.temperature = 600


model.exports = [
    F.VTXSpeciesExport(
        filename="checkpoint.bp",
        field=[mobile_H, trapH],
        checkpoint=True,
    )
]

model.initialise()
model.run()

# try and read the file

c_H_in = F.read_function_from_file(
    filename="checkpoint.bp",
    name="H",
    timestamp=100,
)

c_H_trap_in = F.read_function_from_file(
    filename="checkpoint.bp",
    name="trapH",
    timestamp=100,
)

45 lines vs 172

The issue is that in FESTIM, by default (in fact it’s hard coded), immobile species are defined on a DG1 functionspace and not a CG1 functionspace like mobile ones.

The read_function_from_file has a family argument that defaults to "P" (ie. CG).

To make your code work, simply change it to "DG":

c_H_trap_in = F.read_function_from_file(
    filename="checkpoint.bp",
    name="trapH",
    timestamp=100,
    family="DG",
)

Topic		Replies	Views
Problems when reproducing the results of the article User Support	20	159	May 31, 2024
Simulation doesn't converge under local traps condition User Support	3	35	March 10, 2025
Wrong solution with 2D simulation User Support	15	79	January 29, 2025
File Formatting Bug in the task4 Workshop File User Support	3	62	December 22, 2023
Issue with 0 iteration solving User Support festim2	7	45	March 24, 2025

Issue with checkpoints

Related topics