From 624b162ea879a69ab84e10babe34e7799ca0cce1 Mon Sep 17 00:00:00 2001 From: Christian Date: Thu, 16 Jan 2025 12:29:58 +0100 Subject: [PATCH] added postfail phase --- src/atomiq/atomiq.py | 44 +++++++++++++++++++++++++++++++++++++------- src/atomiq/hooks.py | 2 +- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/atomiq/atomiq.py b/src/atomiq/atomiq.py index 2e51b46..1873bfc 100644 --- a/src/atomiq/atomiq.py +++ b/src/atomiq/atomiq.py @@ -66,6 +66,14 @@ class AtomiqExperiment(EnvExperiment): self.__terminate_asap__ = False + # latch to the core device logger to handle kernel sequence and collision errors. + def _sequence_error_filter(record): + if "sequence error" in record.msg or "Collisions reported" in record.msg: + self._sequence_error = True + return True + self._sequence_error = False + logging.getLogger("artiq.coredevice.comm_kernel").addFilter(_sequence_error_filter) + super().__init__(managers_or_parent) def _getmro(self): @@ -335,7 +343,6 @@ class AtomiqExperiment(EnvExperiment): :param chunks: (list) list of chunks. Each entry is again an iterable with CHUNKSIZE Scanpoint objects inside """ current_chunk = 0 - for points in chunk_list: try: self._prechunk_host(points) @@ -348,6 +355,10 @@ class AtomiqExperiment(EnvExperiment): self.log.warning(traceback.format_exc()) self._prerun_core(reset=True) finally: + if self._sequence_error: + self.log.warning(f"Sequence error reported during chunk {current_chunk}, reseting kernel...") + self._prerun_core(reset=True) + self._sequence_error = False current_chunk += 1 if self.scheduler.check_pause(): self.core.comm.close() @@ -379,13 +390,25 @@ class AtomiqExperiment(EnvExperiment): logger.info(f"running with component list {self._components}") def run(self): - self._run_init() try: - self._loop_chunks(self.chunker(self.msm, self.CHUNKSIZE)) - except TerminationRequested: - logger.info(f"I'll be back - RID {self.scheduler.rid}") - self._postrun() - self._postrun_host() + self._run_init() + try: + self._loop_chunks(self.chunker(self.msm, self.CHUNKSIZE)) + except TerminationRequested: + logger.info(f"I'll be back - RID {self.scheduler.rid}") + self._postrun() + self._postrun_host() + except Exception as e_run: + logger.exception("Run sub-phase failed with %s - RID %i ", e_run, self.scheduler.rid) + try: + self._postfail() + except Exception as e_postfail_kernel: + logger.exception("Post-fail kernel phase failed with %s - RID %i ", e_postfail_kernel, self.scheduler.rid) + try: + self._postfail_host() + except Exception as e_postfail_host: + logger.exception("Post-fail host phase failed with %s - RID %i ", e_postfail_host, self.scheduler.rid) + raise e_run @kernel def _runchunk(self, points): @@ -430,6 +453,13 @@ class AtomiqExperiment(EnvExperiment): def prerun_host(self): pass + @kernel + def postfail(self): + pass + + def postfail_host(self): + pass + @kernel def postrun(self): pass diff --git a/src/atomiq/hooks.py b/src/atomiq/hooks.py index 2e33764..e7b74cc 100644 --- a/src/atomiq/hooks.py +++ b/src/atomiq/hooks.py @@ -1,3 +1,3 @@ hooks = [("prerun", 0), ("postrun", 0), ("prechunk", 1), ("postchunk", 1), ("prestep", 1), ("poststep", 1), ("prerun_host", 0), ("postrun_host", 0), ("prechunk_host", 1), - ("postchunk_host", 1)] + ("postchunk_host", 1), ("postfail",0), ("postfail_host",0)] -- GitLab