Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix
DetachedInstanceError
when finding zombies in Dag Parsing proce…
…ss (apache#28198) ``` [2022-12-06T14:20:21.622+0000] {base_job.py:229} DEBUG - [heartbeat] [2022-12-06T14:20:21.623+0000] {scheduler_job.py:1495} DEBUG - Finding 'running' jobs without a recent heartbeat [2022-12-06T14:20:21.637+0000] {scheduler_job.py:1515} WARNING - Failing (2) jobs without heartbeat after 2022-12-06 14:15:21.623199+00:00 [2022-12-06T14:20:21.641+0000] {scheduler_job.py:1526} ERROR - Detected zombie job: {'full_filepath': '/opt/airflow/dags/xxx_dag.py', 'processor_subdir': '/opt/airflow/dags', 'msg': "{'DAG Id': 'xxx', 'Task Id': 'xxx', 'Run Id': 'scheduled__2022-12-05T00:15:00+00:00', 'Hostname': 'airflow-worker-0.airflow-worker.airflow2.svc.cluster.local', 'External Executor Id': '9520cb9f-3245-497a-8e17-e9dec29d4549'}", 'simple_task_instance': <airflow.models.taskinstance.SimpleTaskInstance object at 0x7f1cd4de4130>, 'is_failure_callback': True} [2022-12-06T14:20:21.645+0000] {scheduler_job.py:763} ERROR - Exception when executing SchedulerJob._run_scheduler_loop Traceback (most recent call last): File "/home/airflow/.local/lib/python3.10/site-packages/airflow/jobs/scheduler_job.py", line 746, in _execute self._run_scheduler_loop() File "/home/airflow/.local/lib/python3.10/site-packages/airflow/jobs/scheduler_job.py", line 878, in _run_scheduler_loop next_event = timers.run(blocking=False) File "/usr/local/lib/python3.10/sched.py", line 151, in run action(*argument, **kwargs) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/utils/event_scheduler.py", line 37, in repeat action(*args, **kwargs) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/utils/session.py", line 75, in wrapper return func(*args, session=session, **kwargs) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/jobs/scheduler_job.py", line 1522, in _find_zombies processor_subdir=ti.dag_model.processor_subdir, File "/home/airflow/.local/lib/python3.10/site-packages/sqlalchemy/orm/attributes.py", line 481, in __get__ return 
self.impl.get(state, dict_) File "/home/airflow/.local/lib/python3.10/site-packages/sqlalchemy/orm/attributes.py", line 926, in get value = self._fire_loader_callables(state, key, passive) File "/home/airflow/.local/lib/python3.10/site-packages/sqlalchemy/orm/attributes.py", line 962, in _fire_loader_callables return self.callable_(state, passive) File "/home/airflow/.local/lib/python3.10/site-packages/sqlalchemy/orm/strategies.py", line 861, in _load_for_state raise orm_exc.DetachedInstanceError( sqlalchemy.orm.exc.DetachedInstanceError: Parent instance <TaskInstance at 0x7f1ccc3e8520> is not bound to a Session; lazy load operation of attribute 'dag_model' cannot proceed (Background on this error at: https://sqlalche.me/e/14/bhk3) [2022-12-06T14:20:21.647+0000] {celery_executor.py:443} DEBUG - Inquiring about 5 celery task(s) [2022-12-06T14:20:21.669+0000] {celery_executor.py:602} DEBUG - Fetched 5 state(s) for 5 task(s) [2022-12-06T14:20:21.669+0000] {celery_executor.py:446} DEBUG - Inquiries completed. 
[2022-12-06T14:20:21.669+0000] {scheduler_job.py:775} INFO - Exited execute loop [2022-12-06T14:20:21.674+0000] {cli_action_loggers.py:83} DEBUG - Calling callbacks: [] Traceback (most recent call last): File "/home/airflow/.local/bin/airflow", line 8, in <module> sys.exit(main()) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/__main__.py", line 39, in main args.func(args) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/cli/cli_parser.py", line 52, in command return func(*args, **kwargs) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/utils/cli.py", line 103, in wrapper return f(*args, **kwargs) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/cli/commands/scheduler_command.py", line 85, in scheduler _run_scheduler_job(args=args) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/cli/commands/scheduler_command.py", line 50, in _run_scheduler_job job.run() File "/home/airflow/.local/lib/python3.10/site-packages/airflow/jobs/base_job.py", line 247, in run self._execute() File "/home/airflow/.local/lib/python3.10/site-packages/airflow/jobs/scheduler_job.py", line 746, in _execute self._run_scheduler_loop() File "/home/airflow/.local/lib/python3.10/site-packages/airflow/jobs/scheduler_job.py", line 878, in _run_scheduler_loop next_event = timers.run(blocking=False) File "/usr/local/lib/python3.10/sched.py", line 151, in run action(*argument, **kwargs) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/utils/event_scheduler.py", line 37, in repeat action(*args, **kwargs) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/utils/session.py", line 75, in wrapper return func(*args, session=session, **kwargs) File "/home/airflow/.local/lib/python3.10/site-packages/airflow/jobs/scheduler_job.py", line 1522, in _find_zombies processor_subdir=ti.dag_model.processor_subdir, File "/home/airflow/.local/lib/python3.10/site-packages/sqlalchemy/orm/attributes.py", line 481, in 
__get__ return self.impl.get(state, dict_) File "/home/airflow/.local/lib/python3.10/site-packages/sqlalchemy/orm/attributes.py", line 926, in get value = self._fire_loader_callables(state, key, passive) File "/home/airflow/.local/lib/python3.10/site-packages/sqlalchemy/orm/attributes.py", line 962, in _fire_loader_callables return self.callable_(state, passive) File "/home/airflow/.local/lib/python3.10/site-packages/sqlalchemy/orm/strategies.py", line 861, in _load_for_state raise orm_exc.DetachedInstanceError( sqlalchemy.orm.exc.DetachedInstanceError: Parent instance <TaskInstance at 0x7f1ccc3e8520> is not bound to a Session; lazy load operation of attribute 'dag_model' cannot proceed (Background on this error at: https://sqlalche.me/e/14/bhk3) ``` In standalone DAG processor mode, `DatabaseCallbackSink` is used, and `_find_zombies` calls `self.executor.send_callback(request)`. However, the ORM `session` is not propagated to that call, so `provide_session` is applied again inside `send`. ``` class DatabaseCallbackSink(BaseCallbackSink): """Sends callbacks to database.""" @provide_session def send(self, callback: CallbackRequest, session: Session = NEW_SESSION) -> None: """Sends callback for execution.""" db_callback = DbCallbackRequest(callback=callback, priority_weight=10) session.add(db_callback) ``` Signed-off-by: BobDu <i@bobdu.cc> (cherry picked from commit 4b340b7)
- Loading branch information