)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":38752,"name":"Pavlo Kostianov","email":"pkostian@redhat.com","username":"pkostian"},"change_message_id":"d90b25a4bff9e8fdbfe50b60c8d326011dc40713","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":3,"id":"3a9bb6ee_5e9dfec0","updated":"2026-03-19 15:45:43.000000000","message":"Let\u0027s discuss the behavior if a task times out. Currently the timeout will unblock result.get(), but pool.join() will still indefinitely hang waiting for all workers to finish. Should we maybe terminate the pool when any task times out or add a separate grace period for cleanup (e.g., try join for 60s, then terminate)? \n\nOther nits \u0026 thoughts:\n- Commit message contains \"or graceful shutdown\" which doesn\u0027t seem fit there.\n- Might be a good idea to add timeout behavior test(s).\n- Consider adding timeout behavior explanation to release notes.","commit_id":"13ea040712dbbea7f7e9889700addffad0e7d6f0"},{"author":{"_account_id":38752,"name":"Pavlo Kostianov","email":"pkostian@redhat.com","username":"pkostian"},"change_message_id":"03170309090f9649afcb8423867257fb12f9d301","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"c495cdd3_db73997d","in_reply_to":"3a9bb6ee_5e9dfec0","updated":"2026-03-24 11:21:38.000000000","message":"Done, _close_pool_with_join_timeout() addressed the main concern","commit_id":"13ea040712dbbea7f7e9889700addffad0e7d6f0"},{"author":{"_account_id":38752,"name":"Pavlo Kostianov","email":"pkostian@redhat.com","username":"pkostian"},"change_message_id":"03170309090f9649afcb8423867257fb12f9d301","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":7,"id":"b31d6b6c_1da0fdba","updated":"2026-03-24 11:21:38.000000000","message":"An idea: A follow-up change adding an integration test to check the full workflow might be nice to have.","commit_id":"0a94315c65648055f0144849e0398370e8099c89"},{"author":{"_account_id":9816,"name":"Takashi Kajinami","email":"kajinamit@oss.nttdata.com","username":"kajinamit"},"change_message_id":"fea3143ddd62484f8545298b794f9389bd039af5","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":8,"id":"8257ab6d_09d01ae1","updated":"2026-03-25 17:15:46.000000000","message":"This makes me think that if we really want to maintain the existing periodic task implementation really for concurrent version. The problem with the existing design is that it is heavily dependent on serial execution but aims to provide psuedo concurrency using due-time calculation.\n\nHowever for concurrent mode I think preparing the persistent thead pool and handle the full lifecycle of periodic executions may be much simpler and helps us avoid such interference between tasks, which you are trying to resolve by this.","commit_id":"54aec4d3fc0a92e2dec4e62e76a0965feabc7a76"},{"author":{"_account_id":31245,"name":"Daniel Bengtsson","email":"dbengt@redhat.com","username":"damani42"},"change_message_id":"97a1ac2a304f1f6702294264eb4a02ce6cf7522a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":8,"id":"bbe6c704_16842317","updated":"2026-03-25 14:50:13.000000000","message":"recheck","commit_id":"54aec4d3fc0a92e2dec4e62e76a0965feabc7a76"},{"author":{"_account_id":9816,"name":"Takashi Kajinami","email":"kajinamit@oss.nttdata.com","username":"kajinamit"},"change_message_id":"55e7bcb8b0b344f15ea4a4151a1c2ff8545edab7","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":8,"id":"e485219e_07062088","in_reply_to":"8257ab6d_09d01ae1","updated":"2026-03-25 18:01:57.000000000","message":"So let me clarify my point a bit more.\n\nThe existing implementation is expected to be run in a single thread but provide a pseudo concurrency to handle multiple tasks in multiple timings. For this purpose current run_periodic_tasks attempts to run all jobs which can run at a timing.\n\nHowever if we prepare multiple threads for periodic workers, general expectation is that each thead may pick up due tasks independently. However with current implementation, which inherits the concept of run_periodic_tasks, the other threads are blocked and idle if a single task takes long time.\n\nI think what we likely need really is an interface which returns a single next task and time until next task.\nThis allows us to run these tasks concurrently with external persistent thread pool.\n\n```\npool \u003d _multiprocessing.get_spawn_pool(processes\u003dprocesses)\nwhile True:\n    task_name, task, sleep_time \u003d periodic_tasks.get_next_task()\n    time.sleep(sleep_time)\n    result \u003d pool.apply_async(...)\n```","commit_id":"54aec4d3fc0a92e2dec4e62e76a0965feabc7a76"}],"oslo_service/_options.py":[{"author":{"_account_id":9816,"name":"Takashi Kajinami","email":"kajinamit@oss.nttdata.com","username":"kajinamit"},"change_message_id":"fea3143ddd62484f8545298b794f9389bd039af5","unresolved":true,"context_lines":[{"line_number":54,"context_line":"               default\u003d300,"},{"line_number":55,"context_line":"               min\u003d0,"},{"line_number":56,"context_line":"               help\u003d\u0027When using run_periodic_tasks_in_parallel(), maximum \u0027"},{"line_number":57,"context_line":"                    \u0027number of seconds to wait for each worker result via \u0027"},{"line_number":58,"context_line":"                    \u0027result.get(timeout\u003d...). This limits how long one stuck \u0027"},{"line_number":59,"context_line":"                    \u0027task can block retrieval of results; it does not bound \u0027"},{"line_number":60,"context_line":"                    \u0027pool shutdown (see periodic_task_shutdown_timeout). If a \u0027"},{"line_number":61,"context_line":"                    \u0027task exceeds this time it is logged and the next task is \u0027"}],"source_content_type":"text/x-python","patch_set":8,"id":"80544576_1a709021","line":58,"range":{"start_line":57,"start_character":70,"end_line":58,"end_character":44},"updated":"2026-03-25 17:15:46.000000000","message":"IMO describing internal logic call is too much for users just consuming it. I\u0027d remove this and futher mention to result.get. (In addtion it\u0027s not clear what \"result\" is).\n\nAlso I wonder if we should set timeout for whole task execution rather thant this result.get. This allows us to predict effect of this timeout tuning more clear, because we don\u0027t know how many tasks can be picked up at once.","commit_id":"54aec4d3fc0a92e2dec4e62e76a0965feabc7a76"},{"author":{"_account_id":9816,"name":"Takashi Kajinami","email":"kajinamit@oss.nttdata.com","username":"kajinamit"},"change_message_id":"fea3143ddd62484f8545298b794f9389bd039af5","unresolved":true,"context_lines":[{"line_number":59,"context_line":"                    \u0027task can block retrieval of results; it does not bound \u0027"},{"line_number":60,"context_line":"                    \u0027pool shutdown (see periodic_task_shutdown_timeout). If a \u0027"},{"line_number":61,"context_line":"                    \u0027task exceeds this time it is logged and the next task is \u0027"},{"line_number":62,"context_line":"                    \u0027processed. Zero disables this wait (result.get() has no \u0027"},{"line_number":63,"context_line":"                    \u0027timeout).\u0027),"},{"line_number":64,"context_line":"    cfg.IntOpt(\u0027periodic_task_shutdown_timeout\u0027,"},{"line_number":65,"context_line":"               default\u003d60,"}],"source_content_type":"text/x-python","patch_set":8,"id":"52ad6ac8_298637a8","line":62,"range":{"start_line":62,"start_character":21,"end_line":62,"end_character":30},"updated":"2026-03-25 17:15:46.000000000","message":"This is a bit confusing because tasks are processed concurrently. Maybe","commit_id":"54aec4d3fc0a92e2dec4e62e76a0965feabc7a76"}],"oslo_service/periodic_task.py":[{"author":{"_account_id":38752,"name":"Pavlo Kostianov","email":"pkostian@redhat.com","username":"pkostian"},"change_message_id":"d90b25a4bff9e8fdbfe50b60c8d326011dc40713","unresolved":true,"context_lines":[{"line_number":314,"context_line":"            timeout \u003d self.conf.periodic_task_timeout"},{"line_number":315,"context_line":"            for full_task_name, result in async_results:"},{"line_number":316,"context_line":"                try:"},{"line_number":317,"context_line":"                    if timeout and timeout \u003e 0:"},{"line_number":318,"context_line":"                        result.get(timeout\u003dtimeout)"},{"line_number":319,"context_line":"                    else:"},{"line_number":320,"context_line":"                        result.get()"}],"source_content_type":"text/x-python","patch_set":3,"id":"3ad5af6f_32c87e04","line":317,"updated":"2026-03-19 15:45:43.000000000","message":"Just \u0027timeout \u003e 0\u0027 would be sufficient as \u0027if timeout and timeout \u003e 0:\u0027 means \u0027if timeout \u003e 0:\u0027 (since 0 is falsy)","commit_id":"13ea040712dbbea7f7e9889700addffad0e7d6f0"},{"author":{"_account_id":31245,"name":"Daniel Bengtsson","email":"dbengt@redhat.com","username":"damani42"},"change_message_id":"475a89113833f1e7c88ea64d518df8f2ab8ebb9b","unresolved":false,"context_lines":[{"line_number":314,"context_line":"            timeout \u003d self.conf.periodic_task_timeout"},{"line_number":315,"context_line":"            for full_task_name, result in async_results:"},{"line_number":316,"context_line":"                try:"},{"line_number":317,"context_line":"                    if timeout and timeout \u003e 0:"},{"line_number":318,"context_line":"                        result.get(timeout\u003dtimeout)"},{"line_number":319,"context_line":"                    else:"},{"line_number":320,"context_line":"                        result.get()"}],"source_content_type":"text/x-python","patch_set":3,"id":"50da200b_225247ef","line":317,"in_reply_to":"207fa092_92d5b700","updated":"2026-03-20 10:44:19.000000000","message":"Done","commit_id":"13ea040712dbbea7f7e9889700addffad0e7d6f0"},{"author":{"_account_id":31245,"name":"Daniel Bengtsson","email":"dbengt@redhat.com","username":"damani42"},"change_message_id":"4568706b87169317ffc284dd5c9faea18693a930","unresolved":true,"context_lines":[{"line_number":314,"context_line":"            timeout \u003d self.conf.periodic_task_timeout"},{"line_number":315,"context_line":"            for full_task_name, result in async_results:"},{"line_number":316,"context_line":"                try:"},{"line_number":317,"context_line":"                    if timeout and timeout \u003e 0:"},{"line_number":318,"context_line":"                        result.get(timeout\u003dtimeout)"},{"line_number":319,"context_line":"                    else:"},{"line_number":320,"context_line":"                        result.get()"}],"source_content_type":"text/x-python","patch_set":3,"id":"207fa092_92d5b700","line":317,"in_reply_to":"3ad5af6f_32c87e04","updated":"2026-03-20 10:37:36.000000000","message":"Good point, since this is an IntOpt with a default, `timeout \u003e 0` is sufficient here. I will simplify it.","commit_id":"13ea040712dbbea7f7e9889700addffad0e7d6f0"},{"author":{"_account_id":38752,"name":"Pavlo Kostianov","email":"pkostian@redhat.com","username":"pkostian"},"change_message_id":"03170309090f9649afcb8423867257fb12f9d301","unresolved":true,"context_lines":[{"line_number":67,"context_line":"            {\"timeout\": join_timeout_sec},"},{"line_number":68,"context_line":"        )"},{"line_number":69,"context_line":"        pool.terminate()"},{"line_number":70,"context_line":"        deadline_cleanup \u003d time.monotonic() + join_timeout_sec"},{"line_number":71,"context_line":"        while join_thread.is_alive() and time.monotonic() \u003c deadline_cleanup:"},{"line_number":72,"context_line":"            time.sleep(0.05)"},{"line_number":73,"context_line":""},{"line_number":74,"context_line":""},{"line_number":75,"context_line":"def list_opts():"}],"source_content_type":"text/x-python","patch_set":7,"id":"74486e0f_4cab00bd","line":72,"range":{"start_line":70,"start_character":0,"end_line":72,"end_character":28},"updated":"2026-03-24 11:21:38.000000000","message":"After `pool.terminate()`, the code waits `join_timeout_sec` (60s by default) for the join thread to finish. Since `terminate()` typically kills workers within milliseconds-to-seconds, a shorter timeout would be more appropriate - e.g., 5-10s. This optimizes for the common case where workers die quickly.","commit_id":"0a94315c65648055f0144849e0398370e8099c89"},{"author":{"_account_id":38752,"name":"Pavlo Kostianov","email":"pkostian@redhat.com","username":"pkostian"},"change_message_id":"03170309090f9649afcb8423867257fb12f9d301","unresolved":true,"context_lines":[{"line_number":70,"context_line":"        deadline_cleanup \u003d time.monotonic() + join_timeout_sec"},{"line_number":71,"context_line":"        while join_thread.is_alive() and time.monotonic() \u003c deadline_cleanup:"},{"line_number":72,"context_line":"            time.sleep(0.05)"},{"line_number":73,"context_line":""},{"line_number":74,"context_line":""},{"line_number":75,"context_line":"def list_opts():"},{"line_number":76,"context_line":"    \"\"\"Entry point for oslo-config-generator.\"\"\""}],"source_content_type":"text/x-python","patch_set":7,"id":"9c3ecf7e_7095c957","line":73,"range":{"start_line":73,"start_character":0,"end_line":73,"end_character":1},"updated":"2026-03-24 11:21:38.000000000","message":"In rare cases where workers survive terminate + cleanup timeout (e.g. kernel deadlock), the join thread remains alive and the function silently returns. Consider logging a warning (or even an exception as this rare case most likely means the system is in a bad state): \"Pool join thread still running after terminate; worker processes may not have exited cleanly.\" This helps diagnose the exceptional case.","commit_id":"0a94315c65648055f0144849e0398370e8099c89"},{"author":{"_account_id":31245,"name":"Daniel Bengtsson","email":"dbengt@redhat.com","username":"damani42"},"change_message_id":"4140a35386f13eab3b3364b66a17a68885b5c428","unresolved":false,"context_lines":[{"line_number":70,"context_line":"        deadline_cleanup \u003d time.monotonic() + join_timeout_sec"},{"line_number":71,"context_line":"        while join_thread.is_alive() and time.monotonic() \u003c deadline_cleanup:"},{"line_number":72,"context_line":"            time.sleep(0.05)"},{"line_number":73,"context_line":""},{"line_number":74,"context_line":""},{"line_number":75,"context_line":"def list_opts():"},{"line_number":76,"context_line":"    \"\"\"Entry point for oslo-config-generator.\"\"\""}],"source_content_type":"text/x-python","patch_set":7,"id":"36515123_1e475d45","line":73,"range":{"start_line":73,"start_character":0,"end_line":73,"end_character":1},"in_reply_to":"9c3ecf7e_7095c957","updated":"2026-03-24 12:26:17.000000000","message":"Done","commit_id":"0a94315c65648055f0144849e0398370e8099c89"},{"author":{"_account_id":31245,"name":"Daniel Bengtsson","email":"dbengt@redhat.com","username":"damani42"},"change_message_id":"6532da6282638e9a236b188ccea758616f6954cb","unresolved":false,"context_lines":[{"line_number":70,"context_line":"        deadline_cleanup \u003d time.monotonic() + join_timeout_sec"},{"line_number":71,"context_line":"        while join_thread.is_alive() and time.monotonic() \u003c deadline_cleanup:"},{"line_number":72,"context_line":"            time.sleep(0.05)"},{"line_number":73,"context_line":""},{"line_number":74,"context_line":""},{"line_number":75,"context_line":"def list_opts():"},{"line_number":76,"context_line":"    \"\"\"Entry point for oslo-config-generator.\"\"\""}],"source_content_type":"text/x-python","patch_set":7,"id":"d6ba335d_087d2568","line":73,"range":{"start_line":73,"start_character":0,"end_line":73,"end_character":1},"in_reply_to":"9c3ecf7e_7095c957","updated":"2026-03-24 12:24:48.000000000","message":"Done","commit_id":"0a94315c65648055f0144849e0398370e8099c89"},{"author":{"_account_id":31245,"name":"Daniel Bengtsson","email":"dbengt@redhat.com","username":"damani42"},"change_message_id":"7ba51bd3c56c0215dd6eaa9f203bb6864bfbeab6","unresolved":false,"context_lines":[{"line_number":70,"context_line":"        deadline_cleanup \u003d time.monotonic() + join_timeout_sec"},{"line_number":71,"context_line":"        while join_thread.is_alive() and time.monotonic() \u003c deadline_cleanup:"},{"line_number":72,"context_line":"            time.sleep(0.05)"},{"line_number":73,"context_line":""},{"line_number":74,"context_line":""},{"line_number":75,"context_line":"def list_opts():"},{"line_number":76,"context_line":"    \"\"\"Entry point for oslo-config-generator.\"\"\""}],"source_content_type":"text/x-python","patch_set":7,"id":"5c4db0ea_3b44cd48","line":73,"range":{"start_line":73,"start_character":0,"end_line":73,"end_character":1},"in_reply_to":"d6ba335d_087d2568","updated":"2026-03-25 14:48:58.000000000","message":"I added a final warning when the join thread is still alive after terminate, so the abnormal case is no longer silent.","commit_id":"0a94315c65648055f0144849e0398370e8099c89"},{"author":{"_account_id":9816,"name":"Takashi Kajinami","email":"kajinamit@oss.nttdata.com","username":"kajinamit"},"change_message_id":"fea3143ddd62484f8545298b794f9389bd039af5","unresolved":true,"context_lines":[{"line_number":372,"context_line":"                    else:"},{"line_number":373,"context_line":"                        result.get()"},{"line_number":374,"context_line":"                except MPTimeoutError:"},{"line_number":375,"context_line":"                    LOG.error("},{"line_number":376,"context_line":"                        \"Periodic task %(full_task_name)s timed out after \""},{"line_number":377,"context_line":"                        \"%(timeout)s seconds\","},{"line_number":378,"context_line":"                        {\"full_task_name\": full_task_name, \"timeout\": timeout},"},{"line_number":379,"context_line":"                    )"},{"line_number":380,"context_line":"                    if raise_on_error:"},{"line_number":381,"context_line":"                        raise"},{"line_number":382,"context_line":"                except BaseException:"}],"source_content_type":"text/x-python","patch_set":8,"id":"2e051618_656a581d","line":379,"range":{"start_line":375,"start_character":0,"end_line":379,"end_character":21},"updated":"2026-03-25 17:15:46.000000000","message":"IMO we should cancel the task here instead of doing it at finally clause, otherwise some tasks might be completed after the loop while the log says it timed out.","commit_id":"54aec4d3fc0a92e2dec4e62e76a0965feabc7a76"}]}
