{".zuul.yaml":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"2121b7c15b1ed6e06292ed41668949c8d64ea7fc","unresolved":true,"context_lines":[{"line_number":451,"context_line":"              # Added in Flamingo. Ensures that thread statistics is logged"},{"line_number":452,"context_line":"              # each time a task is submitted to the executor so that web"},{"line_number":453,"context_line":"              # can troubleshoot hanging threads easier."},{"line_number":454,"context_line":"              thread_pool_statistic_period: 0"},{"line_number":455,"context_line":"          \"/$NEUTRON_CORE_PLUGIN_CONF\":"},{"line_number":456,"context_line":"            # Needed for QoS port heal allocation testing."},{"line_number":457,"context_line":"            ovs:"}],"source_content_type":"text/x-yaml","patch_set":1,"id":"af17307f_86c77249","line":454,"updated":"2025-10-31 10:51:15.000000000","message":"We need this in the subnode config as well","commit_id":"6933301a8bad33dbeac25f3c9a417b7ae40916cc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"6eb0b2085d8aad6c33654060bf9dec34a6fbd7d3","unresolved":false,"context_lines":[{"line_number":451,"context_line":"              # Added in Flamingo. Ensures that thread statistics is logged"},{"line_number":452,"context_line":"              # each time a task is submitted to the executor so that web"},{"line_number":453,"context_line":"              # can troubleshoot hanging threads easier."},{"line_number":454,"context_line":"              thread_pool_statistic_period: 0"},{"line_number":455,"context_line":"          \"/$NEUTRON_CORE_PLUGIN_CONF\":"},{"line_number":456,"context_line":"            # Needed for QoS port heal allocation testing."},{"line_number":457,"context_line":"            ovs:"}],"source_content_type":"text/x-yaml","patch_set":1,"id":"0ad1100b_43fa2b21","line":454,"in_reply_to":"af17307f_86c77249","updated":"2025-11-04 09:49:57.000000000","message":"Done","commit_id":"6933301a8bad33dbeac25f3c9a417b7ae40916cc"}],"/PATCHSET_LEVEL":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"518b76afc6b252d2f42c48d65f69f6570d9ecb8e","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":1,"id":"f0eccd3d_57515d4a","updated":"2025-10-31 14:17:34.000000000","message":"At least nova-compute started with threading mode and run it with our default config. 
And some complex tempest tests passed:\n\nhttps://bbd33325f713e5bb0c56-7b7d0a327bfffe1646ca64acb937d9e3.ssl.cf1.rackcdn.com/openstack/eb53570f9dc44bdd9f1dbf5b8d05df97/controller/logs/screen-n-cpu.txt\n\n```\nOct 31 13:44:27.371543 npef480c4cb1e94 nova-compute[88374]: DEBUG nova.utils [None req-26c21371-eca5-4c86-8d2d-b3d35b77af0a tempest-TestShelveInstance-539534465 tempest-TestShelveInstance-539534465-project-member] State of MainProcess.default ThreadPoolExecutor when submitting a new task: max_workers: 10, workers: 10, idle workers: 7, queued work: 0, stats: \u003cExecutorStatistics object at 0x7113c031ac00 (failures\u003d0, executed\u003d238, runtime\u003d2560.96, cancelled\u003d0)\u003e {{(pid\u003d88374) _log_executor_stats /opt/stack/nova/nova/utils.py:1276}}\n```\n\n```\n2025-10-31 13:07:09.576696 | controller | {2} tempest.scenario.test_instances_with_cinder_volumes.TestInstancesWithCinderVolumes.test_instances_with_cinder_volumes_on_all_compute_nodes [348.575935s] ... ok\n```\n\nBut there are obvious problems. Some tests are timing out causing the whole tempest run to time out.\n\nLibvirt event handling is probably dead as it still tries to use eventlet primitives. This is a TODO to rewrite\n```\nOct 31 11:10:23.001022 npef480c4cb1e94 nova-compute[88374]: WARNING nova.virt.libvirt.host [None req-4b193c97-11f5-461d-a539-a63871cccf42 None None] URI qemu:///system does not support events: internal error: could not initialize domain event timer: libvirt.libvirtError: internal error: could not initialize domain event timer\n```\n\nos-brick raising multiple errors like this \n```\nOct 31 11:22:49.004165 npef480c4cb1e94 nova-compute[88374]: DEBUG oslo.privsep.daemon [-] privsep: reply[7db28f70-22d9-408c-86cf-a9d4bb6532b7]: (4, (\u0027InitiatorName\u003diqn.2016-04.com.open-iscsi:8fe5ce8bed9\\n\u0027, \u0027\u0027)) {{(pid\u003d105534) _call_back /opt/stack/data/venv/lib/python3.12/site-packages/oslo_privsep/daemon.py:515}}\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]: DEBUG oslo.privsep.daemon [-] privsep: Exception during request[5af6da04-3913-40db-812d-867e64ee9db4]: [Errno 2] No such file or directory: \u0027/dev/scini\u0027 {{(pid\u003d105534) _process_cmd /opt/stack/data/venv/lib/python3.12/site-packages/oslo_privsep/daemon.py:492}}\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]: Traceback (most recent call last):\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:   File \"/opt/stack/data/venv/lib/python3.12/site-packages/oslo_privsep/daemon.py\", line 489, in _process_cmd\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:     ret \u003d func(*f_args, **f_kwargs)\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:           ^^^^^^^^^^^^^^^^^^^^^^^^^\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:   File \"/opt/stack/data/venv/lib/python3.12/site-packages/oslo_privsep/priv_context.py\", line 270, in _wrap\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:     return func(*args, **kwargs)\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:            ^^^^^^^^^^^^^^^^^^^^^\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:   File \"/opt/stack/data/venv/lib/python3.12/site-packages/os_brick/privileged/scaleio.py\", line 57, in get_guid\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:     with open_scini_device() as fd:\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:   File \"/usr/lib/python3.12/contextlib.py\", line 137, in __enter__\nOct 31 11:22:49.011645 
npef480c4cb1e94 nova-compute[88374]:     return next(self.gen)\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:            ^^^^^^^^^^^^^^\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:   File \"/opt/stack/data/venv/lib/python3.12/site-packages/os_brick/privileged/scaleio.py\", line 40, in open_scini_device\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:     fd \u003d os.open(SCINI_DEVICE_PATH, os.O_RDWR)\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]:          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nOct 31 11:22:49.011645 npef480c4cb1e94 nova-compute[88374]: FileNotFoundError: [Errno 2] No such file or directory: \u0027/dev/scini\u0027\n```\n\nIt seems compute manager still uses a greenpool somewhere explicitly\n```\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server   File \"/opt/stack/nova/nova/compute/manager.py\", line 9703, in live_migration\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server     future \u003d nova.utils.spawn_on(\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server              ^^^^^^^^^^^^^^^^^^^^\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server   File \"/opt/stack/nova/nova/utils.py\", line 623, in spawn_on\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server     if _executor_is_full(executor):\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server        ^^^^^^^^^^^^^^^^^^^^^^^^^^^\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server   File \"/opt/stack/nova/nova/utils.py\", line 608, in _executor_is_full\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server     idle_workers \u003d len([w for w in executor._workers if w.idle]) \u003e 0\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server                                    ^^^^^^^^^^^^^^^^^\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server AttributeError: \u0027GreenThreadPoolExecutor\u0027 object has no attribute \u0027_workers\u0027\nOct 31 11:24:46.017086 npef480c4cb1e94 nova-compute[88374]: ERROR oslo_messaging.rpc.server \n```","commit_id":"6933301a8bad33dbeac25f3c9a417b7ae40916cc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"1d09a711d4c712a949e22c40ec4c41f274eb472a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"e75ae469_1666dbae","in_reply_to":"f0eccd3d_57515d4a","updated":"2025-11-10 08:52:35.000000000","message":"Done","commit_id":"6933301a8bad33dbeac25f3c9a417b7ae40916cc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"eb8ad55385524256f9b5e7170e5ac7c33e6507d8","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"cf0d2262_08fa4260","updated":"2025-11-03 09:45:33.000000000","message":"OK we have tempest results now\nhttps://storage.bhs.cloud.ovh.net/v1/AUTH_dcaab5e32b234d56b626f72581e3644c/zuul_opendev_logs_96f/openstack/96f51e5b4de64f4ca77aba4e4f1ef763/testr_results.html\n\nThe not working libvirt event handling makes the live migration and device attach / detach test cases failing. 
But simple boot delete cases works OK.","commit_id":"16ec74797ca94c0151f9abefd9de7b1e7ea306e6"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"4d49f447f04a0dd09780a1aaa2bb9b54c4ee173a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"4040d255_14dd9160","updated":"2025-11-04 17:46:38.000000000","message":"We have working live migration now with native threading \\o/\nhttps://storage.gra.cloud.ovh.net/v1/AUTH_dcaab5e32b234d56b626f72581e3644c/zuul_opendev_logs_071/openstack/07142d96394445fc9d4d5ad58b963223/testr_results.html","commit_id":"b617adfb0b58ffac872836ba92be28c24bf7c6ed"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9f620a5a32e1c710daaf332323cc37aced3c6ae7","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"4fbbb0cb_2233d4a0","updated":"2025-11-05 16:56:47.000000000","message":"recheck just to get more data point but I don\u0027t believe it will help","commit_id":"b617adfb0b58ffac872836ba92be28c24bf7c6ed"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9a4bea4a958db3581cd63fe4b7db01dd7c5cc20f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":10,"id":"4a8d2ad3_2c479bc7","updated":"2025-11-09 10:02:57.000000000","message":"26 failure left https://184bdcb3da6f14107fec-6cf4452aeae42378118fa7e7779efd66.ssl.cf1.rackcdn.com/openstack/7d87d50ce3ff4e4884e79f9cb75c10b0/testr_results.html\n\nThe majority is shelve/unshelve related. So that is what I have to look next","commit_id":"e31fb91e1a48db9a6b8e17b485f91a2da6c3e83f"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"c44d2106082bf53ebf9c6c4dc0530d2a9b9e307c","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":10,"id":"8c90bcee_6097542b","updated":"2025-11-07 20:48:09.000000000","message":"recheck  now we have libvirt events working. 
still some tests are failing and we have overall slowness to look at.","commit_id":"e31fb91e1a48db9a6b8e17b485f91a2da6c3e83f"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"1d09a711d4c712a949e22c40ec4c41f274eb472a","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":11,"id":"52e2e740_cd1de12f","updated":"2025-11-10 08:52:35.000000000","message":"We have a full green run of nova-next with threaded compute.\nhttps://7fb31eaa2eb369fc242e-b7be325c2c973618eb7074df9913ea2c.ssl.cf1.rackcdn.com/openstack/917ff8c286f14ffa91332be61b443ca8/testr_results.html\n\nA lot of cleanup work is needed in the patches below but at least we know that the direction is viable.","commit_id":"6a08ece503bc010362db58c65ec0e466e51afd91"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"dc967730e490883b765e6c4103785f57c6dba996","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":11,"id":"bcc2e9c9_edb795e5","in_reply_to":"52e2e740_cd1de12f","updated":"2026-02-24 15:29:27.000000000","message":"Done","commit_id":"6a08ece503bc010362db58c65ec0e466e51afd91"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"17e0ed079d1728b9228373562be7b3c1cbc8b32d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":27,"id":"8dcaf632_6144ead9","updated":"2026-01-13 08:45:55.000000000","message":"recheck","commit_id":"f4e1be48efdecef7eae35371594d64ed669e601c"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"6601f09071e8ce79a817f3b512d3e681e7572447","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":34,"id":"a60f85ed_ed704d6f","updated":"2026-01-22 14:31:05.000000000","message":"That one is fine by me as Sean said.","commit_id":"c35789145f5240ea28b465f5de8fc261c06090a6"},{"author":{"_account_id":8556,"name":"Ghanshyam Maan","display_name":"Ghanshyam Maan","email":"gmaan.os14@gmail.com","username":"ghanshyam"},"change_message_id":"f25c2fc8f062fad52f6256ed59560d9417ef834e","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":34,"id":"7a538dd9_05a696e3","updated":"2026-01-23 01:51:09.000000000","message":"ditto. 
libvirt event handler change is still needs to be reviewed but after that this lgtm to merge.","commit_id":"c35789145f5240ea28b465f5de8fc261c06090a6"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3f271521b03b4273f40003fae5a203fc63cada10","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":34,"id":"f6a000db_65e46558","updated":"2026-01-22 13:12:12.000000000","message":"this content is fine but i have some concerns with the previous patch","commit_id":"c35789145f5240ea28b465f5de8fc261c06090a6"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"191254e8132d5162ced2217faeb06910e85527d5","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":44,"id":"f5e503c3_d9ae2589","updated":"2026-01-30 15:48:07.000000000","message":"I found a bug while testing with 10 VMs per compute in parallel (and probably reproduced the oslo.vmware problem).\n\nThe following scenario leads to a high level deadlock:\n* switch to threading mode, the default executor pool size is 10\n* boot 10 VMs in parallel\n* compute gets 10 RPC request for build_and_run_instance\n* compute moves those request to the default executor due to the logic [1] this makes the default pool full.\n* build_and_run_instance tasks are progressing and spawning _allocate_network_async [2] to the same default executor and a bit later waiting for them to finish. But the executor is full due to the parent tasks. So we have a deadlock between the 10 parallel build_and_run_instance and the 10 parallel _allocate_network_async tasks.\n\n[1] https://github.com/openstack/nova/blob/59a7093915298973c72b6d1749a6acd27e0045a9/nova/compute/manager.py#L2452-L2460\n\n[2] https://github.com/openstack/nova/blob/59a7093915298973c72b6d1749a6acd27e0045a9/nova/network/model.py#L580\n\n---\n\nRelevant IRC discussion: https://meetings.opendev.org/irclogs/%23openstack-nova/%23openstack-nova.2026-01-30.log.html#openstack-nova.2026-01-30.log.html#t2026-01-30T15:02:52\n\n---\n\nI will do the following:\n* audit all the spawn calls on the compute side to see how many similar cases we have\n* move the build_and_run_instance tasks to a dedicated executor that will implement the limit for parallel builds that today is implemented by a semaphore\n* look into solutions that can catch the case when a task running in an executor tries to submit a new task to the same executor.","commit_id":"a21df4a8c63a48224694f993b02cb38ffc357394"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"c9e4618eaffe7da1bdb72fb39de0398575c1fa68","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":44,"id":"bb4a7fb9_5c91b4dc","updated":"2026-01-30 13:50:05.000000000","message":"recheck guest kernel crash\n```\ncurrently loaded modules: 8021q 8139cp 8390 9pnet 9pnet_virtio ahci cec dca drm drm_kms_helper e1000 e1000e failover fb_sys_fops garp hid hid_generic i2c_algo_bit igb igbvf ip6_udp_tunnel ip_tables isofs libahci libcrc32c llc mii mrp ne2k_pci net_failover nls_ascii nls_iso8859_1 nls_utf8 pcnet32 qemu_fw_cfg rc_core sctp stp syscopyarea sysfillrect sysimgblt udp_tunnel usbhid virtio_blk virtio_dma_buf virtio_gpu virtio_input virtio_net virtio_rng virtio_scsi virtiofs x_tables \ninfo: initramfs loading root from /dev/vda1\n/sbin/init: can\u0027t load library 
\u0027libtirpc.so.3\u0027\n[    9.000734] Kernel panic - not syncing: Attempted to kill init! exitcode\u003d0x00001000\n[    9.001688] CPU: 0 PID: 1 Comm: init Not tainted 5.15.0-117-generic #127-Ubuntu\n[    9.002132] Hardware name: OpenStack Foundation OpenStack Nova, BIOS 1.16.3-debian-1.16.3-2 04/01/2014\n[    9.002668] Call Trace:\n[    9.003341]  \u003cTASK\u003e\n[    9.003613]  show_stack+0x52/0x5c\n[    9.004277]  dump_stack_lvl+0x4a/0x63\n[    9.004479]  dump_stack+0x10/0x16\n[    9.004650]  panic+0x15c/0x33b\n[    9.004838]  do_exit.cold+0x15/0xa0\n[    9.005120]  __x64_sys_exit+0x1b/0x20\n[    9.005469]  x64_sys_call+0x1f30/0x1fa0\n[    9.005778]  do_syscall_64+0x56/0xb0\n[    9.006071]  ? vm_area_free+0x18/0x20\n[    9.006428]  ? __do_munmap+0x2af/0x500\n[    9.006756]  ? __vm_munmap+0x96/0x130\n[    9.007057]  ? exit_to_user_mode_prepare+0x37/0xb0\n[    9.007422]  ? syscall_exit_to_user_mode+0x2c/0x50\n[    9.007811]  ? x64_sys_call+0x1e07/0x1fa0\n[    9.008196]  ? do_syscall_64+0x63/0xb0\n[    9.008405]  ? vfs_write+0x1d5/0x270\n[    9.008653]  ? ksys_write+0x67/0xf0\n[    9.008849]  ? exit_to_user_mode_prepare+0x37/0xb0\n[    9.009065]  ? syscall_exit_to_user_mode+0x2c/0x50\n[    9.013655]  ? x64_sys_call+0x47c/0x1fa0\n[    9.015992]  ? do_syscall_64+0x63/0xb0\n[    9.017978]  ? exit_to_user_mode_loop+0x7e/0x160\n[    9.020140]  ? exit_to_user_mode_prepare+0x99/0xb0\n[    9.022155]  ? irqentry_exit_to_user_mode+0xe/0x20\n[    9.024517]  ? irqentry_exit+0x1d/0x30\n[    9.026866]  ? sysvec_apic_timer_interrupt+0x4e/0x90\n[    9.029081]  entry_SYSCALL_64_after_hwframe+0x6c/0xd6\n[    9.031842] RIP: 0033:0x7f1f7536855e\n[    9.034459] Code: 05 d7 2a 00 00 4c 89 f9 bf 02 00 00 00 48 8d 35 fb 0d 00 00 48 8b 10 31 c0 e8 50 d2 ff ff bf 10 00 00 00 b8 3c 00 00 00 0f 05 \u003c48\u003e 8d 15 f3 2a 00 00 f7 d8 89 02 48 83 ec 20 49 8b 8c 24 b8 00 00\n[    9.042224] RSP: 002b:00007fffe495ae50 EFLAGS: 00000207 ORIG_RAX: 000000000000003c\n[    9.047817] RAX: ffffffffffffffda RBX: 00007fffe495c100 RCX: 00007f1f7536855e\n[    9.051407] RDX: 0000000000000002 RSI: 0000000000001000 RDI: 0000000000000010\n[    9.053980] RBP: 00007fffe495c0e0 R08: 00007f1f75361000 R09: 00007f1f7536101a\n[    9.056260] R10: 0000000000000001 R11: 0000000000000207 R12: 00007f1f75362040\n[    9.058288] R13: 00000000004bae50 R14: 0000000000000000 R15: 0000000000403d66\n[    9.060638]  \u003c/TASK\u003e\n[    9.063519] Kernel Offset: 0x29a00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)\n[    9.066777] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode\u003d0x00001000 ]---\n\n```","commit_id":"a21df4a8c63a48224694f993b02cb38ffc357394"},{"author":{"_account_id":8556,"name":"Ghanshyam Maan","display_name":"Ghanshyam Maan","email":"gmaan.os14@gmail.com","username":"ghanshyam"},"change_message_id":"492dc845805cd56e869237c021dc8d0f5b63e033","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":44,"id":"394cf722_91d5ea70","in_reply_to":"074f593d_3bb94bed","updated":"2026-02-03 18:25:35.000000000","message":"Nice catch, thanks for doing that good amount of testing on it. Also, default executor is used for the libvirt event also https://review.opendev.org/c/openstack/nova/+/974445/13/nova/virt/libvirt/host.py#350\n\nAs you are auditing it, can we somwhere add in default executor doc string or log the users (via class_name etc) in _get_default_executor about who all using it. 
That will help to know if we need to increase the default size or use different executor?\n\nOr somewhere we should make guidlines or checks about where all we can use this global default executor and what are the implication?","commit_id":"a21df4a8c63a48224694f993b02cb38ffc357394"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"63e214eb3dbd45d70a1fb4703cb76083bc4f33fd","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":44,"id":"d0a6557d_a050ae15","in_reply_to":"0bc3ad85_fa454965","updated":"2026-02-03 10:56:59.000000000","message":"still open","commit_id":"a21df4a8c63a48224694f993b02cb38ffc357394"},{"author":{"_account_id":8556,"name":"Ghanshyam Maan","display_name":"Ghanshyam Maan","email":"gmaan.os14@gmail.com","username":"ghanshyam"},"change_message_id":"eb62262fb42eeaa43fd6a33a1696154ce64aca28","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":44,"id":"dce326e5_cba15ebc","in_reply_to":"394cf722_91d5ea70","updated":"2026-02-03 18:41:47.000000000","message":"I am also rethinking on need of default/global executor which can create more deadlock or performance issue like this good one you found. let\u0027s discuss it in eventlet meeting in case I am missing the context of its need.","commit_id":"a21df4a8c63a48224694f993b02cb38ffc357394"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"2fef21c3882dc36c464005422849aa58cad155a2","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":44,"id":"c41182d7_0a861ed7","in_reply_to":"513f7c34_c7f5d35f","updated":"2026-02-24 15:29:13.000000000","message":"This is now fixed by the parent patch moving tasks to different executors.","commit_id":"a21df4a8c63a48224694f993b02cb38ffc357394"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"2b483bd50e84c18cea54debbacb58f676f04edc1","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":44,"id":"074f593d_3bb94bed","in_reply_to":"d0a6557d_a050ae15","updated":"2026-02-03 14:59:46.000000000","message":"So I can try to collect the cases when a child task tries to use the same executor as its parent here https://review.opendev.org/c/openstack/nova/+/975515\n\n---\n\nThis is the places we call spawn_* \n```\n❯ grep utils.spawn nova -R -A 2 | grep -v nova/test\nnova/network/model.py:        self._future \u003d utils.spawn(async_method, *args, **kwargs)\n--\nnova/compute/manager.py:        utils.spawn(_locked_do_build_and_run_instance,\n--\nnova/compute/manager.py:            future \u003d nova.utils.spawn_on(\nnova/compute/manager.py-                self._live_migration_executor,\nnova/compute/manager.py-                self._do_live_migration, context, dest, instance,\n--\nnova/compute/manager.py:                    nova.utils.spawn_on(\nnova/compute/manager.py-                        self._sync_power_executor, _sync, db_instance)\nnova/compute/manager.py-\n--\nnova/virt/libvirt/host.py:        utils.spawn(self._dispatch_thread)\n--\nnova/virt/libvirt/host.py:        utils.spawn(self._conn_event_thread)\n--\nnova/virt/libvirt/driver.py:        future \u003d utils.spawn(self._live_migration_operation,\n--\nnova/conductor/manager.py:                    future \u003d utils.spawn_on(cache_image_executor,\nnova/conductor/manager.py-                       
             wrap_cache_images,\n--\nnova/scheduler/host_manager.py:        utils.spawn(_async_init_instance_info, computes_by_cell)\n--\nnova/context.py:            future \u003d utils.spawn_on(\nnova/context.py-                executor,\nnova/context.py-                gather_result, cell_mapping.uuid, fn, cctxt, *args, **kwargs)\n```\nBased on this we have a limited set of places to investigate:\ncompute:\n* _locked_do_build_and_run_instance and its sub tasks (network async_method)\n* _do_live_migration and its sub tasks (_live_migration_operation)\n* libvirt event handling thread if the result of any event leads to subtasks spawns (probably not)\n\nIn other service we need to check that any above task does scatter_gather or not. But even if it does scatter_gather has its own executor. So probably not an issue.","commit_id":"a21df4a8c63a48224694f993b02cb38ffc357394"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"68bbde59bc9540b92770406397dfc90bb462c72e","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":44,"id":"513f7c34_c7f5d35f","in_reply_to":"dce326e5_cba15ebc","updated":"2026-02-04 12:12:41.000000000","message":"\u003e Nice catch, thanks for doing that good amount of testing on it. Also, default executor is used for the libvirt event also https://review.opendev.org/c/openstack/nova/+/974445/13/nova/virt/libvirt/host.py#350\n\nGood point I need to think about his too.\n\n\u003e As you are auditing it, can we somwhere add in default executor doc string or log the users (via class_name etc) in _get_default_executor about who all using it. That will help to know if we need to increase the default size or use different executor?\n\nThere is almost a way to get this usage info already today by setting the [DEFAULT]thread_pool_statistic_period to 0. That will log executor statistics at each task submission. I just need to add the name of the function to the log.\n\nhttps://github.com/openstack/nova/blob/ba24639b8dd34a19885298cf728e58dd7db9e703/nova/utils.py#L599\n\nhttps://github.com/openstack/nova/blob/ba24639b8dd34a19885298cf728e58dd7db9e703/nova/utils.py#L1358-L1373\n\nThere is also an independent change I\u0027m trialing to track parent - child task executor usage to know if a task submits a child task into the same executor as its parent. https://review.opendev.org/c/openstack/nova/+/975515/1/nova/utils.py\n\n\u003e I am also rethinking on need of default/global executor which can create more deadlock or performance issue like this good one you found. let\u0027s discuss it in eventlet meeting in case I am missing the context of its need.\n\nGood observation. I agree that having a shared (global / default) executor is a major factor in this issue. 
If each type of task uses it own executor then such a deadlock is less likely.","commit_id":"a21df4a8c63a48224694f993b02cb38ffc357394"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"62f7ef828c38c33f322f72737b4016ace9b9f242","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":44,"id":"0bc3ad85_fa454965","in_reply_to":"f5e503c3_d9ae2589","updated":"2026-02-02 14:06:43.000000000","message":"This is still open.","commit_id":"a21df4a8c63a48224694f993b02cb38ffc357394"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"cf9eea6c7921add98776807a01b21488baf01017","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":58,"id":"456046ed_4f685b61","updated":"2026-02-25 12:04:36.000000000","message":"recheck guest kernel crash\n```\ninfo: initramfs loading root from /dev/vda1\n/sbin/init: can\u0027t load library \u0027libtirpc.so.3\u0027\n[    9.991583] Kernel panic - not syncing: Attempted to kill init! exitcode\u003d0x00001000\n[    9.992472] CPU: 0 PID: 1 Comm: init Not tainted 5.15.0-117-generic #127-Ubuntu\n[    9.993109] Hardware name: OpenStack Foundation OpenStack Nova, BIOS 1.16.3-debian-1.16.3-2 04/01/2014\n[    9.993867] Call Trace:\n[    9.994803]  \u003cTASK\u003e\n[    9.995222]  show_stack+0x52/0x5c\n[    9.995960]  dump_stack_lvl+0x4a/0x63\n[    9.996220]  dump_stack+0x10/0x16\n[    9.996438]  panic+0x15c/0x33b\n[    9.996658]  do_exit.cold+0x15/0xa0\n[    9.996890]  __x64_sys_exit+0x1b/0x20\n[    9.997141]  x64_sys_call+0x1f30/0x1fa0\n[    9.997392]  do_syscall_64+0x56/0xb0\n[    9.997637]  ? vfs_write+0x1d5/0x270\n[    9.997884]  ? ksys_write+0x67/0xf0\n[    9.998126]  ? exit_to_user_mode_prepare+0x37/0xb0\n[    9.998468]  ? syscall_exit_to_user_mode+0x2c/0x50\n[   10.004347]  ? x64_sys_call+0x47c/0x1fa0\n[   10.004636]  ? do_syscall_64+0x63/0xb0\n[   10.004890]  ? ksys_write+0x67/0xf0\n[   10.005134]  ? exit_to_user_mode_prepare+0x37/0xb0\n[   10.005428]  ? syscall_exit_to_user_mode+0x2c/0x50\n[   10.005781]  ? x64_sys_call+0x47c/0x1fa0\n[   10.006166]  ? do_syscall_64+0x63/0xb0\n[   10.011845]  ? exit_to_user_mode_prepare+0x37/0xb0\n[   10.015103]  ? irqentry_exit_to_user_mode+0xe/0x20\n[   10.018245]  ? irqentry_exit+0x1d/0x30\n[   10.021249]  ? exc_page_fault+0x89/0x170\n[   10.024150]  entry_SYSCALL_64_after_hwframe+0x6c/0xd6\n[   10.027258] RIP: 0033:0x7f3af1a4655e\n[   10.030579] Code: 05 d7 2a 00 00 4c 89 f9 bf 02 00 00 00 48 8d 35 fb 0d 00 00 48 8b 10 31 c0 e8 50 d2 ff ff bf 10 00 00 00 b8 3c 00 00 00 0f 05 \u003c48\u003e 8d 15 f3 2a 00 00 f7 d8 89 02 48 83 ec 20 49 8b 8c 24 b8 00 00\n[   10.037179] RSP: 002b:00007fff05615530 EFLAGS: 00000207 ORIG_RAX: 000000000000003c\n[   10.040483] RAX: ffffffffffffffda RBX: 00007fff056167e0 RCX: 00007f3af1a4655e\n[   10.043739] RDX: 0000000000000002 RSI: 0000000000001000 RDI: 0000000000000010\n[   10.046992] RBP: 00007fff056167c0 R08: 00007f3af1a3f000 R09: 00007f3af1a3f01a\n[   10.050245] R10: 0000000000000001 R11: 0000000000000207 R12: 00007f3af1a40040\n[   10.053499] R13: 00000000004bae50 R14: 0000000000000000 R15: 0000000000403d66\n[   10.056830]  \u003c/TASK\u003e\n[   10.060858] Kernel Offset: 0x35200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)\n[   10.064856] ---[ end Kernel panic - not syncing: Attempted to kill init! 
exitcode\u003d0x00001000 ]---\n```","commit_id":"4bce4480b9af6ac57174c217518f54f92fd8c8a2"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"3a6c8a70910eff4bca82e10254ab32b8d677178a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":58,"id":"bb634f11_9384ef67","updated":"2026-02-25 21:23:10.000000000","message":"recheck nova-next post failure","commit_id":"4bce4480b9af6ac57174c217518f54f92fd8c8a2"},{"author":{"_account_id":8556,"name":"Ghanshyam Maan","display_name":"Ghanshyam Maan","email":"gmaan.os14@gmail.com","username":"ghanshyam"},"change_message_id":"70f7bae0e7056714b0e9ad0b9c6e305879ad1e1f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":58,"id":"0682f896_2017f04f","updated":"2026-02-26 04:30:05.000000000","message":"recheck parent is in gate pipeline","commit_id":"4bce4480b9af6ac57174c217518f54f92fd8c8a2"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"81e456fe789d57d3c4b876fc3863f24b502a807e","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":58,"id":"321af1f0_eccacc2b","updated":"2026-02-25 09:08:51.000000000","message":"recheck the problematic tempest test is reverted https://review.opendev.org/c/openstack/tempest/+/977922","commit_id":"4bce4480b9af6ac57174c217518f54f92fd8c8a2"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"a9d35a696e2682d9401f10f731eb66d9e7f6487d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":58,"id":"7dcafc7d_621e62a6","updated":"2026-02-25 15:07:03.000000000","message":"recheck tox-cover timed out. 
Is this the FF crunch yet?","commit_id":"4bce4480b9af6ac57174c217518f54f92fd8c8a2"}],"doc/source/admin/concurrency.rst":[{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"3e87f35443fe8f53df77bec010be1b2b9a760b57","unresolved":true,"context_lines":[{"line_number":34,"context_line":"   Since nova 32.0.0 (2025.2 Flamingo) the nova-scheduler, nova-metadata, and"},{"line_number":35,"context_line":"   nova-api can be switched to native threading mode."},{"line_number":36,"context_line":""},{"line_number":37,"context_line":"   Since nova 33.0.0 (2026.1 Gazpacho) also the nova-conductor, and the"},{"line_number":38,"context_line":"   nova-compute can be switched to native threading mode."},{"line_number":39,"context_line":""},{"line_number":40,"context_line":""},{"line_number":41,"context_line":"Tunables for the native threading mode"}],"source_content_type":"text/x-rst","patch_set":22,"id":"76ee1079_3fa72ab2","line":38,"range":{"start_line":37,"start_character":0,"end_line":38,"end_character":57},"updated":"2025-12-11 11:31:34.000000000","message":"```suggestion\n   Since nova 33.0.0 (2026.1 Gazpacho) the nova-conductor and nova-compute\n   can also be switched to native threading mode.\n```","commit_id":"67fdd55643afdf79675e2a969e3a0fa05133cb7f"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"1a3fc4a531d50e845e4cf47a1cdb9d6f9981c61a","unresolved":false,"context_lines":[{"line_number":34,"context_line":"   Since nova 32.0.0 (2025.2 Flamingo) the nova-scheduler, nova-metadata, and"},{"line_number":35,"context_line":"   nova-api can be switched to native threading mode."},{"line_number":36,"context_line":""},{"line_number":37,"context_line":"   Since nova 33.0.0 (2026.1 Gazpacho) also the nova-conductor, and the"},{"line_number":38,"context_line":"   nova-compute can be switched to native threading mode."},{"line_number":39,"context_line":""},{"line_number":40,"context_line":""},{"line_number":41,"context_line":"Tunables for the native threading mode"}],"source_content_type":"text/x-rst","patch_set":22,"id":"8295e575_8a08e6eb","line":38,"range":{"start_line":37,"start_character":0,"end_line":38,"end_character":57},"in_reply_to":"76ee1079_3fa72ab2","updated":"2025-12-19 15:36:21.000000000","message":"Done","commit_id":"67fdd55643afdf79675e2a969e3a0fa05133cb7f"}]}
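The high-level deadlock described in the patchset-level discussion above (ten build_and_run_instance parent tasks fill the 10-worker default executor, then each spawns an _allocate_network_async child into the same executor and blocks waiting for it) can be reproduced in isolation. A minimal sketch, assuming only the Python standard library ThreadPoolExecutor rather than Nova's spawn/spawn_on helpers; the function names are illustrative placeholders, not Nova code:

```python
# Minimal reproduction of the "parent tasks starve their own children" deadlock
# discussed above. Illustrative only: uses the stdlib ThreadPoolExecutor, not
# Nova's executor utilities; the task functions are hypothetical stand-ins.
import concurrent.futures
import time

POOL_SIZE = 10  # matches the default executor size mentioned in the review
executor = concurrent.futures.ThreadPoolExecutor(max_workers=POOL_SIZE)


def allocate_network():
    # stand-in for the child task (_allocate_network_async in the review)
    time.sleep(0.1)
    return "network-info"


def build_and_run_instance(i):
    # stand-in for a parent task: it submits a child into the *same* executor
    # and blocks on its result while still occupying a worker itself
    child = executor.submit(allocate_network)
    return child.result()


if __name__ == "__main__":
    # 10 parents occupy all 10 workers; every child stays queued forever,
    # so every parent blocks in child.result() and nothing makes progress.
    parents = [executor.submit(build_and_run_instance, i) for i in range(POOL_SIZE)]
    done, stuck = concurrent.futures.wait(parents, timeout=5)
    print(f"finished: {len(done)}, stuck: {len(stuck)}")  # finished: 0, stuck: 10
    # cancel the queued children so the parents unblock and the process can exit
    # cleanly (cancel_futures requires Python >= 3.9)
    executor.shutdown(wait=False, cancel_futures=True)
```

Giving the parent tasks a dedicated executor (or any pool not shared with their children), as proposed in the review, breaks the cycle: a child task can then always find a free worker.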
