)]}'
{"/COMMIT_MSG":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"1458c509125ca395eec9883da5e4e2f34eb1e26b","unresolved":true,"context_lines":[{"line_number":15,"context_line":"* Do more testing especially with the threading mode enabled"},{"line_number":16,"context_line":"* Report pool statistics"},{"line_number":17,"context_line":"* Document how to configure the DB backend to ensure that"},{"line_number":18,"context_line":"long queries are not holding up our worker threads."},{"line_number":19,"context_line":""},{"line_number":20,"context_line":"Change-Id: Ibff6c73ad9af911a42204e53fee31ed5537c829d"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":7,"id":"43babe33_25e93ea8","line":18,"updated":"2025-05-05 18:52:43.000000000","message":"for mysqld on the server side:\n```\nstack@aio:~$ grep max_execution_time /etc/mysql/my.cnf\nmax_execution_time \u003d 30000\n```\nfor pymysql on the client side:\n```\nstack@aio:/opt/stack/nova$ grep connection /etc/nova/nova.conf \nconnection \u003d mysql+pymysql://root:admin@127.0.0.1/nova_cell0?charset\u003dutf8\u0026plugin\u003ddbcounter\nconnection_parameters \u003d \"read_timeout\u003d10\"\nconnection \u003d mysql+pymysql://root:admin@127.0.0.1/nova_api?charset\u003dutf8\u0026plugin\u003ddbcounter\nconnection_parameters \u003d \"read_timeout\u003d10\"\n```","commit_id":"0aa7770bd1930710d2eb0379153e759e1054f9c7"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f2a391ee5784ee179f0838006e367b33c276f974","unresolved":false,"context_lines":[{"line_number":15,"context_line":"* Do more testing especially with the threading mode enabled"},{"line_number":16,"context_line":"* Report pool statistics"},{"line_number":17,"context_line":"* Document how to configure the DB backend to ensure that"},{"line_number":18,"context_line":"long queries are not holding up our worker threads."},{"line_number":19,"context_line":""},{"line_number":20,"context_line":"Change-Id: Ibff6c73ad9af911a42204e53fee31ed5537c829d"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":7,"id":"6e3ad984_8fe69fe2","line":18,"in_reply_to":"43babe33_25e93ea8","updated":"2025-05-09 14:40:10.000000000","message":"Documentation is added in https://review.opendev.org/c/openstack/nova/+/949364","commit_id":"0aa7770bd1930710d2eb0379153e759e1054f9c7"}],"/PATCHSET_LEVEL":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"43e4ef2563f37a7912adef63bcf554b1ef737eac","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":9,"id":"58b86874_84f6a581","updated":"2025-05-09 07:20:15.000000000","message":"from IRC:\n17:20 \u003c sean-k-mooney\u003e gibi: and yes teh reset looks cleaner then i was expecting too so +1 on that\n17:23 \u003c sean-k-mooney\u003e gibi: by the way we likely should modify our base testcase to ensure we shutdown teh threadpool and reset it between test runs\n17:24 \u003c sean-k-mooney\u003e i belive i had that in my verison ot prevent leakign thread betwen tests\n17:24 \u003c sean-k-mooney\u003e if we ever do leak a thread in the pool it should fail the test\n17:24 \u003c sean-k-mooney\u003e the same way our greenthread leak proctection works\n17:26 \u003c sean-k-mooney\u003e in the test we can kind of get awar with it by using the synconousthreadpool via a fixture too\n17:27 \u003c sean-k-mooney\u003e but we do have fixture to mange this currently https://github.com/openstack/nova/blob/master/nova/test.py#L197 https://github.com/openstack/nova/blob/master/nova/test.py#L324-L329\n17:27 \u003c sean-k-mooney\u003e so we likely need to extend this to the new scather gather pool\n17:29 \u003c sean-k-mooney\u003e i made the SynchronousThreadPoolExecutorFixture also apply to the futurist.ThreadPoolExecutor https://review.opendev.org/c/openstack/nova/+/922497/5/nova/tests/fixtures/nova.py#1254\n17:33 \u003c sean-k-mooney\u003e gibi: bug i think you shoudl also update https://github.com/openstack/nova/blob/master/nova/tests/fixtures/nova.py#L1191-L1247 and \n                       https://github.com/openstack/nova/blob/master/nova/tests/fixtures/nova.py#L2056 or add new ones for this thread pool\n\nGood point about the fixtures and testing. My first stab at the fixture change is current in https://review.opendev.org/c/openstack/nova/+/948087/9/nova/tests/fixtures/nova.py but it needs to be moved earlier and extended to cover the new pool introduced here.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"bd77a1abd394276703223f6654714188c8c65ad8","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":9,"id":"1cc052a1_b74f1b1c","in_reply_to":"58b86874_84f6a581","updated":"2025-05-13 08:00:50.000000000","message":"Moved the test fixture changes here now.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"e71b1685720876d51b1a0d1f4c03b50fdc88b7b1","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":10,"id":"6a7fe8dd_ec6fd98f","updated":"2025-05-09 14:41:13.000000000","message":"Name change looks good. I haven\u0027t fully reviewed this, but I think it looks good from a quick re-read.","commit_id":"23dc3ff6c1ef949bb1497f32b2761ac78dd0232c"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"47541d00b148e11232c854b0b4353132c16e774e","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":12,"id":"dd91fe6f_4a0e53e2","updated":"2025-05-13 08:01:25.000000000","message":"CI is green, I need to add some unit test coverage for the new code but other than that I consider this ready.","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1f61fddb5018a9bffb93f67f36c033fb61a1f660","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":12,"id":"967e8273_e0efe915","updated":"2025-05-13 11:54:50.000000000","message":"i think this looks good over all.\n\ni called out one possible edge case we could impove in the future timeout handelign\ninline","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"ecd4cb9f490b59d5719eb438ae7dbad7bc0cc181","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":12,"id":"04d20dd2_ea8fb409","in_reply_to":"967e8273_e0efe915","updated":"2025-06-03 13:08:03.000000000","message":"Done","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"039865e19478c129046433bde30c68dff78c52cc","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":13,"id":"3f21cb22_fb86b5ca","updated":"2025-05-14 13:06:07.000000000","message":"Added extra tests and fixed the review feedback","commit_id":"18b3a82bcd21b9841e576560bb220254b066569f"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"d5864c4c2c35c643abaccaf782df48ded21d1ace","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":14,"id":"661265b4_b49bc27f","updated":"2025-05-27 08:01:50.000000000","message":"I will improve on the test cleanup","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb5d0b0dc3bf669fce54bc43bc0ea53abe2ca25b","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":14,"id":"7a59e5cd_770a7ca0","updated":"2025-05-26 13:01:48.000000000","message":"a couple of test quetion inline but i think im happy with the production code as is","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"835fd5c99bffa9e080ee18a22a2a0b918dab0e5f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":14,"id":"0174fb75_579ff94a","updated":"2025-05-19 14:02:54.000000000","message":"recheck bug/2109428 (nova-ceph-multistore)","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f2a5c8bb96ff6aec2e830b9bd338477a0faf5f53","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":16,"id":"ff5496ce_577aa7d1","updated":"2025-06-05 13:05:10.000000000","message":"@rene.ribaud@gmail.com Thanks for the review. I tried to answer your questions inline. Let me know if you still have additional questions based on this information.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"2304e23b537ea30754b1962ccc465ac51dd971f0","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":16,"id":"46d22667_aed332b4","updated":"2025-06-04 14:17:41.000000000","message":"A -1 just to notify you about my comments and questions.\nI spent a lot of time reviewing this path. I need some clarifications on some parts so I\u0027ll appreciate if you could answer my questions.\n\nOverall, I understand what this patch is doing and learn a lot from it.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d68f0d37aba6d329d618c22ff7df3ee3005eb020","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":16,"id":"0b217647_24bd0c72","updated":"2025-06-09 14:08:09.000000000","message":"A couple complaints in the tests, -1 for visibility at least.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"53eb6a94b441a4a535fab99aeae95c8064163dbb","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":16,"id":"6ace3f5d_bc61d431","updated":"2025-06-10 15:22:42.000000000","message":"Thanks for answering my questions.\nFrom my point of view this patch looks good.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a4fd51b897a6fdbc18046b537f64cdb4baa8d9c7","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":17,"id":"b96b732b_353770fb","updated":"2025-06-18 18:20:04.000000000","message":"I now understand where I lost track of what the tests were doing such that I thought we were losing (or altering) our scenario coverage. Thanks for explaining and the tweaks from ps16 make it more obvious now. One comment about a potential comment, but only if you respin or can put it further up the stack.\n\nI think the spawn fixture stuff is going to be addressed separately, according to gibi, so I think I\u0027m good with this now.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"886c4d76f603dc32e67312eac9213557c8751c6d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":17,"id":"f33af639_1d48d0f4","updated":"2025-06-25 17:09:29.000000000","message":"I will push a follow up","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"eea7031dfa59be923abb40e1a41689b45fa66da3","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":17,"id":"d1328b22_3362a5c9","updated":"2025-06-25 14:43:42.000000000","message":"Now that I was able to review up to spawn_on(), I think I agree with the consensus we\u0027re currently discussing on our upstream eventlet gmeet session, +2.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":17,"id":"4f797d1c_1be3de0e","updated":"2025-06-23 14:55:29.000000000","message":"for now, I have done a first review of that Facade change, nothing hurting me to +2 but I\u0027d want to review the later patches before telling that.\n\nI still have a few nits, nothing critical.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"98efe15de9efb809fed5d37b1fd8b5ddbf7ec33c","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":17,"id":"b1b30843_2d054a98","updated":"2025-06-25 14:23:45.000000000","message":"im holding +w for now as we are about to have a sync call on this but i would like to appvoe this today and start makeing progress on the sereis.\n\nthere are some valid point but i think those woudl be best adressed in a follow up patch.\n\nif there are no objects on the call ill add +w after","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"7cc506d388659f9ae0e30988297ce1071d018dcd","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":17,"id":"54019674_b57b6363","updated":"2025-06-25 14:59:38.000000000","message":"no objections on the call so adding +w.\n\nwe resolved to make incremental progress so that we can leave this bake in ci.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"ff6060136f5a56c1e4d3cc96cdeba6a42c5a2bd3","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":17,"id":"e62ff861_76570900","updated":"2025-06-25 17:16:03.000000000","message":"recheck\n\nIt is a slightly unstable functional test.\n```\n  File \"/home/zuul/src/opendev.org/openstack/nova/nova/tests/functional/regressions/test_bug_1938326.py\", line 63, in test_migrate_from_disabled_host\n```\nIt failed twice in the last 7 days and the other hit was not in the eventlet series so I believe it was unstable regardless of the eventlet series\n\n```\n❯ logsearch log --job-group nova-functional --result FAILURE --days 7 test_bug_1938326.py\n...\nBuilds with matching logs 2/16:\n+----------------------------------+---------------------+----------------+----------+-----------------------------------+--------+---------------------------+\n| uuid                             | finished            | project        | pipeline | review                            | branch | job                       |\n+----------------------------------+---------------------+----------------+----------+-----------------------------------+--------+---------------------------+\n| f629389b319b483487d068102f914827 | 2025-06-25T15:40:31 | openstack/nova | gate     | https://review.opendev.org/947966 | master | nova-tox-functional-py312 |\n| 31ced7b49dc6466eacaa195db33fe356 | 2025-06-23T15:05:06 | openstack/nova | check    | https://review.opendev.org/951640 | master | nova-tox-functional-py312 |\n+----------------------------------+---------------------+----------------+----------+-----------------------------------+--------+---------------------------+\n```","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"0030570b69afe629a250d01a5f093ae05d16e374","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":17,"id":"6c39df4b_c875c556","updated":"2025-06-26 07:00:22.000000000","message":"recheck network connectivity issues in nova-grenade-multinode","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"}],"nova/cmd/scheduler.py":[{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"2304e23b537ea30754b1962ccc465ac51dd971f0","unresolved":true,"context_lines":[{"line_number":49,"context_line":"    # Determine the number of workers; if not specified in config, default"},{"line_number":50,"context_line":"    # to number of CPUs"},{"line_number":51,"context_line":"    workers \u003d CONF.scheduler.workers or processutils.get_worker_count()"},{"line_number":52,"context_line":"    # NOTE(gibi): The oslo.service backend creates the worker processes"},{"line_number":53,"context_line":"    # via os.fork. As nova already initialized these executor(s)"},{"line_number":54,"context_line":"    # in the master process, and os.fork\u0027s behavior is to copy the state of"},{"line_number":55,"context_line":"    # parent to the child process, we destroy the executor in the parent"},{"line_number":56,"context_line":"    # process before the forking, so that the workers initialize new"},{"line_number":57,"context_line":"    # executor(s) and therefore avoid working with the wrong internal executor"},{"line_number":58,"context_line":"    # state (i.e. number of workers idle in the pool). A long therm solution"},{"line_number":59,"context_line":"    # would be to use os.spawn instead of os.fork for the workers."},{"line_number":60,"context_line":"    utils.destroy_scatter_gather_executor()"},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"    service.serve(server, workers\u003dworkers)"}],"source_content_type":"text/x-python","patch_set":16,"id":"51e1c156_4344749e","line":59,"range":{"start_line":52,"start_character":0,"end_line":59,"end_character":66},"updated":"2025-06-04 14:17:41.000000000","message":"Last time you explained this behavior in the Nova meeting, I was a bit confused.\nReading this comment makes the intent clear and I understand the problem.\n\nHowever, I am still struggling to find where the executor is initialized before this point.\nBecause I think the executor is initialized in the context, which is part of a user request.\n\nSo, this is possible if we receive a request between line 46 and 62 ? Am i right ?\n\n\nAlso as get_scatter_gather_executor() is called by the api should a similar approch be done on nova-api ?","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f2a5c8bb96ff6aec2e830b9bd338477a0faf5f53","unresolved":true,"context_lines":[{"line_number":49,"context_line":"    # Determine the number of workers; if not specified in config, default"},{"line_number":50,"context_line":"    # to number of CPUs"},{"line_number":51,"context_line":"    workers \u003d CONF.scheduler.workers or processutils.get_worker_count()"},{"line_number":52,"context_line":"    # NOTE(gibi): The oslo.service backend creates the worker processes"},{"line_number":53,"context_line":"    # via os.fork. As nova already initialized these executor(s)"},{"line_number":54,"context_line":"    # in the master process, and os.fork\u0027s behavior is to copy the state of"},{"line_number":55,"context_line":"    # parent to the child process, we destroy the executor in the parent"},{"line_number":56,"context_line":"    # process before the forking, so that the workers initialize new"},{"line_number":57,"context_line":"    # executor(s) and therefore avoid working with the wrong internal executor"},{"line_number":58,"context_line":"    # state (i.e. number of workers idle in the pool). A long therm solution"},{"line_number":59,"context_line":"    # would be to use os.spawn instead of os.fork for the workers."},{"line_number":60,"context_line":"    utils.destroy_scatter_gather_executor()"},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"    service.serve(server, workers\u003dworkers)"}],"source_content_type":"text/x-python","patch_set":16,"id":"a0a7f02b_c5d324fa","line":59,"range":{"start_line":52,"start_character":0,"end_line":59,"end_character":66},"in_reply_to":"51e1c156_4344749e","updated":"2025-06-05 13:05:10.000000000","message":"\u003e However, I am still struggling to find where the executor is initialized before this point.\n\u003e Because I think the executor is initialized in the context, which is part of a user request.\n\nThe executor is initialized at first use. And the first use actually happens during L46 above.\n\n* https://github.com/openstack/nova/blob/a1c47fc242b6002d2be60fc41176ce29d19d924e/nova/cmd/scheduler.py#L45\n* https://github.com/openstack/nova/blob/a1c47fc242b6002d2be60fc41176ce29d19d924e/nova/service.py#L258\n* https://github.com/openstack/nova/blob/a1c47fc242b6002d2be60fc41176ce29d19d924e/nova/utils.py#L1122\n* https://github.com/openstack/nova/blob/a1c47fc242b6002d2be60fc41176ce29d19d924e/nova/objects/service.py#L626\n\n\u003e So, this is possible if we receive a request between line 46 and 62 ? Am i right ?\n\nThe scheduler gets the requests via RPC the RPC interface is initialized at\n\nhttps://github.com/openstack/nova/blob/a1c47fc242b6002d2be60fc41176ce29d19d924e/nova/service.py#L190\nwhich happens only after the fork when oslo.sevice calls the start() method on the passed in sevice (server) after L62.\n\n\u003e Also as get_scatter_gather_executor() is called by the api should a similar approch be done on nova-api ?\n\nCorrect, the nova-api, nova-conductor and nova-scheduler all needs this logic as soon as they can run in threading mode. The current series only aims to enable the nova-scheduler to run in threading mode hence this is now only added to the nova-scheduler entry point. We will add very similar code to the rest a bit later.\n\nThis issue existed in eventlet mode for a long time (we always used fork for the workers in oslo.service). But the eventlet mode is more tolerant about this issue as the greenthreads are pretty cheap and therefore our executor is configured to allow a lot more greenthreads. I.e. loosing 2 greenthreads from the start out of 100 wasn\u0027t really visible so far. But loosing 2 threads out of 5 with threading mode is much more visible.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"53eb6a94b441a4a535fab99aeae95c8064163dbb","unresolved":false,"context_lines":[{"line_number":49,"context_line":"    # Determine the number of workers; if not specified in config, default"},{"line_number":50,"context_line":"    # to number of CPUs"},{"line_number":51,"context_line":"    workers \u003d CONF.scheduler.workers or processutils.get_worker_count()"},{"line_number":52,"context_line":"    # NOTE(gibi): The oslo.service backend creates the worker processes"},{"line_number":53,"context_line":"    # via os.fork. As nova already initialized these executor(s)"},{"line_number":54,"context_line":"    # in the master process, and os.fork\u0027s behavior is to copy the state of"},{"line_number":55,"context_line":"    # parent to the child process, we destroy the executor in the parent"},{"line_number":56,"context_line":"    # process before the forking, so that the workers initialize new"},{"line_number":57,"context_line":"    # executor(s) and therefore avoid working with the wrong internal executor"},{"line_number":58,"context_line":"    # state (i.e. number of workers idle in the pool). A long therm solution"},{"line_number":59,"context_line":"    # would be to use os.spawn instead of os.fork for the workers."},{"line_number":60,"context_line":"    utils.destroy_scatter_gather_executor()"},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"    service.serve(server, workers\u003dworkers)"}],"source_content_type":"text/x-python","patch_set":16,"id":"f25bba1f_77c8befc","line":59,"range":{"start_line":52,"start_character":0,"end_line":59,"end_character":66},"in_reply_to":"a0a7f02b_c5d324fa","updated":"2025-06-10 15:22:42.000000000","message":"Thanks for providing the path to the \"guilty\" scatter_gather_all_cells call.\nIt is precisely what I was looking for but I didn\u0027t find it.\n\nThe other explanations are clear too.\n\nNo more questions from me on that point.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"}],"nova/conf/base.py":[{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0adb295dbf271ba2c64b9310529de1824529b909","unresolved":true,"context_lines":[{"line_number":69,"context_line":"greenthread pool concurrently, defaults to 1000, min value is 100."},{"line_number":70,"context_line":"\u0027\u0027\u0027),"},{"line_number":71,"context_line":"    cfg.IntOpt("},{"line_number":72,"context_line":"        \u0027scatter_gather_thread_pool_size\u0027,"},{"line_number":73,"context_line":"        default\u003d5,"},{"line_number":74,"context_line":"        min\u003d1,"},{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"}],"source_content_type":"text/x-python","patch_set":9,"id":"a7a6b062_4bb49ccd","line":72,"updated":"2025-05-08 15:03:34.000000000","message":"I wonder if it\u0027s better to name the something meaningful from the outside instead of the inside. The term \"scatter-gather\" could apply to a lot of things, and certainly some operators could think it\u0027s about IO performance of the guests.\n\nEven though it\u0027s in the help text, I wonder if it would be better to call this something like \"cell_stripe_parallelism\" or \"cell_list_workers\". Those aren\u0027t great, but ... something that better describes what this actually controls that will be meaningful to people not familiar with our internals.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"e71b1685720876d51b1a0d1f4c03b50fdc88b7b1","unresolved":false,"context_lines":[{"line_number":69,"context_line":"greenthread pool concurrently, defaults to 1000, min value is 100."},{"line_number":70,"context_line":"\u0027\u0027\u0027),"},{"line_number":71,"context_line":"    cfg.IntOpt("},{"line_number":72,"context_line":"        \u0027scatter_gather_thread_pool_size\u0027,"},{"line_number":73,"context_line":"        default\u003d5,"},{"line_number":74,"context_line":"        min\u003d1,"},{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"}],"source_content_type":"text/x-python","patch_set":9,"id":"5ac88eff_dee1f00c","line":72,"in_reply_to":"18318453_0e8dd4fc","updated":"2025-05-09 14:41:13.000000000","message":"Done","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"43e4ef2563f37a7912adef63bcf554b1ef737eac","unresolved":true,"context_lines":[{"line_number":69,"context_line":"greenthread pool concurrently, defaults to 1000, min value is 100."},{"line_number":70,"context_line":"\u0027\u0027\u0027),"},{"line_number":71,"context_line":"    cfg.IntOpt("},{"line_number":72,"context_line":"        \u0027scatter_gather_thread_pool_size\u0027,"},{"line_number":73,"context_line":"        default\u003d5,"},{"line_number":74,"context_line":"        min\u003d1,"},{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"}],"source_content_type":"text/x-python","patch_set":9,"id":"18318453_0e8dd4fc","line":72,"in_reply_to":"a7a6b062_4bb49ccd","updated":"2025-05-09 07:20:15.000000000","message":"OK. I can change it. Originally tried to follow the pattern coming from oslo.messaging\u0027s executor_thread_pool_size and our own default_green_pool_size (deprecated in [1]) and default_thread_pool_size (added in [1] instead). What about cell_worker_thread_pool_size?","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1f61fddb5018a9bffb93f67f36c033fb61a1f660","unresolved":false,"context_lines":[{"line_number":71,"context_line":"    cfg.IntOpt("},{"line_number":72,"context_line":"        \u0027cell_worker_thread_pool_size\u0027,"},{"line_number":73,"context_line":"        default\u003d5,"},{"line_number":74,"context_line":"        min\u003d1,"},{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"},{"line_number":76,"context_line":"The number of tasks that can run concurrently, one for each cell, for"},{"line_number":77,"context_line":"operations requires cross cell data gathering a.k.a scatter-gather, like"}],"source_content_type":"text/x-python","patch_set":12,"id":"acb29fa7_ada79749","line":74,"range":{"start_line":74,"start_character":7,"end_line":74,"end_character":14},"updated":"2025-05-13 11:54:50.000000000","message":"the min 1 is very important here i believe setting 0 will not make it unlimited and instead mean that we wont allow any threads to be spawned. \n\ni could be wrong but we have fixed bugs like that in the past.\n\nwe could special case 0 to be \"use the synchronous executor instead\" that would be useful for testing to avoid needing to have a fixture replace it but honestly i think that probably overly complex and beside if we want to allow that in the future we can.","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"039865e19478c129046433bde30c68dff78c52cc","unresolved":false,"context_lines":[{"line_number":71,"context_line":"    cfg.IntOpt("},{"line_number":72,"context_line":"        \u0027cell_worker_thread_pool_size\u0027,"},{"line_number":73,"context_line":"        default\u003d5,"},{"line_number":74,"context_line":"        min\u003d1,"},{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"},{"line_number":76,"context_line":"The number of tasks that can run concurrently, one for each cell, for"},{"line_number":77,"context_line":"operations requires cross cell data gathering a.k.a scatter-gather, like"}],"source_content_type":"text/x-python","patch_set":12,"id":"5ab1efe0_41045aff","line":74,"range":{"start_line":74,"start_character":7,"end_line":74,"end_character":14},"in_reply_to":"acb29fa7_ada79749","updated":"2025-05-14 13:06:07.000000000","message":"you are right. Futurist actually rejects 0 out of the box https://github.com/openstack/futurist/blob/bcbb4f382a95dd002ed6d3d9a73a65b0dda9bf4f/futurist/_futures.py#L130-L131","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"2304e23b537ea30754b1962ccc465ac51dd971f0","unresolved":true,"context_lines":[{"line_number":70,"context_line":"\u0027\u0027\u0027),"},{"line_number":71,"context_line":"    cfg.IntOpt("},{"line_number":72,"context_line":"        \u0027cell_worker_thread_pool_size\u0027,"},{"line_number":73,"context_line":"        default\u003d5,"},{"line_number":74,"context_line":"        min\u003d1,"},{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"},{"line_number":76,"context_line":"The number of tasks that can run concurrently, one for each cell, for"}],"source_content_type":"text/x-python","patch_set":16,"id":"1f4a6629_058fecf2","line":73,"range":{"start_line":73,"start_character":8,"end_line":73,"end_character":17},"updated":"2025-06-04 14:17:41.000000000","message":"Please let me know if I’ve understood this correctly:\n\ntotal_threads \u003d nb_workers × cell_worker_thread_pool_size\n\n- nb_workers defaults to the number of CPU cores.\n- So, on a system with 2 CPUs, I end up with:\n  - 1 master process\n  - 2 worker (child) processes\n  - Each worker has a thread pool with 5 threads dedicated to scatter-gather operations\n\nThis means I can query up to 5 cells concurrently per worker, for a total of 10 concurrent cell queries.\n\nWithin each cell, however, the computes are queried serially (we are not spawning a thread per compute).","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f2a5c8bb96ff6aec2e830b9bd338477a0faf5f53","unresolved":true,"context_lines":[{"line_number":70,"context_line":"\u0027\u0027\u0027),"},{"line_number":71,"context_line":"    cfg.IntOpt("},{"line_number":72,"context_line":"        \u0027cell_worker_thread_pool_size\u0027,"},{"line_number":73,"context_line":"        default\u003d5,"},{"line_number":74,"context_line":"        min\u003d1,"},{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"},{"line_number":76,"context_line":"The number of tasks that can run concurrently, one for each cell, for"}],"source_content_type":"text/x-python","patch_set":16,"id":"7ced8655_56739544","line":73,"range":{"start_line":73,"start_character":8,"end_line":73,"end_character":17},"in_reply_to":"1f4a6629_058fecf2","updated":"2025-06-05 13:05:10.000000000","message":"\u003e Please let me know if I’ve understood this correctly\n\nMostly correct. \nThe number of worker processes is configurable via https://docs.openstack.org/nova/latest/configuration/config.html#scheduler.workers\n\n\u003e This means I can query up to 5 cells concurrently per worker, for a total of 10 concurrent cell queries.\n\nCorrect. This means that in your example if the deployment has 1 real cell (2 actual cells) then 4 `openstack server list` commands can run in fully parallel (needs 4 * 2 threads). Or in a deployment with 5 total cells, two of such commands can run full parallel.\nEven if we are running out of threads in the pool the request will not fail it will just wait for threads to be returned to the pool. Eventually in a busy cloud some of the parallel openstack server list commands will start to timeout while waiting for threads. So there this configuration option needs to be tuned. A later patch https://review.opendev.org/c/openstack/nova/+/948079/15/nova/utils.py in the series makes sure that nova will start logging warnings when requests are need to wait for threads, so the admin can detect it and tune the config. Also a later patch adds documentation about these tunables. https://review.opendev.org/c/openstack/nova/+/949364/9/doc/source/admin/concurrency.rst\n\n\n\u003e Within each cell, however, the computes are queried serially (we are not spawning a thread per compute).\n\nThe scatter gather code does not need to talk to the compute node itself it just need to query the cell database. So there is no need for a thread per compute.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"53eb6a94b441a4a535fab99aeae95c8064163dbb","unresolved":false,"context_lines":[{"line_number":70,"context_line":"\u0027\u0027\u0027),"},{"line_number":71,"context_line":"    cfg.IntOpt("},{"line_number":72,"context_line":"        \u0027cell_worker_thread_pool_size\u0027,"},{"line_number":73,"context_line":"        default\u003d5,"},{"line_number":74,"context_line":"        min\u003d1,"},{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"},{"line_number":76,"context_line":"The number of tasks that can run concurrently, one for each cell, for"}],"source_content_type":"text/x-python","patch_set":16,"id":"e896df72_d71f5336","line":73,"range":{"start_line":73,"start_character":8,"end_line":73,"end_character":17},"in_reply_to":"7ced8655_56739544","updated":"2025-06-10 15:22:42.000000000","message":"```\nThe scatter gather code does not need to talk to the compute node itself it just need to query the cell database. So there is no need for a thread per compute.\n```\nIndeed, I was aware of that, but I got carried away while writing the message and it slipped my mind.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":true,"context_lines":[{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"},{"line_number":76,"context_line":"The number of tasks that can run concurrently, one for each cell, for"},{"line_number":77,"context_line":"operations requires cross cell data gathering a.k.a scatter-gather, like"},{"line_number":78,"context_line":"listing instances across multiple cells."},{"line_number":79,"context_line":"\u0027\u0027\u0027),"},{"line_number":80,"context_line":"]"},{"line_number":81,"context_line":""}],"source_content_type":"text/x-python","patch_set":17,"id":"d863ca6d_0d3aeef5","line":78,"updated":"2025-06-23 14:55:29.000000000","message":"only when threading is enabled","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"886c4d76f603dc32e67312eac9213557c8751c6d","unresolved":true,"context_lines":[{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"},{"line_number":76,"context_line":"The number of tasks that can run concurrently, one for each cell, for"},{"line_number":77,"context_line":"operations requires cross cell data gathering a.k.a scatter-gather, like"},{"line_number":78,"context_line":"listing instances across multiple cells."},{"line_number":79,"context_line":"\u0027\u0027\u0027),"},{"line_number":80,"context_line":"]"},{"line_number":81,"context_line":""}],"source_content_type":"text/x-python","patch_set":17,"id":"f9839319_d3216578","line":78,"in_reply_to":"d863ca6d_0d3aeef5","updated":"2025-06-25 17:09:29.000000000","message":"True. Clarified in a follow up","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"038c06f367ec1101d870a5cd00198d886d720fc3","unresolved":false,"context_lines":[{"line_number":75,"context_line":"        help\u003d\u0027\u0027\u0027"},{"line_number":76,"context_line":"The number of tasks that can run concurrently, one for each cell, for"},{"line_number":77,"context_line":"operations requires cross cell data gathering a.k.a scatter-gather, like"},{"line_number":78,"context_line":"listing instances across multiple cells."},{"line_number":79,"context_line":"\u0027\u0027\u0027),"},{"line_number":80,"context_line":"]"},{"line_number":81,"context_line":""}],"source_content_type":"text/x-python","patch_set":17,"id":"e75663e0_ef391da3","line":78,"in_reply_to":"f9839319_d3216578","updated":"2025-06-25 17:11:08.000000000","message":"https://review.opendev.org/c/openstack/nova/+/953338","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"}],"nova/context.py":[{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0226fa0fa73e4a88726eff3b9cf760626362b440","unresolved":true,"context_lines":[{"line_number":441,"context_line":"        if future in not_done:"},{"line_number":442,"context_line":"            results[cell_uuid] \u003d did_not_respond_sentinel"},{"line_number":443,"context_line":"            LOG.warning(\u0027Timed out waiting for response from cell %s\u0027,"},{"line_number":444,"context_line":"                        cell_uuid)"},{"line_number":445,"context_line":"            cancelled \u003d future.cancel()"},{"line_number":446,"context_line":"            if not cancelled:"},{"line_number":447,"context_line":"                LOG.warning("}],"source_content_type":"text/x-python","patch_set":3,"id":"012f69e9_77de98e0","line":444,"updated":"2025-04-25 16:22:06.000000000","message":"So, here is where we leak a thread if the timeout passed and we haven\u0027t heard back from one or more right?","commit_id":"902fd8f769bf89856259d11927d046b021bfc3da"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"5d12425a023b6b18044bb4528e45a4f40adaacf1","unresolved":true,"context_lines":[{"line_number":441,"context_line":"        if future in not_done:"},{"line_number":442,"context_line":"            results[cell_uuid] \u003d did_not_respond_sentinel"},{"line_number":443,"context_line":"            LOG.warning(\u0027Timed out waiting for response from cell %s\u0027,"},{"line_number":444,"context_line":"                        cell_uuid)"},{"line_number":445,"context_line":"            cancelled \u003d future.cancel()"},{"line_number":446,"context_line":"            if not cancelled:"},{"line_number":447,"context_line":"                LOG.warning("}],"source_content_type":"text/x-python","patch_set":3,"id":"7908a7f5_d267456b","line":444,"in_reply_to":"012f69e9_77de98e0","updated":"2025-05-05 14:25:57.000000000","message":"Yes, this means that there is a thread that did not finish before the timeout happened. This thread will be out of the pool until the task in it finishes.","commit_id":"902fd8f769bf89856259d11927d046b021bfc3da"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0adb295dbf271ba2c64b9310529de1824529b909","unresolved":false,"context_lines":[{"line_number":441,"context_line":"        if future in not_done:"},{"line_number":442,"context_line":"            results[cell_uuid] \u003d did_not_respond_sentinel"},{"line_number":443,"context_line":"            LOG.warning(\u0027Timed out waiting for response from cell %s\u0027,"},{"line_number":444,"context_line":"                        cell_uuid)"},{"line_number":445,"context_line":"            cancelled \u003d future.cancel()"},{"line_number":446,"context_line":"            if not cancelled:"},{"line_number":447,"context_line":"                LOG.warning("}],"source_content_type":"text/x-python","patch_set":3,"id":"c574260f_8869d3fa","line":444,"in_reply_to":"7908a7f5_d267456b","updated":"2025-05-08 15:03:34.000000000","message":"Acknowledged","commit_id":"902fd8f769bf89856259d11927d046b021bfc3da"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0226fa0fa73e4a88726eff3b9cf760626362b440","unresolved":true,"context_lines":[{"line_number":443,"context_line":"            LOG.warning(\u0027Timed out waiting for response from cell %s\u0027,"},{"line_number":444,"context_line":"                        cell_uuid)"},{"line_number":445,"context_line":"            cancelled \u003d future.cancel()"},{"line_number":446,"context_line":"            if not cancelled:"},{"line_number":447,"context_line":"                LOG.warning("},{"line_number":448,"context_line":"                    \u0027Failed to cancel the gather task for cell %s\u0027, cell_uuid)"},{"line_number":449,"context_line":"        else:"}],"source_content_type":"text/x-python","patch_set":3,"id":"a9f8a150_e5cb7caf","line":446,"updated":"2025-04-25 16:22:06.000000000","message":"This is always going to fail unless the thread pool is busy and we never even started it right? I wonder if we should not log on L443 and below, given that they\u0027ll almost always come together. Maybe:\n```\nif future in not_done:\n   if not started (cancelable):\n       LOG(\u0027Unable to collect results from cellX because threads?\u0027)\n   else:\n       LOG(\u0027Timeout receiving results from cellX because it\u0027s down or stuck\u0027)\n```","commit_id":"902fd8f769bf89856259d11927d046b021bfc3da"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"5d12425a023b6b18044bb4528e45a4f40adaacf1","unresolved":true,"context_lines":[{"line_number":443,"context_line":"            LOG.warning(\u0027Timed out waiting for response from cell %s\u0027,"},{"line_number":444,"context_line":"                        cell_uuid)"},{"line_number":445,"context_line":"            cancelled \u003d future.cancel()"},{"line_number":446,"context_line":"            if not cancelled:"},{"line_number":447,"context_line":"                LOG.warning("},{"line_number":448,"context_line":"                    \u0027Failed to cancel the gather task for cell %s\u0027, cell_uuid)"},{"line_number":449,"context_line":"        else:"}],"source_content_type":"text/x-python","patch_set":3,"id":"ffb9d83b_8fe08a1c","line":446,"in_reply_to":"a9f8a150_e5cb7caf","updated":"2025-05-05 14:25:57.000000000","message":"You are right. Cancel only works if the task is not scheduled to a thread yet. \nNow I tried to be specific about the log message to log only one line per cell. How do you like it?","commit_id":"902fd8f769bf89856259d11927d046b021bfc3da"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0adb295dbf271ba2c64b9310529de1824529b909","unresolved":false,"context_lines":[{"line_number":443,"context_line":"            LOG.warning(\u0027Timed out waiting for response from cell %s\u0027,"},{"line_number":444,"context_line":"                        cell_uuid)"},{"line_number":445,"context_line":"            cancelled \u003d future.cancel()"},{"line_number":446,"context_line":"            if not cancelled:"},{"line_number":447,"context_line":"                LOG.warning("},{"line_number":448,"context_line":"                    \u0027Failed to cancel the gather task for cell %s\u0027, cell_uuid)"},{"line_number":449,"context_line":"        else:"}],"source_content_type":"text/x-python","patch_set":3,"id":"6653d39f_bc21db63","line":446,"in_reply_to":"ffb9d83b_8fe08a1c","updated":"2025-05-08 15:03:34.000000000","message":"Done","commit_id":"902fd8f769bf89856259d11927d046b021bfc3da"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0adb295dbf271ba2c64b9310529de1824529b909","unresolved":true,"context_lines":[{"line_number":445,"context_line":"                if utils.concurrency_mode_threading():"},{"line_number":446,"context_line":"                    LOG.warning("},{"line_number":447,"context_line":"                        \u0027Timed out waiting for response from cell %s. \u0027"},{"line_number":448,"context_line":"                        \u0027The gather task did not start and now cancelled. The \u0027"},{"line_number":449,"context_line":"                        \u0027scatter-gather pool size is too small for the load \u0027"},{"line_number":450,"context_line":"                        \u0027or there are stuck gather threads filling the pool.\u0027,"},{"line_number":451,"context_line":"                        cell_uuid)"}],"source_content_type":"text/x-python","patch_set":9,"id":"e63cb412_540bc799","line":448,"range":{"start_line":448,"start_character":59,"end_line":448,"end_character":62},"updated":"2025-05-08 15:03:34.000000000","message":"\"is now\"","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"496e905ef797b4383f210eac9def49afec5d7224","unresolved":false,"context_lines":[{"line_number":445,"context_line":"                if utils.concurrency_mode_threading():"},{"line_number":446,"context_line":"                    LOG.warning("},{"line_number":447,"context_line":"                        \u0027Timed out waiting for response from cell %s. \u0027"},{"line_number":448,"context_line":"                        \u0027The gather task did not start and now cancelled. The \u0027"},{"line_number":449,"context_line":"                        \u0027scatter-gather pool size is too small for the load \u0027"},{"line_number":450,"context_line":"                        \u0027or there are stuck gather threads filling the pool.\u0027,"},{"line_number":451,"context_line":"                        cell_uuid)"}],"source_content_type":"text/x-python","patch_set":9,"id":"0607c3e6_c10ee7fc","line":448,"range":{"start_line":448,"start_character":59,"end_line":448,"end_character":62},"in_reply_to":"e63cb412_540bc799","updated":"2025-05-09 13:28:02.000000000","message":"Done","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0adb295dbf271ba2c64b9310529de1824529b909","unresolved":true,"context_lines":[{"line_number":447,"context_line":"                        \u0027Timed out waiting for response from cell %s. \u0027"},{"line_number":448,"context_line":"                        \u0027The gather task did not start and now cancelled. The \u0027"},{"line_number":449,"context_line":"                        \u0027scatter-gather pool size is too small for the load \u0027"},{"line_number":450,"context_line":"                        \u0027or there are stuck gather threads filling the pool.\u0027,"},{"line_number":451,"context_line":"                        cell_uuid)"},{"line_number":452,"context_line":"                else:"},{"line_number":453,"context_line":"                    LOG.warning("}],"source_content_type":"text/x-python","patch_set":9,"id":"c21aea07_f5e177fb","line":450,"updated":"2025-05-08 15:03:34.000000000","message":"I\u0027m not sure the \"gather task\" and \"gather threads\" terms make much sense to me here. I mean, I know what you\u0027re saying of course, but I find myself arguing internal about whether they\u0027re \"scatter\" or \"gather\". To me, the \"scattering\" is the splitting into threads, and the \"gathering\" is the main thread assembling the results (sequentially).\n\nEither way, if we go with a different name for the config option (per comments on the previous file) I wonder if we could change this to be \"cell worker threads\" or something to tie them together and eliminate the confusion above.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"496e905ef797b4383f210eac9def49afec5d7224","unresolved":false,"context_lines":[{"line_number":447,"context_line":"                        \u0027Timed out waiting for response from cell %s. \u0027"},{"line_number":448,"context_line":"                        \u0027The gather task did not start and now cancelled. The \u0027"},{"line_number":449,"context_line":"                        \u0027scatter-gather pool size is too small for the load \u0027"},{"line_number":450,"context_line":"                        \u0027or there are stuck gather threads filling the pool.\u0027,"},{"line_number":451,"context_line":"                        cell_uuid)"},{"line_number":452,"context_line":"                else:"},{"line_number":453,"context_line":"                    LOG.warning("}],"source_content_type":"text/x-python","patch_set":9,"id":"5955030c_3221c69d","line":450,"in_reply_to":"513f6d10_e647d1c9","updated":"2025-05-09 13:28:02.000000000","message":"Done","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"43e4ef2563f37a7912adef63bcf554b1ef737eac","unresolved":true,"context_lines":[{"line_number":447,"context_line":"                        \u0027Timed out waiting for response from cell %s. \u0027"},{"line_number":448,"context_line":"                        \u0027The gather task did not start and now cancelled. The \u0027"},{"line_number":449,"context_line":"                        \u0027scatter-gather pool size is too small for the load \u0027"},{"line_number":450,"context_line":"                        \u0027or there are stuck gather threads filling the pool.\u0027,"},{"line_number":451,"context_line":"                        cell_uuid)"},{"line_number":452,"context_line":"                else:"},{"line_number":453,"context_line":"                    LOG.warning("}],"source_content_type":"text/x-python","patch_set":9,"id":"513f6d10_e647d1c9","line":450,"in_reply_to":"c21aea07_f5e177fb","updated":"2025-05-09 07:20:15.000000000","message":"yeah I will spread the new naming here.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1f61fddb5018a9bffb93f67f36c033fb61a1f660","unresolved":true,"context_lines":[{"line_number":435,"context_line":"                cell_mapping.uuid, fn, cctxt, *args, **kwargs)"},{"line_number":436,"context_line":"            tasks[cell_mapping.uuid] \u003d future"},{"line_number":437,"context_line":""},{"line_number":438,"context_line":"    _, not_done \u003d futurist.waiters.wait_for_all(tasks.values(), timeout)"},{"line_number":439,"context_line":""},{"line_number":440,"context_line":"    for cell_uuid, future in tasks.items():"},{"line_number":441,"context_line":"        if future in not_done:"}],"source_content_type":"text/x-python","patch_set":12,"id":"8b4f3afb_c050d7ca","line":438,"updated":"2025-05-13 11:54:50.000000000","message":"ack so we submit all the work and then wait once for the timeout.\n\ntechnially its possibel that between this point and the loop over the not_done","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"039865e19478c129046433bde30c68dff78c52cc","unresolved":false,"context_lines":[{"line_number":435,"context_line":"                cell_mapping.uuid, fn, cctxt, *args, **kwargs)"},{"line_number":436,"context_line":"            tasks[cell_mapping.uuid] \u003d future"},{"line_number":437,"context_line":""},{"line_number":438,"context_line":"    _, not_done \u003d futurist.waiters.wait_for_all(tasks.values(), timeout)"},{"line_number":439,"context_line":""},{"line_number":440,"context_line":"    for cell_uuid, future in tasks.items():"},{"line_number":441,"context_line":"        if future in not_done:"}],"source_content_type":"text/x-python","patch_set":12,"id":"db37cb36_cc43d7af","line":438,"in_reply_to":"8b4f3afb_c050d7ca","updated":"2025-05-14 13:06:07.000000000","message":"yepp if the tasks finished after this point then we will consider that as too late. Cancel will not be effective (obviously) but we are not leaking threads as the thread is already back in the pool.","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1f61fddb5018a9bffb93f67f36c033fb61a1f660","unresolved":false,"context_lines":[{"line_number":440,"context_line":"    for cell_uuid, future in tasks.items():"},{"line_number":441,"context_line":"        if future in not_done:"},{"line_number":442,"context_line":"            results[cell_uuid] \u003d did_not_respond_sentinel"},{"line_number":443,"context_line":"            cancelled \u003d future.cancel()"},{"line_number":444,"context_line":"            if cancelled:"},{"line_number":445,"context_line":"                if utils.concurrency_mode_threading():"},{"line_number":446,"context_line":"                    LOG.warning("}],"source_content_type":"text/x-python","patch_set":12,"id":"58f2ea9b_c97c7395","line":443,"range":{"start_line":443,"start_character":11,"end_line":443,"end_character":39},"updated":"2025-05-13 11:54:50.000000000","message":"so your using the fact that if a future is already complete calling canle returns false\n\nhttps://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Future.cancel\n\n\nthis has the benifit that if the timeout expired before the future was actully executed it will not run in the future.\n\nthats a nice way to handel this.\n\nthe alternitive would have been to just call result with a timeout\n\nhttps://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Future.result\n\nthat would have been what i gravitated too as its simpler but the downside of that \nis 1 we would have to hanel the timeout exception which is slow then this\n\nand second we would not benifit form cancelign the peding futures that have not been executed yet.\n\n+1","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"039865e19478c129046433bde30c68dff78c52cc","unresolved":false,"context_lines":[{"line_number":440,"context_line":"    for cell_uuid, future in tasks.items():"},{"line_number":441,"context_line":"        if future in not_done:"},{"line_number":442,"context_line":"            results[cell_uuid] \u003d did_not_respond_sentinel"},{"line_number":443,"context_line":"            cancelled \u003d future.cancel()"},{"line_number":444,"context_line":"            if cancelled:"},{"line_number":445,"context_line":"                if utils.concurrency_mode_threading():"},{"line_number":446,"context_line":"                    LOG.warning("}],"source_content_type":"text/x-python","patch_set":12,"id":"5ad8044c_7ab44541","line":443,"range":{"start_line":443,"start_character":11,"end_line":443,"end_character":39},"in_reply_to":"58f2ea9b_c97c7395","updated":"2025-05-14 13:06:07.000000000","message":"yepp.","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1f61fddb5018a9bffb93f67f36c033fb61a1f660","unresolved":true,"context_lines":[{"line_number":454,"context_line":"                    LOG.warning("},{"line_number":455,"context_line":"                        \u0027Timed out waiting for response from cell %s.\u0027,"},{"line_number":456,"context_line":"                        cell_uuid)"},{"line_number":457,"context_line":"            else:"},{"line_number":458,"context_line":"                LOG.warning("},{"line_number":459,"context_line":"                    \u0027Timed out waiting for response from cell %s. Left the \u0027"},{"line_number":460,"context_line":"                    \u0027cell worker thread to finish in the background.\u0027,"}],"source_content_type":"text/x-python","patch_set":12,"id":"76cb019d_f3905021","line":457,"updated":"2025-05-13 11:54:50.000000000","message":"there are actuly two possiblities here.\n\neither we go to this brnach because the future is still running or it completed after the wait_for_all returned but before we called cancel.\n\n\na minor enhancement would be to check if future.done() returns ture or falseo here and if it returns true then  do  results[cell_uuid] \u003d future.result()\n\n\nthere  is a very small window of time where that can happen but it exist so i think if we happen to get the result back just after the timeout while we are loopign here we might as well save the result.","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"039865e19478c129046433bde30c68dff78c52cc","unresolved":false,"context_lines":[{"line_number":454,"context_line":"                    LOG.warning("},{"line_number":455,"context_line":"                        \u0027Timed out waiting for response from cell %s.\u0027,"},{"line_number":456,"context_line":"                        cell_uuid)"},{"line_number":457,"context_line":"            else:"},{"line_number":458,"context_line":"                LOG.warning("},{"line_number":459,"context_line":"                    \u0027Timed out waiting for response from cell %s. Left the \u0027"},{"line_number":460,"context_line":"                    \u0027cell worker thread to finish in the background.\u0027,"}],"source_content_type":"text/x-python","patch_set":12,"id":"8341ad1b_8cfb226d","line":457,"in_reply_to":"76cb019d_f3905021","updated":"2025-05-14 13:06:07.000000000","message":"Good point. We can adjust the loop to check for `not future.done()` instead of `future in not_done` that way we can use a bit late task results.\n\nDone.","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb5d0b0dc3bf669fce54bc43bc0ea53abe2ca25b","unresolved":false,"context_lines":[{"line_number":438,"context_line":"    futurist.waiters.wait_for_all(tasks.values(), timeout)"},{"line_number":439,"context_line":""},{"line_number":440,"context_line":"    for cell_uuid, future in tasks.items():"},{"line_number":441,"context_line":"        if not future.done():"},{"line_number":442,"context_line":"            results[cell_uuid] \u003d did_not_respond_sentinel"},{"line_number":443,"context_line":"            cancelled \u003d future.cancel()"},{"line_number":444,"context_line":"            if cancelled:"}],"source_content_type":"text/x-python","patch_set":14,"id":"136cee0f_7386d9d4","line":441,"updated":"2025-05-26 13:01:48.000000000","message":"ack so this compined with not savign the result of  futurist.waiters.wait_for_all(tasks.values(), timeout)\nadresses the comments i left on v12\n\nhttps://review.opendev.org/c/openstack/nova/+/947966/comment/8b4f3afb_c050d7ca/\nhttps://review.opendev.org/c/openstack/nova/+/947966/comment/76cb019d_f3905021/\n\n+1","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"2304e23b537ea30754b1962ccc465ac51dd971f0","unresolved":true,"context_lines":[{"line_number":435,"context_line":"                cell_mapping.uuid, fn, cctxt, *args, **kwargs)"},{"line_number":436,"context_line":"            tasks[cell_mapping.uuid] \u003d future"},{"line_number":437,"context_line":""},{"line_number":438,"context_line":"    futurist.waiters.wait_for_all(tasks.values(), timeout)"},{"line_number":439,"context_line":""},{"line_number":440,"context_line":"    for cell_uuid, future in tasks.items():"},{"line_number":441,"context_line":"        if not future.done():"}],"source_content_type":"text/x-python","patch_set":16,"id":"f477bb11_6de34e5c","line":438,"range":{"start_line":438,"start_character":4,"end_line":438,"end_character":58},"updated":"2025-06-04 14:17:41.000000000","message":"Note for myself:\nOk, this is replacing all the queue mechanisms to wait for completion.\ntasks[cell_mapping.uuid] is a dict of future.\n\nThe future can be \u0027backed\u0027 by a greenthread (old behavior) or a real thread.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"53eb6a94b441a4a535fab99aeae95c8064163dbb","unresolved":false,"context_lines":[{"line_number":435,"context_line":"                cell_mapping.uuid, fn, cctxt, *args, **kwargs)"},{"line_number":436,"context_line":"            tasks[cell_mapping.uuid] \u003d future"},{"line_number":437,"context_line":""},{"line_number":438,"context_line":"    futurist.waiters.wait_for_all(tasks.values(), timeout)"},{"line_number":439,"context_line":""},{"line_number":440,"context_line":"    for cell_uuid, future in tasks.items():"},{"line_number":441,"context_line":"        if not future.done():"}],"source_content_type":"text/x-python","patch_set":16,"id":"1b3163b5_4d14ec96","line":438,"range":{"start_line":438,"start_character":4,"end_line":438,"end_character":58},"in_reply_to":"23e11684_813718a3","updated":"2025-06-10 15:22:42.000000000","message":"Acknowledged","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f2a5c8bb96ff6aec2e830b9bd338477a0faf5f53","unresolved":true,"context_lines":[{"line_number":435,"context_line":"                cell_mapping.uuid, fn, cctxt, *args, **kwargs)"},{"line_number":436,"context_line":"            tasks[cell_mapping.uuid] \u003d future"},{"line_number":437,"context_line":""},{"line_number":438,"context_line":"    futurist.waiters.wait_for_all(tasks.values(), timeout)"},{"line_number":439,"context_line":""},{"line_number":440,"context_line":"    for cell_uuid, future in tasks.items():"},{"line_number":441,"context_line":"        if not future.done():"}],"source_content_type":"text/x-python","patch_set":16,"id":"23e11684_813718a3","line":438,"range":{"start_line":438,"start_character":4,"end_line":438,"end_character":58},"in_reply_to":"f477bb11_6de34e5c","updated":"2025-06-05 13:05:10.000000000","message":"Correct. With the futurist library we abstracted away what is behind a Future. And we have a call to wait for a list of futures as a single wait statement with a timeout","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"2304e23b537ea30754b1962ccc465ac51dd971f0","unresolved":true,"context_lines":[{"line_number":454,"context_line":"                    LOG.warning("},{"line_number":455,"context_line":"                        \u0027Timed out waiting for response from cell %s.\u0027,"},{"line_number":456,"context_line":"                        cell_uuid)"},{"line_number":457,"context_line":"            else:"},{"line_number":458,"context_line":"                LOG.warning("},{"line_number":459,"context_line":"                    \u0027Timed out waiting for response from cell %s. Left the \u0027"},{"line_number":460,"context_line":"                    \u0027cell worker thread to finish in the background.\u0027,"}],"source_content_type":"text/x-python","patch_set":16,"id":"9c51bb4d_820ba8ff","line":457,"range":{"start_line":457,"start_character":12,"end_line":457,"end_character":16},"updated":"2025-06-04 14:17:41.000000000","message":"Can we have an issue with blocked threads that would never terminate ? And at the end we will saturate the pool ?\nShould we not try to kill this greenthread/thread ? Although I\u0027m not sure it is possible.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d68f0d37aba6d329d618c22ff7df3ee3005eb020","unresolved":true,"context_lines":[{"line_number":454,"context_line":"                    LOG.warning("},{"line_number":455,"context_line":"                        \u0027Timed out waiting for response from cell %s.\u0027,"},{"line_number":456,"context_line":"                        cell_uuid)"},{"line_number":457,"context_line":"            else:"},{"line_number":458,"context_line":"                LOG.warning("},{"line_number":459,"context_line":"                    \u0027Timed out waiting for response from cell %s. Left the \u0027"},{"line_number":460,"context_line":"                    \u0027cell worker thread to finish in the background.\u0027,"}],"source_content_type":"text/x-python","patch_set":16,"id":"770949f9_584a072a","line":457,"range":{"start_line":457,"start_character":12,"end_line":457,"end_character":16},"in_reply_to":"3355f0a5_220b39b8","updated":"2025-06-09 14:08:09.000000000","message":"\u003e A generic workaround would be to resize the executor when we detect that one of the thread is not returned to the pool.\n\nWe also don\u0027t know that the function will never terminate, so a very laggy implementation of something could end up moving our pool size over time, unless we also have some way to reduce it if the worker ever comes back.\n\nIMHO, the better thing to do is logging that we\u0027re leaking (below) and log when we start running out of workers (above) for the initial attempt. If this becomes problematic for lots of people then we can engineer something more complicated (read: probably buggy) to try to mitigate it at that point.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"40a1077c1f9b9c117c1206944be2e5d533538c67","unresolved":false,"context_lines":[{"line_number":454,"context_line":"                    LOG.warning("},{"line_number":455,"context_line":"                        \u0027Timed out waiting for response from cell %s.\u0027,"},{"line_number":456,"context_line":"                        cell_uuid)"},{"line_number":457,"context_line":"            else:"},{"line_number":458,"context_line":"                LOG.warning("},{"line_number":459,"context_line":"                    \u0027Timed out waiting for response from cell %s. Left the \u0027"},{"line_number":460,"context_line":"                    \u0027cell worker thread to finish in the background.\u0027,"}],"source_content_type":"text/x-python","patch_set":16,"id":"c67914fa_24ea5b77","line":457,"range":{"start_line":457,"start_character":12,"end_line":457,"end_character":16},"in_reply_to":"4eda14a4_a702a76e","updated":"2025-06-13 10:54:11.000000000","message":"yeah, lets focus on having enough logging to detect such leaks. I agree that having a pool that can be dynamically sized up and down is a complex business.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"53eb6a94b441a4a535fab99aeae95c8064163dbb","unresolved":true,"context_lines":[{"line_number":454,"context_line":"                    LOG.warning("},{"line_number":455,"context_line":"                        \u0027Timed out waiting for response from cell %s.\u0027,"},{"line_number":456,"context_line":"                        cell_uuid)"},{"line_number":457,"context_line":"            else:"},{"line_number":458,"context_line":"                LOG.warning("},{"line_number":459,"context_line":"                    \u0027Timed out waiting for response from cell %s. Left the \u0027"},{"line_number":460,"context_line":"                    \u0027cell worker thread to finish in the background.\u0027,"}],"source_content_type":"text/x-python","patch_set":16,"id":"4eda14a4_a702a76e","line":457,"range":{"start_line":457,"start_character":12,"end_line":457,"end_character":16},"in_reply_to":"770949f9_584a072a","updated":"2025-06-10 15:22:42.000000000","message":"more complicated (read: probably buggy) \u003c-- This comes from wisdom. 👍","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f2a5c8bb96ff6aec2e830b9bd338477a0faf5f53","unresolved":true,"context_lines":[{"line_number":454,"context_line":"                    LOG.warning("},{"line_number":455,"context_line":"                        \u0027Timed out waiting for response from cell %s.\u0027,"},{"line_number":456,"context_line":"                        cell_uuid)"},{"line_number":457,"context_line":"            else:"},{"line_number":458,"context_line":"                LOG.warning("},{"line_number":459,"context_line":"                    \u0027Timed out waiting for response from cell %s. Left the \u0027"},{"line_number":460,"context_line":"                    \u0027cell worker thread to finish in the background.\u0027,"}],"source_content_type":"text/x-python","patch_set":16,"id":"3355f0a5_220b39b8","line":457,"range":{"start_line":457,"start_character":12,"end_line":457,"end_character":16},"in_reply_to":"9c51bb4d_820ba8ff","updated":"2025-06-05 13:05:10.000000000","message":"\u003e Can we have an issue with blocked threads that would never terminate ? And at the end we will saturate the pool ?\n\u003e Should we not try to kill this greenthread/thread ? Although I\u0027m not sure it is possible.\n\nYou are correct. If the function `fn` never terminates for a given cell then the thread executing that function will never be returned to the pool.\n\nFor greenthreads future.cancel() will be effective in this case. \n\nFor native threads future.cancel() is only effective if the task was waiting for a thread to be available. If the task already executing in that thread the it cannot be cancelled.\n\nIn general a native thread cannot be terminated / killed from outside. (It is probably a POSIX limitation as pthread_kill also does not kill a thread just signal it.) There is no generic way to stop a running python function `fn` either. So the only way to circumvent this in the generic case is to require the caller to provide an `fn` that guaranteed to terminate.\n\nA generic workaround would be to resize the executor when we detect that one of the thread is not returned to the pool. That would be a pretty complicated process as the futurist executors are not resizable. (There are other executor implementations out there that can be resized)\n\nInstead of all these we make a different assumption. Scatter gather is created to execute the same DB query across multiple cell databases. If scatter gather is used correctly only for this then we can advise the deployer to set up some timeouts on the DB level to ensure `fn` always terminates in a reasonable time. We can have two level of protection. DB server side and DB client side. The doc patch contains advice how to configure these \nhttps://review.opendev.org/c/openstack/nova/+/949364/9/doc/source/admin/concurrency.rst","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":430,"context_line":""},{"line_number":431,"context_line":"    for cell_mapping in cell_mappings:"},{"line_number":432,"context_line":"        with target_cell(context, cell_mapping) as cctxt:"},{"line_number":433,"context_line":"            future \u003d executor.submit("},{"line_number":434,"context_line":"                utils.pass_context_wrapper(gather_result),"},{"line_number":435,"context_line":"                cell_mapping.uuid, fn, cctxt, *args, **kwargs)"},{"line_number":436,"context_line":"            tasks[cell_mapping.uuid] \u003d future"}],"source_content_type":"text/x-python","patch_set":17,"id":"2a0429b8_259978d2","line":433,"updated":"2025-06-23 14:55:29.000000000","message":"as said earlier, shall we destroy an executor, then the global var would be None and then a new threadpool would be created.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"886c4d76f603dc32e67312eac9213557c8751c6d","unresolved":false,"context_lines":[{"line_number":430,"context_line":""},{"line_number":431,"context_line":"    for cell_mapping in cell_mappings:"},{"line_number":432,"context_line":"        with target_cell(context, cell_mapping) as cctxt:"},{"line_number":433,"context_line":"            future \u003d executor.submit("},{"line_number":434,"context_line":"                utils.pass_context_wrapper(gather_result),"},{"line_number":435,"context_line":"                cell_mapping.uuid, fn, cctxt, *args, **kwargs)"},{"line_number":436,"context_line":"            tasks[cell_mapping.uuid] \u003d future"}],"source_content_type":"text/x-python","patch_set":17,"id":"0b08cf0a_0ab2e8cb","line":433,"in_reply_to":"2a0429b8_259978d2","updated":"2025-06-25 17:09:29.000000000","message":"We shall not. That would create a nasty race condition when two parallel RCP / API requests are handled and each requires a scatter gather run using the same process level global executor.\n\nThe executor is initialized lazily but one it is initialized it runs forever. Except for the worker thread forking case as we cannot work with running threads. See the relevant blog post :)","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"}],"nova/tests/fixtures/nova.py":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1f61fddb5018a9bffb93f67f36c033fb61a1f660","unresolved":true,"context_lines":[{"line_number":1290,"context_line":"        super(_FakeExecutor, self).__init__()"},{"line_number":1291,"context_line":""},{"line_number":1292,"context_line":""},{"line_number":1293,"context_line":"class SynchronousThreadPoolExecutorFixture(fixtures.Fixture):"},{"line_number":1294,"context_line":"    \"\"\"Make GreenThreadPoolExecutor synchronous."},{"line_number":1295,"context_line":""},{"line_number":1296,"context_line":"    Replace the GreenThreadPoolExecutor with the SynchronousExecutor."}],"source_content_type":"text/x-python","patch_set":12,"id":"f98caa47_1d7c779b","side":"PARENT","line":1293,"updated":"2025-05-13 11:54:50.000000000","message":"im not sure that removing this really is a good idea but i guess we could.","commit_id":"7f4c47c6423b121a5d8d79c192a4922500a749a1"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"a80dc3f827c4eaa6f455f8e5044d8f29b774fbfa","unresolved":false,"context_lines":[{"line_number":1290,"context_line":"        super(_FakeExecutor, self).__init__()"},{"line_number":1291,"context_line":""},{"line_number":1292,"context_line":""},{"line_number":1293,"context_line":"class SynchronousThreadPoolExecutorFixture(fixtures.Fixture):"},{"line_number":1294,"context_line":"    \"\"\"Make GreenThreadPoolExecutor synchronous."},{"line_number":1295,"context_line":""},{"line_number":1296,"context_line":"    Replace the GreenThreadPoolExecutor with the SynchronousExecutor."}],"source_content_type":"text/x-python","patch_set":12,"id":"bd371c3f_fdff816a","side":"PARENT","line":1293,"in_reply_to":"f98caa47_1d7c779b","updated":"2025-05-19 12:08:11.000000000","message":"The problem with it is that is replaces all the GreenThreadExecutors but some tests relies on concurrent behavior. I\u0027m happy to add it back if needed somewhere explicitly and more selectively.","commit_id":"7f4c47c6423b121a5d8d79c192a4922500a749a1"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb5d0b0dc3bf669fce54bc43bc0ea53abe2ca25b","unresolved":false,"context_lines":[{"line_number":1201,"context_line":"    def _setUp(self):"},{"line_number":1202,"context_line":"        # Just safety that the previous testcase cleaned up after itself"},{"line_number":1203,"context_line":"        assert utils.SCATTER_GATHER_EXECUTOR is None"},{"line_number":1204,"context_line":"        assert utils.DEFAULT_GREEN_POOL is None"},{"line_number":1205,"context_line":""},{"line_number":1206,"context_line":"        origi_get_scatter_gather \u003d utils.get_scatter_gather_executor"},{"line_number":1207,"context_line":"        origi_default_green_pool \u003d utils._get_default_green_pool"}],"source_content_type":"text/x-python","patch_set":14,"id":"0923ed1b_3e03f7f1","line":1204,"updated":"2025-05-26 13:01:48.000000000","message":"ack, i like useing asserts in test code for things like this.\n\nwhile it should never fire if it does it means we messed up the test setup and we cant trust the results even if they passsed so this is nice to have.","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"2304e23b537ea30754b1962ccc465ac51dd971f0","unresolved":true,"context_lines":[{"line_number":1200,"context_line":""},{"line_number":1201,"context_line":"    def _setUp(self):"},{"line_number":1202,"context_line":"        # Just safety that the previous testcase cleaned up after itself"},{"line_number":1203,"context_line":"        assert utils.SCATTER_GATHER_EXECUTOR is None"},{"line_number":1204,"context_line":"        assert utils.DEFAULT_GREEN_POOL is None"},{"line_number":1205,"context_line":""},{"line_number":1206,"context_line":"        origi_get_scatter_gather \u003d utils.get_scatter_gather_executor"},{"line_number":1207,"context_line":"        origi_default_green_pool \u003d utils._get_default_green_pool"}],"source_content_type":"text/x-python","patch_set":16,"id":"6cabd74e_15026b33","line":1204,"range":{"start_line":1203,"start_character":0,"end_line":1204,"end_character":47},"updated":"2025-06-04 14:17:41.000000000","message":"+1 I think this is a good safety rule.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f2a5c8bb96ff6aec2e830b9bd338477a0faf5f53","unresolved":false,"context_lines":[{"line_number":1200,"context_line":""},{"line_number":1201,"context_line":"    def _setUp(self):"},{"line_number":1202,"context_line":"        # Just safety that the previous testcase cleaned up after itself"},{"line_number":1203,"context_line":"        assert utils.SCATTER_GATHER_EXECUTOR is None"},{"line_number":1204,"context_line":"        assert utils.DEFAULT_GREEN_POOL is None"},{"line_number":1205,"context_line":""},{"line_number":1206,"context_line":"        origi_get_scatter_gather \u003d utils.get_scatter_gather_executor"},{"line_number":1207,"context_line":"        origi_default_green_pool \u003d utils._get_default_green_pool"}],"source_content_type":"text/x-python","patch_set":16,"id":"84fdcf87_392c04bc","line":1204,"range":{"start_line":1203,"start_character":0,"end_line":1204,"end_character":47},"in_reply_to":"6cabd74e_15026b33","updated":"2025-06-05 13:05:10.000000000","message":"Acknowledged","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":1299,"context_line":"    def setUp(self):"},{"line_number":1300,"context_line":"        super(SynchronousThreadPoolExecutorFixture, self).setUp()"},{"line_number":1301,"context_line":"        self.useFixture(fixtures.MonkeyPatch("},{"line_number":1302,"context_line":"            \u0027futurist.GreenThreadPoolExecutor\u0027, _FakeExecutor))"},{"line_number":1303,"context_line":""},{"line_number":1304,"context_line":""},{"line_number":1305,"context_line":"class BannedDBSchemaOperations(fixtures.Fixture):"}],"source_content_type":"text/x-python","patch_set":17,"id":"f320fdc1_364784ea","side":"PARENT","line":1302,"updated":"2025-06-23 14:55:29.000000000","message":"I see, you need to delete the Fixture too, of course.","commit_id":"221a3e89e8988bc664298106ee691a4e41ca71f9"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":1201,"context_line":"    def _setUp(self):"},{"line_number":1202,"context_line":"        # Just safety that the previous testcase cleaned up after itself"},{"line_number":1203,"context_line":"        assert utils.SCATTER_GATHER_EXECUTOR is None"},{"line_number":1204,"context_line":"        assert utils.DEFAULT_GREEN_POOL is None"},{"line_number":1205,"context_line":""},{"line_number":1206,"context_line":"        origi_get_scatter_gather \u003d utils.get_scatter_gather_executor"},{"line_number":1207,"context_line":"        origi_default_green_pool \u003d utils._get_default_green_pool"}],"source_content_type":"text/x-python","patch_set":17,"id":"7074c407_bd110b78","line":1204,"updated":"2025-06-23 14:55:29.000000000","message":"++","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":1225,"context_line":"            self.scatter_gather_executor \u003d origi_get_scatter_gather()"},{"line_number":1226,"context_line":"            self.scatter_gather_executor.name \u003d ("},{"line_number":1227,"context_line":"                f\"{self.test_case_id}.cell_worker\")"},{"line_number":1228,"context_line":"            return self.scatter_gather_executor"},{"line_number":1229,"context_line":""},{"line_number":1230,"context_line":"        self.useFixture(fixtures.MonkeyPatch("},{"line_number":1231,"context_line":"            \u0027nova.utils.get_scatter_gather_executor\u0027,"}],"source_content_type":"text/x-python","patch_set":17,"id":"7255c73b_1eb954d4","line":1228,"updated":"2025-06-23 14:55:29.000000000","message":"++","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":1247,"context_line":"                # NOTE(gibi): This is optimistic, but we need specific examples"},{"line_number":1248,"context_line":"                # where self.executor.shutdown(wait\u003dTrue) hangs to figure out"},{"line_number":1249,"context_line":"                # what we can do here."},{"line_number":1250,"context_line":"                pass"},{"line_number":1251,"context_line":"            else:"},{"line_number":1252,"context_line":"                # kill all greenthreads in the pool before raising to prevent"},{"line_number":1253,"context_line":"                # them from interfering with other tests."}],"source_content_type":"text/x-python","patch_set":17,"id":"429d0c17_5dc40b19","line":1250,"updated":"2025-06-23 14:55:29.000000000","message":"fair assumption.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":1260,"context_line":"            if threading:"},{"line_number":1261,"context_line":"                # NOTE(gibi):If shutdown(wait\u003dTrue) returns then nothing is"},{"line_number":1262,"context_line":"                # leaked, so nothing to do here."},{"line_number":1263,"context_line":"                pass"},{"line_number":1264,"context_line":"            else:"},{"line_number":1265,"context_line":"                self._raise_on_green_pool(executor._pool)"},{"line_number":1266,"context_line":""}],"source_content_type":"text/x-python","patch_set":17,"id":"74c4836e_32920cf1","line":1263,"updated":"2025-06-23 14:55:29.000000000","message":"fair assumption too","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"}],"nova/tests/unit/cmd/test_scheduler.py":[{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":63,"context_line":"        mock_wait.assert_called_once_with()"},{"line_number":64,"context_line":"        # check that the executor was properly destroyed"},{"line_number":65,"context_line":"        self.assertFalse(executor.alive)"},{"line_number":66,"context_line":"        self.assertIsNone(utils.SCATTER_GATHER_EXECUTOR)"}],"source_content_type":"text/x-python","patch_set":17,"id":"dab5e727_8946cf1c","line":66,"updated":"2025-06-23 14:55:29.000000000","message":"++","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"}],"nova/tests/unit/compute/test_compute.py":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1f61fddb5018a9bffb93f67f36c033fb61a1f660","unresolved":true,"context_lines":[{"line_number":1661,"context_line":"                      fake_resource_tracker.RTMockMixin):"},{"line_number":1662,"context_line":"    def setUp(self):"},{"line_number":1663,"context_line":"        super(ComputeTestCase, self).setUp()"},{"line_number":1664,"context_line":"        self.compute._live_migration_executor \u003d futurist.SynchronousExecutor()"},{"line_number":1665,"context_line":"        self.useFixture(fixtures.SpawnIsSynchronousFixture())"},{"line_number":1666,"context_line":""},{"line_number":1667,"context_line":"        self.image_api \u003d image_api.API()"}],"source_content_type":"text/x-python","patch_set":12,"id":"97b7fc1e_95f1093b","line":1664,"updated":"2025-05-13 11:54:50.000000000","message":"i guess this replaces the SynchronousThreadPoolExecutorFixture()","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"a80dc3f827c4eaa6f455f8e5044d8f29b774fbfa","unresolved":false,"context_lines":[{"line_number":1661,"context_line":"                      fake_resource_tracker.RTMockMixin):"},{"line_number":1662,"context_line":"    def setUp(self):"},{"line_number":1663,"context_line":"        super(ComputeTestCase, self).setUp()"},{"line_number":1664,"context_line":"        self.compute._live_migration_executor \u003d futurist.SynchronousExecutor()"},{"line_number":1665,"context_line":"        self.useFixture(fixtures.SpawnIsSynchronousFixture())"},{"line_number":1666,"context_line":""},{"line_number":1667,"context_line":"        self.image_api \u003d image_api.API()"}],"source_content_type":"text/x-python","patch_set":12,"id":"e2f0a458_a17d9ee6","line":1664,"in_reply_to":"97b7fc1e_95f1093b","updated":"2025-05-19 12:08:11.000000000","message":"This was the only place that depended on the sync behavior. As I was able to explicitly use a pool during the setup I opted to it as the fixture was replacing all the pools.","commit_id":"1dddfc5799c3e99348fa80084c91dbdf501c3ebc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"fe4454b7c216213222aad1b936f23eb7fd4bd261","unresolved":true,"context_lines":[{"line_number":1662,"context_line":"        # This needs to go before we call setUp because the thread pool"},{"line_number":1663,"context_line":"        # executor is created in ComputeManager.__init__, which is called"},{"line_number":1664,"context_line":"        # during setUp."},{"line_number":1665,"context_line":"        self.useFixture(fixtures.SynchronousThreadPoolExecutorFixture())"},{"line_number":1666,"context_line":"        super(ComputeTestCase, self).setUp()"},{"line_number":1667,"context_line":"        self.useFixture(fixtures.SpawnIsSynchronousFixture())"},{"line_number":1668,"context_line":""}],"source_content_type":"text/x-python","patch_set":17,"id":"d058193d_1ac38813","side":"PARENT","line":1665,"updated":"2025-06-13 13:20:45.000000000","message":"@smooney@redhat.com: This was the only place where the executor was mocked to use synchronous execution via this fixture. This is still using sync execution after the patch just via futurist. For a single usage I don\u0027t want to maintain the fixture.","commit_id":"221a3e89e8988bc664298106ee691a4e41ca71f9"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"98efe15de9efb809fed5d37b1fd8b5ddbf7ec33c","unresolved":false,"context_lines":[{"line_number":1662,"context_line":"        # This needs to go before we call setUp because the thread pool"},{"line_number":1663,"context_line":"        # executor is created in ComputeManager.__init__, which is called"},{"line_number":1664,"context_line":"        # during setUp."},{"line_number":1665,"context_line":"        self.useFixture(fixtures.SynchronousThreadPoolExecutorFixture())"},{"line_number":1666,"context_line":"        super(ComputeTestCase, self).setUp()"},{"line_number":1667,"context_line":"        self.useFixture(fixtures.SpawnIsSynchronousFixture())"},{"line_number":1668,"context_line":""}],"source_content_type":"text/x-python","patch_set":17,"id":"839c7369_39e38b02","side":"PARENT","line":1665,"in_reply_to":"82fcb0ee_5621578c","updated":"2025-06-25 14:23:45.000000000","message":"Done\nthis found an actual bug and its adressed in a followup so im resolving this.\nim happy that the concern has been adressed.","commit_id":"221a3e89e8988bc664298106ee691a4e41ca71f9"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"5ee08c42ae50ce47fff5e60acfb959b52a94d796","unresolved":true,"context_lines":[{"line_number":1662,"context_line":"        # This needs to go before we call setUp because the thread pool"},{"line_number":1663,"context_line":"        # executor is created in ComputeManager.__init__, which is called"},{"line_number":1664,"context_line":"        # during setUp."},{"line_number":1665,"context_line":"        self.useFixture(fixtures.SynchronousThreadPoolExecutorFixture())"},{"line_number":1666,"context_line":"        super(ComputeTestCase, self).setUp()"},{"line_number":1667,"context_line":"        self.useFixture(fixtures.SpawnIsSynchronousFixture())"},{"line_number":1668,"context_line":""}],"source_content_type":"text/x-python","patch_set":17,"id":"82fcb0ee_5621578c","side":"PARENT","line":1665,"in_reply_to":"a6377cb6_f13131d7","updated":"2025-06-16 14:01:18.000000000","message":"\u003e i was also thjinking of this one \n\u003e https://github.com/openstack/nova/blob/d75507e679f64a7760e3344a257ed6f096ddae3c/nova/tests/fixtures/nova.py#L1272C1-L1280C51\n\u003e \n\u003e That makes all spawn calls be Synchronous.\n\u003e \n\u003e your modifyign that in the next patch\n\u003e \n\u003e https://review.opendev.org/c/openstack/nova/+/948072/15/nova/tests/fixtures/nova.py\n\u003e \n\nThat change does not change the fact that the call is synchronous it keeps executing the task when the task is created:\n```\n    def __init__(self, func, *args, **kwargs):\n        self._result \u003d func(*args, **kwargs)\n```\n\n\u003e currently fixtures.SpawnIsSynchronousFixture()\n\u003e makes scatter gather synchronous.\n\u003e \n\u003e spawnon does not use spanw and wont be caught by the fixture\n\nThis is a good point. I will fix it in the patch adding spawn_on.\n\n\u003e \n\u003e https://review.opendev.org/c/openstack/nova/+/948079/17/nova/utils.py\n\u003e \n\u003e either we need to add spwan on to fixtures.SpawnIsSynchronousFixture() or be confident that runign thos calls asynconosyly is fine.","commit_id":"221a3e89e8988bc664298106ee691a4e41ca71f9"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1b2f453adbca97171f5f4f389c576ba594b53b01","unresolved":true,"context_lines":[{"line_number":1662,"context_line":"        # This needs to go before we call setUp because the thread pool"},{"line_number":1663,"context_line":"        # executor is created in ComputeManager.__init__, which is called"},{"line_number":1664,"context_line":"        # during setUp."},{"line_number":1665,"context_line":"        self.useFixture(fixtures.SynchronousThreadPoolExecutorFixture())"},{"line_number":1666,"context_line":"        super(ComputeTestCase, self).setUp()"},{"line_number":1667,"context_line":"        self.useFixture(fixtures.SpawnIsSynchronousFixture())"},{"line_number":1668,"context_line":""}],"source_content_type":"text/x-python","patch_set":17,"id":"a6377cb6_f13131d7","side":"PARENT","line":1665,"in_reply_to":"d058193d_1ac38813","updated":"2025-06-13 18:40:30.000000000","message":"i was also thjinking of this one \nhttps://github.com/openstack/nova/blob/d75507e679f64a7760e3344a257ed6f096ddae3c/nova/tests/fixtures/nova.py#L1272C1-L1280C51\n\nThat makes all spawn calls be Synchronous.\n\nyour modifyign that in the next patch\n\nhttps://review.opendev.org/c/openstack/nova/+/948072/15/nova/tests/fixtures/nova.py\n\ncurrently fixtures.SpawnIsSynchronousFixture()\nmakes scatter gather synchronous.\n\nspawnon does not use spanw and wont be caught by the fixture\n\nhttps://review.opendev.org/c/openstack/nova/+/948079/17/nova/utils.py\n\neither we need to add spwan on to fixtures.SpawnIsSynchronousFixture() or be confident that runign thos calls asynconosyly is fine.","commit_id":"221a3e89e8988bc664298106ee691a4e41ca71f9"}],"nova/tests/unit/test_context.py":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb5d0b0dc3bf669fce54bc43bc0ea53abe2ca25b","unresolved":true,"context_lines":[{"line_number":370,"context_line":""},{"line_number":371,"context_line":"        def task(*args, **kwargs):"},{"line_number":372,"context_line":"            work.acquire()"},{"line_number":373,"context_line":"            return mock.sentinel.instances"},{"line_number":374,"context_line":""},{"line_number":375,"context_line":"        ctxt \u003d context.get_context()"},{"line_number":376,"context_line":"        mapping0 \u003d objects.CellMapping(database_connection\u003d\u0027fake://db0\u0027,"}],"source_content_type":"text/x-python","patch_set":14,"id":"edc5f246_71b6dcbd","line":373,"updated":"2025-05-26 13:01:48.000000000","message":"ok so this is how your forcing it to be Synchronous. makes sense.","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"312721a227b34627b8226ee82e3776ab9e0ba14c","unresolved":false,"context_lines":[{"line_number":370,"context_line":""},{"line_number":371,"context_line":"        def task(*args, **kwargs):"},{"line_number":372,"context_line":"            work.acquire()"},{"line_number":373,"context_line":"            return mock.sentinel.instances"},{"line_number":374,"context_line":""},{"line_number":375,"context_line":"        ctxt \u003d context.get_context()"},{"line_number":376,"context_line":"        mapping0 \u003d objects.CellMapping(database_connection\u003d\u0027fake://db0\u0027,"}],"source_content_type":"text/x-python","patch_set":14,"id":"f58ed014_c0785308","line":373,"in_reply_to":"edc5f246_71b6dcbd","updated":"2025-05-27 12:47:37.000000000","message":"Acknowledged","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb5d0b0dc3bf669fce54bc43bc0ea53abe2ca25b","unresolved":true,"context_lines":[{"line_number":389,"context_line":""},{"line_number":390,"context_line":"        # ensure that eventually the other task finishes to avoid triggering"},{"line_number":391,"context_line":"        # the leaked thread check at the test case cleanup"},{"line_number":392,"context_line":"        work.release()"},{"line_number":393,"context_line":"        utils.SCATTER_GATHER_EXECUTOR.shutdown(wait\u003dTrue)"},{"line_number":394,"context_line":""},{"line_number":395,"context_line":"    @mock.patch(\u0027nova.context.LOG.warning\u0027)"}],"source_content_type":"text/x-python","patch_set":14,"id":"60a1626f_cb7278b2","line":392,"updated":"2025-05-26 13:01:48.000000000","message":"+1 you could do self.addCleanup() to do this instead.\n\nif the assert above fail we wont call this currently but given the tesets are going to fail anyway it  does not change the outcome. it would be a more robst pattern to do that however in general.","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"312721a227b34627b8226ee82e3776ab9e0ba14c","unresolved":false,"context_lines":[{"line_number":389,"context_line":""},{"line_number":390,"context_line":"        # ensure that eventually the other task finishes to avoid triggering"},{"line_number":391,"context_line":"        # the leaked thread check at the test case cleanup"},{"line_number":392,"context_line":"        work.release()"},{"line_number":393,"context_line":"        utils.SCATTER_GATHER_EXECUTOR.shutdown(wait\u003dTrue)"},{"line_number":394,"context_line":""},{"line_number":395,"context_line":"    @mock.patch(\u0027nova.context.LOG.warning\u0027)"}],"source_content_type":"text/x-python","patch_set":14,"id":"7ea20f94_f15d66d8","line":392,"in_reply_to":"3f3b931f_a3eec400","updated":"2025-05-27 12:47:37.000000000","message":"Done","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"d5864c4c2c35c643abaccaf782df48ded21d1ace","unresolved":true,"context_lines":[{"line_number":389,"context_line":""},{"line_number":390,"context_line":"        # ensure that eventually the other task finishes to avoid triggering"},{"line_number":391,"context_line":"        # the leaked thread check at the test case cleanup"},{"line_number":392,"context_line":"        work.release()"},{"line_number":393,"context_line":"        utils.SCATTER_GATHER_EXECUTOR.shutdown(wait\u003dTrue)"},{"line_number":394,"context_line":""},{"line_number":395,"context_line":"    @mock.patch(\u0027nova.context.LOG.warning\u0027)"}],"source_content_type":"text/x-python","patch_set":14,"id":"3f3b931f_a3eec400","line":392,"in_reply_to":"60a1626f_cb7278b2","updated":"2025-05-27 08:01:50.000000000","message":"good point. Adding this to the cleanup is a better pattern and result in less confusing error when the test fails","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb5d0b0dc3bf669fce54bc43bc0ea53abe2ca25b","unresolved":true,"context_lines":[{"line_number":403,"context_line":""},{"line_number":404,"context_line":"        def task(*args, **kwargs):"},{"line_number":405,"context_line":"            work.wait()"},{"line_number":406,"context_line":"            return mock.sentinel.instances"},{"line_number":407,"context_line":""},{"line_number":408,"context_line":"        ctxt \u003d context.get_context()"},{"line_number":409,"context_line":"        mapping0 \u003d objects.CellMapping(database_connection\u003d\u0027fake://db0\u0027,"}],"source_content_type":"text/x-python","patch_set":14,"id":"d1e39a2c_3387b586","line":406,"updated":"2025-05-26 13:01:48.000000000","message":"is there a specific reaosn ot use a threading.event() here vs the \n\n work \u003d threading.Semaphore(value\u003d1)\n \n in the previous test.\n \n with the current usage teh behvaior shoudl be the same","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"d5864c4c2c35c643abaccaf782df48ded21d1ace","unresolved":true,"context_lines":[{"line_number":403,"context_line":""},{"line_number":404,"context_line":"        def task(*args, **kwargs):"},{"line_number":405,"context_line":"            work.wait()"},{"line_number":406,"context_line":"            return mock.sentinel.instances"},{"line_number":407,"context_line":""},{"line_number":408,"context_line":"        ctxt \u003d context.get_context()"},{"line_number":409,"context_line":"        mapping0 \u003d objects.CellMapping(database_connection\u003d\u0027fake://db0\u0027,"}],"source_content_type":"text/x-python","patch_set":14,"id":"fa9a3bcc_0a024b10","line":406,"in_reply_to":"d1e39a2c_3387b586","updated":"2025-05-27 08:01:50.000000000","message":"it is actually  different. The semaphore(1) can be acquired once right away. While the event will block until it is set. So the equivalent would be semaphore(0).","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"312721a227b34627b8226ee82e3776ab9e0ba14c","unresolved":false,"context_lines":[{"line_number":403,"context_line":""},{"line_number":404,"context_line":"        def task(*args, **kwargs):"},{"line_number":405,"context_line":"            work.wait()"},{"line_number":406,"context_line":"            return mock.sentinel.instances"},{"line_number":407,"context_line":""},{"line_number":408,"context_line":"        ctxt \u003d context.get_context()"},{"line_number":409,"context_line":"        mapping0 \u003d objects.CellMapping(database_connection\u003d\u0027fake://db0\u0027,"}],"source_content_type":"text/x-python","patch_set":14,"id":"578ae756_6616f813","line":406,"in_reply_to":"fa9a3bcc_0a024b10","updated":"2025-05-27 12:47:37.000000000","message":"Acknowledged","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"d5864c4c2c35c643abaccaf782df48ded21d1ace","unresolved":true,"context_lines":[{"line_number":423,"context_line":""},{"line_number":424,"context_line":"        # let the started task eventually finish so the thread leak check at"},{"line_number":425,"context_line":"        # the test case cleanup is satisfied."},{"line_number":426,"context_line":"        work.set()"},{"line_number":427,"context_line":"        utils.SCATTER_GATHER_EXECUTOR.shutdown(wait\u003dTrue)"},{"line_number":428,"context_line":""},{"line_number":429,"context_line":"        stats \u003d utils.SCATTER_GATHER_EXECUTOR.statistics"}],"source_content_type":"text/x-python","patch_set":14,"id":"961dc308_49b64d39","line":426,"updated":"2025-05-27 08:01:50.000000000","message":"here it is not that straightforward to apply the addCleanup method as we want to assert things after the event is set. But I will add an extra work.set() as cleanup so that if the test fails early we still unblock the thread.","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"312721a227b34627b8226ee82e3776ab9e0ba14c","unresolved":false,"context_lines":[{"line_number":423,"context_line":""},{"line_number":424,"context_line":"        # let the started task eventually finish so the thread leak check at"},{"line_number":425,"context_line":"        # the test case cleanup is satisfied."},{"line_number":426,"context_line":"        work.set()"},{"line_number":427,"context_line":"        utils.SCATTER_GATHER_EXECUTOR.shutdown(wait\u003dTrue)"},{"line_number":428,"context_line":""},{"line_number":429,"context_line":"        stats \u003d utils.SCATTER_GATHER_EXECUTOR.statistics"}],"source_content_type":"text/x-python","patch_set":14,"id":"66ef587a_059b0d2a","line":426,"in_reply_to":"961dc308_49b64d39","updated":"2025-05-27 12:47:37.000000000","message":"Done","commit_id":"c9bd933144f2920e66d33fa2a0b243e9c32c1f8a"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d68f0d37aba6d329d618c22ff7df3ee3005eb020","unresolved":true,"context_lines":[{"line_number":391,"context_line":""},{"line_number":392,"context_line":"        results \u003d context.scatter_gather_cells(ctxt, mappings, 1, task)"},{"line_number":393,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":394,"context_line":"        self.assertIn(mock.sentinel.instances, results.values())"},{"line_number":395,"context_line":"        self.assertIn(context.did_not_respond_sentinel, results.values())"},{"line_number":396,"context_line":"        self.assertTrue(mock_log_warning.called)"},{"line_number":397,"context_line":""}],"source_content_type":"text/x-python","patch_set":16,"id":"90e0bf54_df998f39","line":394,"updated":"2025-06-09 14:08:09.000000000","message":"This was good enough before, but I think now that you\u0027re returning the same from both workers, you need to tighten this to assert that *all* the responses were this right? Before, we expected one timeout, but if we got one unexpectedly in your new structure here, this would still pass.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a4fd51b897a6fdbc18046b537f64cdb4baa8d9c7","unresolved":false,"context_lines":[{"line_number":391,"context_line":""},{"line_number":392,"context_line":"        results \u003d context.scatter_gather_cells(ctxt, mappings, 1, task)"},{"line_number":393,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":394,"context_line":"        self.assertIn(mock.sentinel.instances, results.values())"},{"line_number":395,"context_line":"        self.assertIn(context.did_not_respond_sentinel, results.values())"},{"line_number":396,"context_line":"        self.assertTrue(mock_log_warning.called)"},{"line_number":397,"context_line":""}],"source_content_type":"text/x-python","patch_set":16,"id":"c20e9372_efc558fc","line":394,"in_reply_to":"8c215c03_6179cc5e","updated":"2025-06-18 18:20:04.000000000","message":"Acknowledged","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"40a1077c1f9b9c117c1206944be2e5d533538c67","unresolved":true,"context_lines":[{"line_number":391,"context_line":""},{"line_number":392,"context_line":"        results \u003d context.scatter_gather_cells(ctxt, mappings, 1, task)"},{"line_number":393,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":394,"context_line":"        self.assertIn(mock.sentinel.instances, results.values())"},{"line_number":395,"context_line":"        self.assertIn(context.did_not_respond_sentinel, results.values())"},{"line_number":396,"context_line":"        self.assertTrue(mock_log_warning.called)"},{"line_number":397,"context_line":""}],"source_content_type":"text/x-python","patch_set":16,"id":"8c215c03_6179cc5e","line":394,"in_reply_to":"90e0bf54_df998f39","updated":"2025-06-13 10:54:11.000000000","message":"the second task will only return mock.sentinel.instance during test cleanup. At this point the second task is not returned so the result will be context.did_not_respond_sentinel. So the length assert and the two In assert covers it. But I changed the two In assert to a single set equality (to avoid ordering diffs). Maybe that is more readable.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d68f0d37aba6d329d618c22ff7df3ee3005eb020","unresolved":true,"context_lines":[{"line_number":431,"context_line":""},{"line_number":432,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":433,"context_line":"        self.assertEqual("},{"line_number":434,"context_line":"            [context.did_not_respond_sentinel] * 2, list(results.values()))"},{"line_number":435,"context_line":""},{"line_number":436,"context_line":"        # let the started task eventually finish so the thread leak check at"},{"line_number":437,"context_line":"        # the test case cleanup is satisfied."}],"source_content_type":"text/x-python","patch_set":16,"id":"f3e38d42_0dbfd2a6","line":434,"updated":"2025-06-09 14:08:09.000000000","message":"I\u0027m curious why we needed to move from one test that verifies a success and a timeout to two tests, one where both succeed and one where both timeout. If there\u0027s some reason, can we have a third where the much more likely real-world behavior of mixed success/timeout results are tested?","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a4fd51b897a6fdbc18046b537f64cdb4baa8d9c7","unresolved":false,"context_lines":[{"line_number":431,"context_line":""},{"line_number":432,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":433,"context_line":"        self.assertEqual("},{"line_number":434,"context_line":"            [context.did_not_respond_sentinel] * 2, list(results.values()))"},{"line_number":435,"context_line":""},{"line_number":436,"context_line":"        # let the started task eventually finish so the thread leak check at"},{"line_number":437,"context_line":"        # the test case cleanup is satisfied."}],"source_content_type":"text/x-python","patch_set":16,"id":"9eb48487_bd21bf6b","line":434,"in_reply_to":"918756c0_a51ad7f2","updated":"2025-06-18 18:20:04.000000000","message":"Acknowledged","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"40a1077c1f9b9c117c1206944be2e5d533538c67","unresolved":true,"context_lines":[{"line_number":431,"context_line":""},{"line_number":432,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":433,"context_line":"        self.assertEqual("},{"line_number":434,"context_line":"            [context.did_not_respond_sentinel] * 2, list(results.values()))"},{"line_number":435,"context_line":""},{"line_number":436,"context_line":"        # let the started task eventually finish so the thread leak check at"},{"line_number":437,"context_line":"        # the test case cleanup is satisfied."}],"source_content_type":"text/x-python","patch_set":16,"id":"918756c0_a51ad7f2","line":434,"in_reply_to":"f3e38d42_0dbfd2a6","updated":"2025-06-13 10:54:11.000000000","message":"In test_scatter_gather_cells_timeout there are two tasks to run one will succeeds one will time out. The timed out task already started so it cannot be cancelled. So this tests the mixed result case.\n\nIn test_scatter_gather_cells_queued_task_cancelled both task times out. The first one already started, the second one is queued (as the executor has a single worker).","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a4fd51b897a6fdbc18046b537f64cdb4baa8d9c7","unresolved":true,"context_lines":[{"line_number":365,"context_line":""},{"line_number":366,"context_line":"    @mock.patch(\u0027nova.context.LOG.warning\u0027)"},{"line_number":367,"context_line":"    def test_scatter_gather_cells_timeout(self, mock_log_warning):"},{"line_number":368,"context_line":"        # Ensure only one task can finish the other will time out"},{"line_number":369,"context_line":"        work \u003d threading.Semaphore(value\u003d1)"},{"line_number":370,"context_line":""},{"line_number":371,"context_line":"        # ensure that eventually all task finishes to avoid triggering"}],"source_content_type":"text/x-python","patch_set":17,"id":"a1e4ff7b_971d218f","line":368,"updated":"2025-06-18 18:20:04.000000000","message":"I obviously didn\u0027t see (or grok) this comment before, but this addresses my concern directly.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"886c4d76f603dc32e67312eac9213557c8751c6d","unresolved":false,"context_lines":[{"line_number":365,"context_line":""},{"line_number":366,"context_line":"    @mock.patch(\u0027nova.context.LOG.warning\u0027)"},{"line_number":367,"context_line":"    def test_scatter_gather_cells_timeout(self, mock_log_warning):"},{"line_number":368,"context_line":"        # Ensure only one task can finish the other will time out"},{"line_number":369,"context_line":"        work \u003d threading.Semaphore(value\u003d1)"},{"line_number":370,"context_line":""},{"line_number":371,"context_line":"        # ensure that eventually all task finishes to avoid triggering"}],"source_content_type":"text/x-python","patch_set":17,"id":"d6741df6_bbd433b5","line":368,"in_reply_to":"a1e4ff7b_971d218f","updated":"2025-06-25 17:09:29.000000000","message":"Acknowledged","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":366,"context_line":"    @mock.patch(\u0027nova.context.LOG.warning\u0027)"},{"line_number":367,"context_line":"    def test_scatter_gather_cells_timeout(self, mock_log_warning):"},{"line_number":368,"context_line":"        # Ensure only one task can finish the other will time out"},{"line_number":369,"context_line":"        work \u003d threading.Semaphore(value\u003d1)"},{"line_number":370,"context_line":""},{"line_number":371,"context_line":"        # ensure that eventually all task finishes to avoid triggering"},{"line_number":372,"context_line":"        # the leaked thread check at the test case cleanup"}],"source_content_type":"text/x-python","patch_set":17,"id":"42defa5b_fa45d231","line":369,"updated":"2025-06-23 14:55:29.000000000","message":"++","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a4fd51b897a6fdbc18046b537f64cdb4baa8d9c7","unresolved":true,"context_lines":[{"line_number":391,"context_line":""},{"line_number":392,"context_line":"        results \u003d context.scatter_gather_cells(ctxt, mappings, 1, task)"},{"line_number":393,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":394,"context_line":"        self.assertEqual("},{"line_number":395,"context_line":"            {mock.sentinel.instances, context.did_not_respond_sentinel},"},{"line_number":396,"context_line":"            set(results.values()))"},{"line_number":397,"context_line":"        self.assertTrue(mock_log_warning.called)"}],"source_content_type":"text/x-python","patch_set":17,"id":"778c03cc_5676d2db","line":394,"updated":"2025-06-18 18:20:04.000000000","message":"Yeah I think this is what sent me off of track in the previous revision. This being `assertEqual()` with a result and a timeout is and important signal that we are indeed still seeing both types of responses. Having it as an `assertIn()` and only looking for the real result made me think it was only succeeding now. And, I guess I was mentally flipping the next one to `assertNotIn()`. I guess you did this because of determinism concerns, but I think sorting so we can `assertEqual()` is massively more obvious, to me at least.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"886c4d76f603dc32e67312eac9213557c8751c6d","unresolved":false,"context_lines":[{"line_number":391,"context_line":""},{"line_number":392,"context_line":"        results \u003d context.scatter_gather_cells(ctxt, mappings, 1, task)"},{"line_number":393,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":394,"context_line":"        self.assertEqual("},{"line_number":395,"context_line":"            {mock.sentinel.instances, context.did_not_respond_sentinel},"},{"line_number":396,"context_line":"            set(results.values()))"},{"line_number":397,"context_line":"        self.assertTrue(mock_log_warning.called)"}],"source_content_type":"text/x-python","patch_set":17,"id":"d470eb36_99558c36","line":394,"in_reply_to":"21ecfb26_5b0209f0","updated":"2025-06-25 17:09:29.000000000","message":"I added an assertEqual.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"98efe15de9efb809fed5d37b1fd8b5ddbf7ec33c","unresolved":true,"context_lines":[{"line_number":391,"context_line":""},{"line_number":392,"context_line":"        results \u003d context.scatter_gather_cells(ctxt, mappings, 1, task)"},{"line_number":393,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":394,"context_line":"        self.assertEqual("},{"line_number":395,"context_line":"            {mock.sentinel.instances, context.did_not_respond_sentinel},"},{"line_number":396,"context_line":"            set(results.values()))"},{"line_number":397,"context_line":"        self.assertTrue(mock_log_warning.called)"}],"source_content_type":"text/x-python","patch_set":17,"id":"21ecfb26_5b0209f0","line":394,"in_reply_to":"778c03cc_5676d2db","updated":"2025-06-25 14:23:45.000000000","message":"i think this is valid feedback but i also think this could be adressed in a followup so i wont resolve this for visableity but i dont think that shoudl block prgress on this patch.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a4fd51b897a6fdbc18046b537f64cdb4baa8d9c7","unresolved":true,"context_lines":[{"line_number":428,"context_line":"        mappings \u003d objects.CellMappingList(objects\u003d[mapping0, mapping1])"},{"line_number":429,"context_line":""},{"line_number":430,"context_line":"        results \u003d context.scatter_gather_cells("},{"line_number":431,"context_line":"            ctxt, mappings, 1, task)"},{"line_number":432,"context_line":""},{"line_number":433,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":434,"context_line":"        self.assertEqual("}],"source_content_type":"text/x-python","patch_set":17,"id":"90416ec5_aa585465","line":431,"updated":"2025-06-18 18:20:04.000000000","message":"So this is `timeout\u003d1`, which will actually burn 1s of wall clock time waiting for something to never happen.\n\nPerhaps a comment above this like:\n```\nWe call scatter gather with a short (1s) timeout, dispatching to tasks that will wait forever on our work event above.\n```\n\n...if you respin.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"038c06f367ec1101d870a5cd00198d886d720fc3","unresolved":false,"context_lines":[{"line_number":428,"context_line":"        mappings \u003d objects.CellMappingList(objects\u003d[mapping0, mapping1])"},{"line_number":429,"context_line":""},{"line_number":430,"context_line":"        results \u003d context.scatter_gather_cells("},{"line_number":431,"context_line":"            ctxt, mappings, 1, task)"},{"line_number":432,"context_line":""},{"line_number":433,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":434,"context_line":"        self.assertEqual("}],"source_content_type":"text/x-python","patch_set":17,"id":"40b84e08_7209613d","line":431,"in_reply_to":"0c866f71_6c7834ef","updated":"2025-06-25 17:11:08.000000000","message":"https://review.opendev.org/c/openstack/nova/+/953338","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"886c4d76f603dc32e67312eac9213557c8751c6d","unresolved":true,"context_lines":[{"line_number":428,"context_line":"        mappings \u003d objects.CellMappingList(objects\u003d[mapping0, mapping1])"},{"line_number":429,"context_line":""},{"line_number":430,"context_line":"        results \u003d context.scatter_gather_cells("},{"line_number":431,"context_line":"            ctxt, mappings, 1, task)"},{"line_number":432,"context_line":""},{"line_number":433,"context_line":"        self.assertEqual(2, len(results))"},{"line_number":434,"context_line":"        self.assertEqual("}],"source_content_type":"text/x-python","patch_set":17,"id":"0c866f71_6c7834ef","line":431,"in_reply_to":"90416ec5_aa585465","updated":"2025-06-25 17:09:29.000000000","message":"Done in a follow up","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"}],"nova/utils.py":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8152fa844b3127ede81d7112ea531301583e631a","unresolved":true,"context_lines":[{"line_number":643,"context_line":"    return trace_info"},{"line_number":644,"context_line":""},{"line_number":645,"context_line":""},{"line_number":646,"context_line":"def pass_context_wrapper(func):"},{"line_number":647,"context_line":"    \"\"\"Generalised passthrough method"},{"line_number":648,"context_line":"    It will grab the context from the threadlocal store and add it to"},{"line_number":649,"context_line":"    the store on the new thread.  This allows for continuity in logging the"}],"source_content_type":"text/x-python","patch_set":2,"id":"e7a95d4a_d2bfba7f","line":646,"updated":"2025-04-24 14:19:32.000000000","message":"this made public temporary until we can switch spawn over to return Futures.","commit_id":"0e6d854c0d770b8296f8e0850f1984235de169e3"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1f61fddb5018a9bffb93f67f36c033fb61a1f660","unresolved":false,"context_lines":[{"line_number":643,"context_line":"    return trace_info"},{"line_number":644,"context_line":""},{"line_number":645,"context_line":""},{"line_number":646,"context_line":"def pass_context_wrapper(func):"},{"line_number":647,"context_line":"    \"\"\"Generalised passthrough method"},{"line_number":648,"context_line":"    It will grab the context from the threadlocal store and add it to"},{"line_number":649,"context_line":"    the store on the new thread.  This allows for continuity in logging the"}],"source_content_type":"text/x-python","patch_set":2,"id":"50d26804_d5e325ed","line":646,"in_reply_to":"1ab32df9_6e005164","updated":"2025-05-13 11:54:50.000000000","message":"im honestly not too worried about htis being public \n\nits not as if python actually cares but I do agree that the underscore help develpers know that they should not sprinkle this in other parts of the code base but  sure  makes sense to do this i guess.\n\ni would not have objected to you just calling the \"priviate\" funciton where you needed too and resolving that in a follow up either.","commit_id":"0e6d854c0d770b8296f8e0850f1984235de169e3"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"267126435fff2f0300efff82d3b0d1271e7a027f","unresolved":true,"context_lines":[{"line_number":643,"context_line":"    return trace_info"},{"line_number":644,"context_line":""},{"line_number":645,"context_line":""},{"line_number":646,"context_line":"def pass_context_wrapper(func):"},{"line_number":647,"context_line":"    \"\"\"Generalised passthrough method"},{"line_number":648,"context_line":"    It will grab the context from the threadlocal store and add it to"},{"line_number":649,"context_line":"    the store on the new thread.  This allows for continuity in logging the"}],"source_content_type":"text/x-python","patch_set":2,"id":"1ab32df9_6e005164","line":646,"in_reply_to":"e7a95d4a_d2bfba7f","updated":"2025-04-25 11:19:07.000000000","message":"made it private in https://review.opendev.org/c/openstack/nova/+/948188/1?usp\u003drelated-change","commit_id":"0e6d854c0d770b8296f8e0850f1984235de169e3"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"ee9677fadc29a3002286c947b337fa20339b4e60","unresolved":true,"context_lines":[{"line_number":1250,"context_line":"    \"\"\"Returns true if the service is running in threading mode, false if"},{"line_number":1251,"context_line":"    running in Eventlet mode"},{"line_number":1252,"context_line":"    \"\"\""},{"line_number":1253,"context_line":"    env \u003d os.environ.get("},{"line_number":1254,"context_line":"        \u0027NOVA_CONCURRENCY_BACKEND\u0027, backend.BackendType.EVENTLET)"},{"line_number":1255,"context_line":"    return env \u003d\u003d backend.BackendType.THREADING"},{"line_number":1256,"context_line":""}],"source_content_type":"text/x-python","patch_set":2,"id":"b2537517_5d86cfc5","line":1253,"updated":"2025-04-24 15:47:00.000000000","message":"i woudl avoid springeling env check like this in the code.\n\ni woudl prefer to just use nova.monkey_patch.patched()\n\ni was intoducing that as the single canonical way to determin of we shoudl patch\n\nsince we arelay ahve a enver varibale that contol if we monkey patch or not\n\nhttps://github.com/openstack/nova/blob/master/nova/monkey_patch.py#L76-L78\n\ni dont really like addign second new one.","commit_id":"0e6d854c0d770b8296f8e0850f1984235de169e3"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"aa3c08176ea55a3dccb8fb3119e6c508b0d61400","unresolved":true,"context_lines":[{"line_number":1250,"context_line":"    \"\"\"Returns true if the service is running in threading mode, false if"},{"line_number":1251,"context_line":"    running in Eventlet mode"},{"line_number":1252,"context_line":"    \"\"\""},{"line_number":1253,"context_line":"    env \u003d os.environ.get("},{"line_number":1254,"context_line":"        \u0027NOVA_CONCURRENCY_BACKEND\u0027, backend.BackendType.EVENTLET)"},{"line_number":1255,"context_line":"    return env \u003d\u003d backend.BackendType.THREADING"},{"line_number":1256,"context_line":""}],"source_content_type":"text/x-python","patch_set":2,"id":"f5156444_5647d06e","line":1253,"in_reply_to":"4c2126e9_118e2a30","updated":"2025-04-25 11:18:36.000000000","message":"Yeah this is pretty raw at the moment. The high level solution needs to depend on an ENV variable and then directly or indirectly affect this code. \n\nI.e. in general\nEVN is set to threading then\n\u003d\u003e do not monkey patch\n\u003d\u003e use the threading oslo.service backend\n\u003d\u003e switch our executors to ThreadPoolExecutor\n\u003d\u003e make concurrency_mode_threading return True so all the branched code paths (hopefully it won\u0027t be many) knows what to do.\n\nI think I\u0027m OK to cascade this to something like:\n\nENV is set to threading:\n* \u003d\u003e check the EVN and do not monkey patch. (this is new code to be added before this patch)\n   * also poison monkey_patch to prevent accidental patching (new code as well)\n\n* \u003d\u003e concurrency_mode_threading is implemented by calling monkey_patch.is_patched()\n* \u003d\u003e oslo.service backend selection is implemented by calling concurrency_mode_threading (backend selection is also new code to be added)\n* \u003d\u003e executor selection is implemented by calling concurrency_mode_threading\n\n\nWe can discuss this while https://review.opendev.org/c/openstack/nova/+/922425 lands and while I making progress to do some devstack level local testing with at least the nova-scheduler running in threading mode.","commit_id":"0e6d854c0d770b8296f8e0850f1984235de169e3"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"6f6d7d51fe30c0f55c1baf023c8882ea8b6f7783","unresolved":false,"context_lines":[{"line_number":1250,"context_line":"    \"\"\"Returns true if the service is running in threading mode, false if"},{"line_number":1251,"context_line":"    running in Eventlet mode"},{"line_number":1252,"context_line":"    \"\"\""},{"line_number":1253,"context_line":"    env \u003d os.environ.get("},{"line_number":1254,"context_line":"        \u0027NOVA_CONCURRENCY_BACKEND\u0027, backend.BackendType.EVENTLET)"},{"line_number":1255,"context_line":"    return env \u003d\u003d backend.BackendType.THREADING"},{"line_number":1256,"context_line":""}],"source_content_type":"text/x-python","patch_set":2,"id":"1fb88b85_bac20d28","line":1253,"in_reply_to":"4ee2a4ec_c5581476","updated":"2025-05-05 14:45:43.000000000","message":"Done","commit_id":"0e6d854c0d770b8296f8e0850f1984235de169e3"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"4d03d6a3e515a882e467008377ce13db52d1aa3e","unresolved":true,"context_lines":[{"line_number":1250,"context_line":"    \"\"\"Returns true if the service is running in threading mode, false if"},{"line_number":1251,"context_line":"    running in Eventlet mode"},{"line_number":1252,"context_line":"    \"\"\""},{"line_number":1253,"context_line":"    env \u003d os.environ.get("},{"line_number":1254,"context_line":"        \u0027NOVA_CONCURRENCY_BACKEND\u0027, backend.BackendType.EVENTLET)"},{"line_number":1255,"context_line":"    return env \u003d\u003d backend.BackendType.THREADING"},{"line_number":1256,"context_line":""}],"source_content_type":"text/x-python","patch_set":2,"id":"4c2126e9_118e2a30","line":1253,"in_reply_to":"b2537517_5d86cfc5","updated":"2025-04-24 17:11:56.000000000","message":"i.e. https://review.opendev.org/c/openstack/nova/+/922425","commit_id":"0e6d854c0d770b8296f8e0850f1984235de169e3"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"5d12425a023b6b18044bb4528e45a4f40adaacf1","unresolved":true,"context_lines":[{"line_number":1250,"context_line":"    \"\"\"Returns true if the service is running in threading mode, false if"},{"line_number":1251,"context_line":"    running in Eventlet mode"},{"line_number":1252,"context_line":"    \"\"\""},{"line_number":1253,"context_line":"    env \u003d os.environ.get("},{"line_number":1254,"context_line":"        \u0027NOVA_CONCURRENCY_BACKEND\u0027, backend.BackendType.EVENTLET)"},{"line_number":1255,"context_line":"    return env \u003d\u003d backend.BackendType.THREADING"},{"line_number":1256,"context_line":""}],"source_content_type":"text/x-python","patch_set":2,"id":"4ee2a4ec_c5581476","line":1253,"in_reply_to":"f5156444_5647d06e","updated":"2025-05-05 14:25:57.000000000","message":"I turned out that this can be reimplemented meaningfully based on monkey_patch.is_patched, see https://review.opendev.org/c/openstack/nova/+/948311/9/nova/utils.py#1247 So I will move those pieces here so we don\u0027t need an extra env variable.","commit_id":"0e6d854c0d770b8296f8e0850f1984235de169e3"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"bac4efd840ccb8274fb6279ecfab777ce42b8fa2","unresolved":true,"context_lines":[{"line_number":1254,"context_line":"    return not monkey_patch.is_patched()"},{"line_number":1255,"context_line":""},{"line_number":1256,"context_line":""},{"line_number":1257,"context_line":"SCATTER_GATHER_EXECUTOR \u003d None"},{"line_number":1258,"context_line":""},{"line_number":1259,"context_line":""},{"line_number":1260,"context_line":"def get_scatter_gather_executor():"}],"source_content_type":"text/x-python","patch_set":8,"id":"076b3e61_92868e79","line":1257,"updated":"2025-05-06 14:48:19.000000000","message":"so we could do the following\nimport threading\nthreadLocal \u003d threading.local()\nthreadlocal.SCATTER_GATHER_EXECUTOR \u003d None\n\nand then use threadlocal.SCATTER_GATHER_EXECUTOR  every wehre we use SCATTER_GATHER_EXECUTOR today.\nwhen we fork it would be unaisalsized in the chile process (i think None)\nso on first use it would be inialzed again.\n\nthis will only work i we dont do a scatter gater from scater gather but that seam unlikely and error prone if we were to do that.","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"bb8b24f562053ee42b5df94addce9bcc5e9a9f73","unresolved":true,"context_lines":[{"line_number":1254,"context_line":"    return not monkey_patch.is_patched()"},{"line_number":1255,"context_line":""},{"line_number":1256,"context_line":""},{"line_number":1257,"context_line":"SCATTER_GATHER_EXECUTOR \u003d None"},{"line_number":1258,"context_line":""},{"line_number":1259,"context_line":""},{"line_number":1260,"context_line":"def get_scatter_gather_executor():"}],"source_content_type":"text/x-python","patch_set":8,"id":"ec50a16c_12444d22","line":1257,"in_reply_to":"076b3e61_92868e79","updated":"2025-05-06 14:48:55.000000000","message":"https://docs.python.org/3/library/threading.html#thread-local-data","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0adb295dbf271ba2c64b9310529de1824529b909","unresolved":true,"context_lines":[{"line_number":1254,"context_line":"    return not monkey_patch.is_patched()"},{"line_number":1255,"context_line":""},{"line_number":1256,"context_line":""},{"line_number":1257,"context_line":"SCATTER_GATHER_EXECUTOR \u003d None"},{"line_number":1258,"context_line":""},{"line_number":1259,"context_line":""},{"line_number":1260,"context_line":"def get_scatter_gather_executor():"}],"source_content_type":"text/x-python","patch_set":8,"id":"b433168d_946f8523","line":1257,"in_reply_to":"536fd38a_4e83eaa0","updated":"2025-05-08 15:03:34.000000000","message":"Yeah, I think that has the potential to get really out of hand, so I\u0027d prefer not using thread-local for this. It also makes debugging or detection of the case where we didn\u0027t expect to create a new pool harder.","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"f262b01b81a7c836294ea1d4db874fff3005d3ab","unresolved":false,"context_lines":[{"line_number":1254,"context_line":"    return not monkey_patch.is_patched()"},{"line_number":1255,"context_line":""},{"line_number":1256,"context_line":""},{"line_number":1257,"context_line":"SCATTER_GATHER_EXECUTOR \u003d None"},{"line_number":1258,"context_line":""},{"line_number":1259,"context_line":""},{"line_number":1260,"context_line":"def get_scatter_gather_executor():"}],"source_content_type":"text/x-python","patch_set":8,"id":"35ff8d53_b0207390","line":1257,"in_reply_to":"b433168d_946f8523","updated":"2025-05-08 15:19:18.000000000","message":"that fair. i really wanted a \"process local\" equivelnt.\n\nthread local was a strech\n\nyour correct that periodic would not work well. They would all get there\nown pool and then use more resources then intended.","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"975b8219eea8896108bf999cc43ea365c8803590","unresolved":true,"context_lines":[{"line_number":1254,"context_line":"    return not monkey_patch.is_patched()"},{"line_number":1255,"context_line":""},{"line_number":1256,"context_line":""},{"line_number":1257,"context_line":"SCATTER_GATHER_EXECUTOR \u003d None"},{"line_number":1258,"context_line":""},{"line_number":1259,"context_line":""},{"line_number":1260,"context_line":"def get_scatter_gather_executor():"}],"source_content_type":"text/x-python","patch_set":8,"id":"536fd38a_4e83eaa0","line":1257,"in_reply_to":"ec50a16c_12444d22","updated":"2025-05-07 12:23:32.000000000","message":"I think the prime example when this will not work is when a periodic running in a thread needs to run scatter-gather. That periodic will then init a new pool for itself.","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8926850ba6f8d04ef25643e844df70f9c1a5418e","unresolved":true,"context_lines":[{"line_number":1272,"context_line":"        # master to the child process, we need to re-initialize the executor to"},{"line_number":1273,"context_line":"        # avoid working with the wrong internal state (i.e. number of workers"},{"line_number":1274,"context_line":"        # idle in the pool). A long therm solution would be to use os.spawn"},{"line_number":1275,"context_line":"        # instead of os.fork for the workers."},{"line_number":1276,"context_line":"        SCATTER_GATHER_EXECUTOR.name !\u003d executor_name"},{"line_number":1277,"context_line":"    ):"},{"line_number":1278,"context_line":"        if concurrency_mode_threading():"}],"source_content_type":"text/x-python","patch_set":8,"id":"0f1b62ef_1b0e6a4b","line":1275,"updated":"2025-05-06 14:28:03.000000000","message":"Breaking up a condition in an `if` statement with a quarter screen of comments really hurts readability, IMHO. Could you put this above something like:\n```\nneeds_reinit \u003d SCATTER_GATHER_EXECUTOR.name !\u003d executor_name\n```\nand keep the comment out of the actual `if` itself? To me, it\u0027s ideal if I can read an `if` statement symbolically to reason about what it\u0027s testing and this amount of split between two related things makes it really hard.","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"975b8219eea8896108bf999cc43ea365c8803590","unresolved":true,"context_lines":[{"line_number":1272,"context_line":"        # master to the child process, we need to re-initialize the executor to"},{"line_number":1273,"context_line":"        # avoid working with the wrong internal state (i.e. number of workers"},{"line_number":1274,"context_line":"        # idle in the pool). A long therm solution would be to use os.spawn"},{"line_number":1275,"context_line":"        # instead of os.fork for the workers."},{"line_number":1276,"context_line":"        SCATTER_GATHER_EXECUTOR.name !\u003d executor_name"},{"line_number":1277,"context_line":"    ):"},{"line_number":1278,"context_line":"        if concurrency_mode_threading():"}],"source_content_type":"text/x-python","patch_set":8,"id":"858df05e_9134b07f","line":1275,"in_reply_to":"0f1b62ef_1b0e6a4b","updated":"2025-05-07 12:23:32.000000000","message":"I will move the comment. \nI cannot use `needs_reinit \u003d SCATTER_GATHER_EXECUTOR.name !\u003d executor_name` without a nested `if` as SCATTER_GATHER_EXECUTOR might not even defined at the current level.","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0adb295dbf271ba2c64b9310529de1824529b909","unresolved":false,"context_lines":[{"line_number":1272,"context_line":"        # master to the child process, we need to re-initialize the executor to"},{"line_number":1273,"context_line":"        # avoid working with the wrong internal state (i.e. number of workers"},{"line_number":1274,"context_line":"        # idle in the pool). A long therm solution would be to use os.spawn"},{"line_number":1275,"context_line":"        # instead of os.fork for the workers."},{"line_number":1276,"context_line":"        SCATTER_GATHER_EXECUTOR.name !\u003d executor_name"},{"line_number":1277,"context_line":"    ):"},{"line_number":1278,"context_line":"        if concurrency_mode_threading():"}],"source_content_type":"text/x-python","patch_set":8,"id":"b5db646f_22985f2c","line":1275,"in_reply_to":"858df05e_9134b07f","updated":"2025-05-08 15:03:34.000000000","message":"Acknowledged","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8926850ba6f8d04ef25643e844df70f9c1a5418e","unresolved":true,"context_lines":[{"line_number":1273,"context_line":"        # avoid working with the wrong internal state (i.e. number of workers"},{"line_number":1274,"context_line":"        # idle in the pool). A long therm solution would be to use os.spawn"},{"line_number":1275,"context_line":"        # instead of os.fork for the workers."},{"line_number":1276,"context_line":"        SCATTER_GATHER_EXECUTOR.name !\u003d executor_name"},{"line_number":1277,"context_line":"    ):"},{"line_number":1278,"context_line":"        if concurrency_mode_threading():"},{"line_number":1279,"context_line":"            SCATTER_GATHER_EXECUTOR \u003d futurist.ThreadPoolExecutor("}],"source_content_type":"text/x-python","patch_set":8,"id":"89e2cc49_cbe0631f","line":1276,"updated":"2025-05-06 14:28:03.000000000","message":"The docs say that \"multiple processes may be given the same name\" so I\u0027m not sure this is really the best way to do this:\n\nhttps://docs.python.org/3/library/multiprocessing.html#multiprocessing.Process.name\n\nI know we\u0027re probably not having clashes the way this is getting used, but it seems rather weak to me.\n\nThe other solution here is to make sure we don\u0027t ever initialize this before the `fork()` for our worker processes, right? Where is this being done so early?","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"975b8219eea8896108bf999cc43ea365c8803590","unresolved":true,"context_lines":[{"line_number":1273,"context_line":"        # avoid working with the wrong internal state (i.e. number of workers"},{"line_number":1274,"context_line":"        # idle in the pool). A long therm solution would be to use os.spawn"},{"line_number":1275,"context_line":"        # instead of os.fork for the workers."},{"line_number":1276,"context_line":"        SCATTER_GATHER_EXECUTOR.name !\u003d executor_name"},{"line_number":1277,"context_line":"    ):"},{"line_number":1278,"context_line":"        if concurrency_mode_threading():"},{"line_number":1279,"context_line":"            SCATTER_GATHER_EXECUTOR \u003d futurist.ThreadPoolExecutor("}],"source_content_type":"text/x-python","patch_set":8,"id":"f19c4a21_7936f43f","line":1276,"in_reply_to":"89e2cc49_cbe0631f","updated":"2025-05-07 12:23:32.000000000","message":"The forking happens at[1].\n\nSo anything that uses extra threads in Service.create() will initialize the pool. E.g. the minimum service version check in [2]. Moving that check from the master process (pre-fork) to the worker processes (post-fork) would:\n* increase the number of minimum version checks we run, from 1 to number of workers\n* require a way for the worker to signal the master that the worker exited due to an unrecoverable error, so the master should not just recreate the worker blindly (this is the current hardcoded behavior).\n\nWe discussed on IRC that an alternative would be to de-init the pools just before the forking making the startup state of the workers clean. I will try that now, but I expect it will be a bit cleaner than the current post-fork re-init.\n\n[1]https://github.com/openstack/nova/blob/a106f25e8e45eb9f69c6dc6313500c3dac186bc5/nova/cmd/scheduler.py#L51\n\n[2]https://github.com/openstack/nova/blob/a106f25e8e45eb9f69c6dc6313500c3dac186bc5/nova/service.py#L258","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0adb295dbf271ba2c64b9310529de1824529b909","unresolved":false,"context_lines":[{"line_number":1273,"context_line":"        # avoid working with the wrong internal state (i.e. number of workers"},{"line_number":1274,"context_line":"        # idle in the pool). A long therm solution would be to use os.spawn"},{"line_number":1275,"context_line":"        # instead of os.fork for the workers."},{"line_number":1276,"context_line":"        SCATTER_GATHER_EXECUTOR.name !\u003d executor_name"},{"line_number":1277,"context_line":"    ):"},{"line_number":1278,"context_line":"        if concurrency_mode_threading():"},{"line_number":1279,"context_line":"            SCATTER_GATHER_EXECUTOR \u003d futurist.ThreadPoolExecutor("}],"source_content_type":"text/x-python","patch_set":8,"id":"b97d1faa_a63af45d","line":1276,"in_reply_to":"f19c4a21_7936f43f","updated":"2025-05-08 15:03:34.000000000","message":"Done","commit_id":"1e0608a581e2667ec38f98c75cf5fc59ade9f502"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0adb295dbf271ba2c64b9310529de1824529b909","unresolved":true,"context_lines":[{"line_number":1283,"context_line":"    if SCATTER_GATHER_EXECUTOR:"},{"line_number":1284,"context_line":"        SCATTER_GATHER_EXECUTOR.shutdown()"},{"line_number":1285,"context_line":""},{"line_number":1286,"context_line":"    SCATTER_GATHER_EXECUTOR \u003d None"}],"source_content_type":"text/x-python","patch_set":9,"id":"dd7fc7e3_f4e926fb","line":1286,"updated":"2025-05-08 15:03:34.000000000","message":"This is only called in one place now, so maybe not a big deal, but I wonder if we might want to just log (even debug) here so that we\u0027ll notice if this ever gets called from a place we don\u0027t expect it.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"43e4ef2563f37a7912adef63bcf554b1ef737eac","unresolved":true,"context_lines":[{"line_number":1283,"context_line":"    if SCATTER_GATHER_EXECUTOR:"},{"line_number":1284,"context_line":"        SCATTER_GATHER_EXECUTOR.shutdown()"},{"line_number":1285,"context_line":""},{"line_number":1286,"context_line":"    SCATTER_GATHER_EXECUTOR \u003d None"}],"source_content_type":"text/x-python","patch_set":9,"id":"ccf56fa7_6d5ab1df","line":1286,"in_reply_to":"31015bb6_a4e7e817","updated":"2025-05-09 07:20:15.000000000","message":"Adding logging to the destroy make sense. I will do it.\n\nAbout the functools.cache I think an explicit singleton is better than an implicit one hiding in a cache implementation. So I would rather keep things explicit. \n\nWhat I\u0027m wondering instead is that we will have multiple pools and each pool needs this kind of destroyable singleton behavior. So I will experiment with an abstraction around our global pools to have the common creation and destroy logic not repeated, and having a single call to destroy *all* pools pre-fork to avoid missing a destroy call.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"e22f63c2f6bf0d698c0cbcc539adda968cc278f4","unresolved":false,"context_lines":[{"line_number":1283,"context_line":"    if SCATTER_GATHER_EXECUTOR:"},{"line_number":1284,"context_line":"        SCATTER_GATHER_EXECUTOR.shutdown()"},{"line_number":1285,"context_line":""},{"line_number":1286,"context_line":"    SCATTER_GATHER_EXECUTOR \u003d None"}],"source_content_type":"text/x-python","patch_set":9,"id":"a9660ee3_49fc98fc","line":1286,"in_reply_to":"9ae2f266_581eaf60","updated":"2025-05-13 08:00:22.000000000","message":"I did some local back an forth about an abstraction around our pools to have a common destroy logic but none of them seemed simpler / cleaner than what we have here now. So for now I will keep it as is and focus on cleaning up the patch series  now.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"e71b1685720876d51b1a0d1f4c03b50fdc88b7b1","unresolved":true,"context_lines":[{"line_number":1283,"context_line":"    if SCATTER_GATHER_EXECUTOR:"},{"line_number":1284,"context_line":"        SCATTER_GATHER_EXECUTOR.shutdown()"},{"line_number":1285,"context_line":""},{"line_number":1286,"context_line":"    SCATTER_GATHER_EXECUTOR \u003d None"}],"source_content_type":"text/x-python","patch_set":9,"id":"9ae2f266_581eaf60","line":1286,"in_reply_to":"ccf56fa7_6d5ab1df","updated":"2025-05-09 14:41:13.000000000","message":"Agree, explicit is better than implicit for something as important as a threadpool.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"f262b01b81a7c836294ea1d4db874fff3005d3ab","unresolved":true,"context_lines":[{"line_number":1283,"context_line":"    if SCATTER_GATHER_EXECUTOR:"},{"line_number":1284,"context_line":"        SCATTER_GATHER_EXECUTOR.shutdown()"},{"line_number":1285,"context_line":""},{"line_number":1286,"context_line":"    SCATTER_GATHER_EXECUTOR \u003d None"}],"source_content_type":"text/x-python","patch_set":9,"id":"31015bb6_a4e7e817","line":1286,"in_reply_to":"dd7fc7e3_f4e926fb","updated":"2025-05-08 15:19:18.000000000","message":"ya perhaps. \n\nthe other thing im wondering is if we are going to have more of tese manualy implementign singletons is proably not the best long germ approch\n\nwe coudl instead drop the global and just have a function that returns a thread pool executor then apply memorize/cache decorator to that so it always returns the same one then we can just reset the cash.\n\nso https://docs.python.org/3/library/functools.html#functools.cache\n\nthe wrapped fucntion has a  cache_clear() \n\nso you woudl instead drop all SCATTER_GATHER_EXECUTOR\n\nand do \n```\n@functools.cache\ndef get_scatter_gather_executor():\n    if concurrency_mode_threading():\n        return futurist.ThreadPoolExecutor(CONF.scatter_gather_thread_pool_size)\n    else:\n        return futurist.GreenThreadPoolExecutor()\n```\n\nand instead fo calling  `nova.utils.destroy_scatter_gather_executor()`\n\nwe jsut do `nova.utils.get_scatter_gather_executorcache_clear()`\n\nwe can add logging and the executor_name stuff too if we want too.\n\ni think that will be cleaner in teh long run.","commit_id":"fbdf6b7fdb94d627cc5ac7670f02fe450d65eb10"},{"author":{"_account_id":16207,"name":"ribaudr","display_name":"uggla","email":"rene.ribaud@gmail.com","username":"uggla","status":"Red Hat"},"change_message_id":"2304e23b537ea30754b1962ccc465ac51dd971f0","unresolved":true,"context_lines":[{"line_number":1260,"context_line":"    running in Eventlet mode"},{"line_number":1261,"context_line":"    \"\"\""},{"line_number":1262,"context_line":"    from nova import monkey_patch"},{"line_number":1263,"context_line":"    return not monkey_patch.is_patched()"},{"line_number":1264,"context_line":""},{"line_number":1265,"context_line":""},{"line_number":1266,"context_line":"SCATTER_GATHER_EXECUTOR \u003d None"}],"source_content_type":"text/x-python","patch_set":16,"id":"f8666889_6d2f1bec","line":1263,"range":{"start_line":1263,"start_character":15,"end_line":1263,"end_character":40},"updated":"2025-06-04 14:17:41.000000000","message":"Note for myself:\nmonkey_path is the trigger if used then we use greenthread (eventlet).","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f2a5c8bb96ff6aec2e830b9bd338477a0faf5f53","unresolved":false,"context_lines":[{"line_number":1260,"context_line":"    running in Eventlet mode"},{"line_number":1261,"context_line":"    \"\"\""},{"line_number":1262,"context_line":"    from nova import monkey_patch"},{"line_number":1263,"context_line":"    return not monkey_patch.is_patched()"},{"line_number":1264,"context_line":""},{"line_number":1265,"context_line":""},{"line_number":1266,"context_line":"SCATTER_GATHER_EXECUTOR \u003d None"}],"source_content_type":"text/x-python","patch_set":16,"id":"ce2f3a8f_dadb1ecd","line":1263,"range":{"start_line":1263,"start_character":15,"end_line":1263,"end_character":40},"in_reply_to":"f8666889_6d2f1bec","updated":"2025-06-05 13:05:10.000000000","message":"correct.","commit_id":"0b875114d8afa1f7d85e992e4cba00a75756f725"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":true,"context_lines":[{"line_number":652,"context_line":"    return trace_info"},{"line_number":653,"context_line":""},{"line_number":654,"context_line":""},{"line_number":655,"context_line":"def pass_context_wrapper(func):"},{"line_number":656,"context_line":"    \"\"\"Generalised passthrough method"},{"line_number":657,"context_line":"    It will grab the context from the threadlocal store and add it to"},{"line_number":658,"context_line":"    the store on the new thread.  This allows for continuity in logging the"}],"source_content_type":"text/x-python","patch_set":17,"id":"255480aa_92864e39","line":655,"updated":"2025-06-23 14:55:29.000000000","message":"I tried hard to see *why* you need to split pass_context() in twice and have another public method that was wrapped but I guess I\u0027ll understand it later in the series.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"98efe15de9efb809fed5d37b1fd8b5ddbf7ec33c","unresolved":true,"context_lines":[{"line_number":652,"context_line":"    return trace_info"},{"line_number":653,"context_line":""},{"line_number":654,"context_line":""},{"line_number":655,"context_line":"def pass_context_wrapper(func):"},{"line_number":656,"context_line":"    \"\"\"Generalised passthrough method"},{"line_number":657,"context_line":"    It will grab the context from the threadlocal store and add it to"},{"line_number":658,"context_line":"    the store on the new thread.  This allows for continuity in logging the"}],"source_content_type":"text/x-python","patch_set":17,"id":"ba828278_cfdeedb3","line":655,"in_reply_to":"255480aa_92864e39","updated":"2025-06-25 14:23:45.000000000","message":"this becomes private again later but i think the current series of patches make sesne.\n\nthis is mroe a transtionaty state to allow the commtis to be somewhat self contianed.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"886c4d76f603dc32e67312eac9213557c8751c6d","unresolved":false,"context_lines":[{"line_number":652,"context_line":"    return trace_info"},{"line_number":653,"context_line":""},{"line_number":654,"context_line":""},{"line_number":655,"context_line":"def pass_context_wrapper(func):"},{"line_number":656,"context_line":"    \"\"\"Generalised passthrough method"},{"line_number":657,"context_line":"    It will grab the context from the threadlocal store and add it to"},{"line_number":658,"context_line":"    the store on the new thread.  This allows for continuity in logging the"}],"source_content_type":"text/x-python","patch_set":17,"id":"0e5d255c_e941fa59","line":655,"in_reply_to":"9858e8fe_4aa6493a","updated":"2025-06-25 17:09:29.000000000","message":"yepp as Sean said, it is a transitional thing so that I can do the refactor in baby steps.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"eea7031dfa59be923abb40e1a41689b45fa66da3","unresolved":false,"context_lines":[{"line_number":652,"context_line":"    return trace_info"},{"line_number":653,"context_line":""},{"line_number":654,"context_line":""},{"line_number":655,"context_line":"def pass_context_wrapper(func):"},{"line_number":656,"context_line":"    \"\"\"Generalised passthrough method"},{"line_number":657,"context_line":"    It will grab the context from the threadlocal store and add it to"},{"line_number":658,"context_line":"    the store on the new thread.  This allows for continuity in logging the"}],"source_content_type":"text/x-python","patch_set":17,"id":"9858e8fe_4aa6493a","line":655,"in_reply_to":"ba828278_cfdeedb3","updated":"2025-06-25 14:43:42.000000000","message":"Acknowledged","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":1260,"context_line":"    running in Eventlet mode"},{"line_number":1261,"context_line":"    \"\"\""},{"line_number":1262,"context_line":"    from nova import monkey_patch"},{"line_number":1263,"context_line":"    return not monkey_patch.is_patched()"},{"line_number":1264,"context_line":""},{"line_number":1265,"context_line":""},{"line_number":1266,"context_line":"SCATTER_GATHER_EXECUTOR \u003d None"}],"source_content_type":"text/x-python","patch_set":17,"id":"192c06e8_702696a7","line":1263,"updated":"2025-06-23 14:55:29.000000000","message":"I like that signal, less risky than checking an opt value.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":false,"context_lines":[{"line_number":1266,"context_line":"SCATTER_GATHER_EXECUTOR \u003d None"},{"line_number":1267,"context_line":""},{"line_number":1268,"context_line":""},{"line_number":1269,"context_line":"def get_scatter_gather_executor():"},{"line_number":1270,"context_line":"    \"\"\"Returns the executor used for scatter/gather operations.\"\"\""},{"line_number":1271,"context_line":"    global SCATTER_GATHER_EXECUTOR"},{"line_number":1272,"context_line":""}],"source_content_type":"text/x-python","patch_set":17,"id":"104b9440_c9d5b971","line":1269,"updated":"2025-06-23 14:55:29.000000000","message":"that\u0027s a good Facade","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"1bdd520d2ab15f92773b9e2f46cd81f48f106bbf","unresolved":true,"context_lines":[{"line_number":1296,"context_line":"        LOG.info("},{"line_number":1297,"context_line":"            \"The cell worker thread pool %s is shutting down\","},{"line_number":1298,"context_line":"            SCATTER_GATHER_EXECUTOR.name)"},{"line_number":1299,"context_line":"        SCATTER_GATHER_EXECUTOR.shutdown()"},{"line_number":1300,"context_line":"        LOG.info("},{"line_number":1301,"context_line":"            \"The cell worker thread pool %s is closed\","},{"line_number":1302,"context_line":"            SCATTER_GATHER_EXECUTOR.name)"}],"source_content_type":"text/x-python","patch_set":17,"id":"81a023a5_97fe29cc","line":1299,"updated":"2025-06-23 14:55:29.000000000","message":"we could wrap a RunTime error check here, as shutdown() could raise this exception if we submit a new call to the Executor, but given we unset the global var, there are no ways to call it again.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"886c4d76f603dc32e67312eac9213557c8751c6d","unresolved":false,"context_lines":[{"line_number":1296,"context_line":"        LOG.info("},{"line_number":1297,"context_line":"            \"The cell worker thread pool %s is shutting down\","},{"line_number":1298,"context_line":"            SCATTER_GATHER_EXECUTOR.name)"},{"line_number":1299,"context_line":"        SCATTER_GATHER_EXECUTOR.shutdown()"},{"line_number":1300,"context_line":"        LOG.info("},{"line_number":1301,"context_line":"            \"The cell worker thread pool %s is closed\","},{"line_number":1302,"context_line":"            SCATTER_GATHER_EXECUTOR.name)"}],"source_content_type":"text/x-python","patch_set":17,"id":"478db0b6_7ef11302","line":1299,"in_reply_to":"81a023a5_97fe29cc","updated":"2025-06-25 17:09:29.000000000","message":"the exception would be raised to the caller of submit not the caller of shutdown:\n\n\u003e Signal the executor that it should free any resources that it is using when the currently pending futures are done executing. Calls to Executor.submit() and Executor.map() made after shutdown will raise RuntimeError.\n\nI consider that exception raised as a software error that should be fixed.","commit_id":"2275b8545e732b8bf29433a6a9cbcf00f4afad34"}]}
