)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"210a26f39c96b2676f871658c09bbc3be9db976d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"49ef429f_01dc325a","updated":"2025-07-15 12:36:19.000000000","message":"I feel like I missunderstood the whole code here so far.","commit_id":"a22caeb665f552566b514e5d3f3fc5a258d658d5"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"24c01978af0d2ac57fe6ff9f95fc79ff420614f4","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"bc8d9d45_2e9370ac","updated":"2025-07-16 11:59:44.000000000","message":"Thanks Dan","commit_id":"a22caeb665f552566b514e5d3f3fc5a258d658d5"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"20453f8436581e6e19f54435525d45fa3108e068","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"790c8e26_eb882513","updated":"2025-07-22 14:28:01.000000000","message":"Fixed the functional test - the fixture was just doing the wrong comparison to mock out a thing.","commit_id":"1ef5a120a578fdb37f4006140c4b0823d93659a8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"70ab66330be2d96ffb9495abe3ef560866e4ed97","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"5076ce4e_3b281bfc","updated":"2025-07-21 15:23:44.000000000","message":"functional failures seems to be relevant","commit_id":"1ef5a120a578fdb37f4006140c4b0823d93659a8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"03455745d0a8961cc603f4801f5a93e4bb748917","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":5,"id":"442d065e_8f113420","updated":"2025-07-22 14:31:31.000000000","message":"It is OK to me if the CI agrees.","commit_id":"4f95cbaf969f024f3f8aaf8405392df644934b56"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"5b15cd550e8d6033dece85d49f210e1e3becb500","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":5,"id":"891a341c_b709965b","updated":"2025-07-23 14:32:56.000000000","message":"Sylvain, can you review and +W this? It\u0027s +2 from gibi...","commit_id":"4f95cbaf969f024f3f8aaf8405392df644934b56"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47837952265fa39decfef405bece4f910435ec4d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":5,"id":"0c69ba85_f72f01a8","updated":"2025-09-15 15:07:27.000000000","message":"Took me a while to review it decently, but I think this is OK, AFAICS, we continue to have the same behaviour.\n\nGiven the jobs tell me it\u0027s fine, +W.","commit_id":"4f95cbaf969f024f3f8aaf8405392df644934b56"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c07c38e88ee4d0163dbdce34195d2f501d6bb44a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":5,"id":"f1bbc57a_d77210ad","updated":"2025-07-22 16:55:23.000000000","message":"recheck unrelated failure in post","commit_id":"4f95cbaf969f024f3f8aaf8405392df644934b56"}],"nova/compute/multi_cell_list.py":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"210a26f39c96b2676f871658c09bbc3be9db976d","unresolved":true,"context_lines":[{"line_number":204,"context_line":"        \"\"\""},{"line_number":205,"context_line":"        try:"},{"line_number":206,"context_line":"            for record in fn(ctx, *args, **kwargs):"},{"line_number":207,"context_line":"                if self.sort_ctx.timeout_expired:"},{"line_number":208,"context_line":"                    raise exception.CellTimeout()"},{"line_number":209,"context_line":"                yield record"},{"line_number":210,"context_line":"        except exception.CellTimeout:"},{"line_number":211,"context_line":"            # Here, we yield a RecordWrapper (no sort_ctx needed since"},{"line_number":212,"context_line":"            # we won\u0027t call into the implementation\u0027s comparison routines)"}],"source_content_type":"text/x-python","patch_set":2,"id":"776a64f1_4ba02236","line":209,"range":{"start_line":207,"start_character":0,"end_line":209,"end_character":28},"updated":"2025-07-15 12:36:19.000000000","message":"What if we check the time, see that we are not timed out yet, go to yield, pass the value and stop there as the other side will never call next() on this generator again, i.e. it was done collecting enough records to fulfill the API request. I guess then we have this generator here never finishing.\n```\n\n\u003e\u003e\u003e def g():\n...     for i in range(10):\n...         print(\"before yielding\", i)\n...         yield i\n...         print(\"after yielding\", i)\n...         \n\u003e\u003e\u003e gen \u003d g()\n\u003e\u003e\u003e gen\n\u003cgenerator object g at 0x7f161112da40\u003e\n\u003e\u003e\u003e next(gen)\nbefore yielding 0\n0\n\u003e\u003e\u003e print(next(gen))\nafter yielding 0\nbefore yielding 1\n1\n```\n\nSo we should see hanging (green) threads in such case.\n\n// later\n\nWe don\u0027t see hanging threads because:\n\nBased on my debugging sessions this code is not using the concurrent tasks in separate (green) threads in scatter gather to run anything substantial concurrently. The scatter gather finishes immediately returning a generator for each cell and then the caller thread will consume those generators within the same single caller thread. No parallel activity happens during consumption at all the implementation of each generator runs in single caller thread.\n\nTo prove it you can look at what the function passed to scatter gather returns when called by the separate (green) thread. The self.query_wrapper is the task function run in the separate thread per cell. When that is called it returns an initialized generator. At this point the implementation of the generator did not run at all. That implementation will only run to the first yield statement *after* the first next() is called on it. That call happens after the scatter gather on the caller side running single threadedly.\n\n--\n\nFrom the perspective of performance I don\u0027t see why would we need to use scatter gather here at all to get those generators one per cell concurrently. That is not a performance sensitive operation at all. So I suggest to factor scatter gather out from this code. It is unnecessary complexity. The scatter gather here can be replaced with a single for loop doing the generator creation for each cell.\n\n--\n\nSo we have a list of generators merged by the caller thread. And we want to have a timeout for that sequential algorithm. In each step that algorithm consumes an item from one of the generators (it has a lookahead to decide from which one based on sorting the top of each generator I assume). So either the generator has the item to yield already loaded from the DB then it is a fast operator to yield and we don\u0027t care about from timeout perspective. Or the generator needs to go to the DB to load the next batch of records which can take excessive time. The proposed \"remaining-time-accounting\" cannot interrupt this operation while I assume the eventlet.Timeout based one could by raising from the socket.read(). Our proposal for the threading mode is to set DB client timeout in the DB URL that will basically raise from the same socket.read(). But it will be a per DB operation timeout not one global one for the whole get_instances_sorted() call. If we want to mimic that then we can spawn the get_instances_sorted() call to a thread and use future.wait(CELL_TIMEOUT) to timeout on the caller side.\nNote that even in the baseline solution with eventlet.Timeout. When the timeout is reached then it stopped all the still active generators in the list we merge. So it wasn\u0027t just stopping one cell but stopping the whole get_instances_sorted() anyhow.","commit_id":"a22caeb665f552566b514e5d3f3fc5a258d658d5"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"24c01978af0d2ac57fe6ff9f95fc79ff420614f4","unresolved":true,"context_lines":[{"line_number":204,"context_line":"        \"\"\""},{"line_number":205,"context_line":"        try:"},{"line_number":206,"context_line":"            for record in fn(ctx, *args, **kwargs):"},{"line_number":207,"context_line":"                if self.sort_ctx.timeout_expired:"},{"line_number":208,"context_line":"                    raise exception.CellTimeout()"},{"line_number":209,"context_line":"                yield record"},{"line_number":210,"context_line":"        except exception.CellTimeout:"},{"line_number":211,"context_line":"            # Here, we yield a RecordWrapper (no sort_ctx needed since"},{"line_number":212,"context_line":"            # we won\u0027t call into the implementation\u0027s comparison routines)"}],"source_content_type":"text/x-python","patch_set":2,"id":"9557b0a5_91196d8d","line":209,"range":{"start_line":207,"start_character":0,"end_line":209,"end_character":28},"in_reply_to":"7244fd36_560b8b21","updated":"2025-07-16 11:59:44.000000000","message":"I agree in both fronts.\n1. Lets make it a proper single consumer multiple producer (green)threads solution independently\n2. Merge the current change as the behavior is not worse than the baseline but it is one less eventlet dependency","commit_id":"a22caeb665f552566b514e5d3f3fc5a258d658d5"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"e3814223e8f9e7809a6cdcaf60e8f796b782f433","unresolved":true,"context_lines":[{"line_number":204,"context_line":"        \"\"\""},{"line_number":205,"context_line":"        try:"},{"line_number":206,"context_line":"            for record in fn(ctx, *args, **kwargs):"},{"line_number":207,"context_line":"                if self.sort_ctx.timeout_expired:"},{"line_number":208,"context_line":"                    raise exception.CellTimeout()"},{"line_number":209,"context_line":"                yield record"},{"line_number":210,"context_line":"        except exception.CellTimeout:"},{"line_number":211,"context_line":"            # Here, we yield a RecordWrapper (no sort_ctx needed since"},{"line_number":212,"context_line":"            # we won\u0027t call into the implementation\u0027s comparison routines)"}],"source_content_type":"text/x-python","patch_set":2,"id":"db9e936f_484ff325","line":209,"range":{"start_line":207,"start_character":0,"end_line":209,"end_character":28},"in_reply_to":"776a64f1_4ba02236","updated":"2025-07-15 13:04:28.000000000","message":"This shows that scatter gather finishes very early regardless of the actual DB reads. https://paste.opendev.org/show/bpOPl7dNX1kZchtrqxEQ/","commit_id":"a22caeb665f552566b514e5d3f3fc5a258d658d5"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"20453f8436581e6e19f54435525d45fa3108e068","unresolved":false,"context_lines":[{"line_number":204,"context_line":"        \"\"\""},{"line_number":205,"context_line":"        try:"},{"line_number":206,"context_line":"            for record in fn(ctx, *args, **kwargs):"},{"line_number":207,"context_line":"                if self.sort_ctx.timeout_expired:"},{"line_number":208,"context_line":"                    raise exception.CellTimeout()"},{"line_number":209,"context_line":"                yield record"},{"line_number":210,"context_line":"        except exception.CellTimeout:"},{"line_number":211,"context_line":"            # Here, we yield a RecordWrapper (no sort_ctx needed since"},{"line_number":212,"context_line":"            # we won\u0027t call into the implementation\u0027s comparison routines)"}],"source_content_type":"text/x-python","patch_set":2,"id":"28599b68_8d69bff6","line":209,"range":{"start_line":207,"start_character":0,"end_line":209,"end_character":28},"in_reply_to":"9557b0a5_91196d8d","updated":"2025-07-22 14:28:01.000000000","message":"Done","commit_id":"a22caeb665f552566b514e5d3f3fc5a258d658d5"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"9ab0e05d7aebbd304321a0da1def16f6ee34cf06","unresolved":true,"context_lines":[{"line_number":204,"context_line":"        \"\"\""},{"line_number":205,"context_line":"        try:"},{"line_number":206,"context_line":"            for record in fn(ctx, *args, **kwargs):"},{"line_number":207,"context_line":"                if self.sort_ctx.timeout_expired:"},{"line_number":208,"context_line":"                    raise exception.CellTimeout()"},{"line_number":209,"context_line":"                yield record"},{"line_number":210,"context_line":"        except exception.CellTimeout:"},{"line_number":211,"context_line":"            # Here, we yield a RecordWrapper (no sort_ctx needed since"},{"line_number":212,"context_line":"            # we won\u0027t call into the implementation\u0027s comparison routines)"}],"source_content_type":"text/x-python","patch_set":2,"id":"7244fd36_560b8b21","line":209,"range":{"start_line":207,"start_character":0,"end_line":209,"end_character":28},"in_reply_to":"db9e936f_484ff325","updated":"2025-07-15 15:20:34.000000000","message":"Hmm, yeah I see. So, when we/I first wrote this, it was because CERN was complaining about list performance across their 80+ geo-distributed cells. The design here, as you can see, was intending to parallelize the queries (and re-queries) and avoid what we were doing before which was basically huge sequential high-latency operation. They were happy with this and reported a big performance gain, but perhaps that was incidental due to just the more efficient behavior in general. I might also go back and look at the state of the code at the time and see if something has changed.\n\nI think what we need to do here is not go back to making this just dumbly sequential but make this a proper producer-consumer loop, where we spawn these out in threads and let them feed queues until signaled to stop (i.e. what this code is trying to do). I can do some thinking on that.\n\nShould we go ahead with this patch just for the de-eventlet-ification aspect of it to remove one more use/import? AFAICT, it does what we have had before, but without eventlet, which is sort of what we\u0027re going for here. I\u0027d definitely want to take more care with any sort of refactor in such a critical part of nova...","commit_id":"a22caeb665f552566b514e5d3f3fc5a258d658d5"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47837952265fa39decfef405bece4f910435ec4d","unresolved":false,"context_lines":[{"line_number":204,"context_line":"        \"\"\""},{"line_number":205,"context_line":"        try:"},{"line_number":206,"context_line":"            for record in fn(ctx, *args, **kwargs):"},{"line_number":207,"context_line":"                if self.sort_ctx.timeout_expired:"},{"line_number":208,"context_line":"                    raise exception.CellTimeout()"},{"line_number":209,"context_line":"                yield record"},{"line_number":210,"context_line":"        except exception.CellTimeout:"}],"source_content_type":"text/x-python","patch_set":5,"id":"dc2aaafb_1ac18055","line":207,"updated":"2025-09-15 15:07:27.000000000","message":"ok, so we get the right value here","commit_id":"4f95cbaf969f024f3f8aaf8405392df644934b56"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47837952265fa39decfef405bece4f910435ec4d","unresolved":false,"context_lines":[{"line_number":205,"context_line":"        try:"},{"line_number":206,"context_line":"            for record in fn(ctx, *args, **kwargs):"},{"line_number":207,"context_line":"                if self.sort_ctx.timeout_expired:"},{"line_number":208,"context_line":"                    raise exception.CellTimeout()"},{"line_number":209,"context_line":"                yield record"},{"line_number":210,"context_line":"        except exception.CellTimeout:"},{"line_number":211,"context_line":"            # Here, we yield a RecordWrapper (no sort_ctx needed since"}],"source_content_type":"text/x-python","patch_set":5,"id":"e0ee0a78_345b8c2c","line":208,"updated":"2025-09-15 15:07:27.000000000","message":"I see, here we generate the exception, while eventlet was automatically raising it.","commit_id":"4f95cbaf969f024f3f8aaf8405392df644934b56"}],"nova/tests/unit/compute/test_multi_cell_list.py":[{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"9b2a64b9aa9794280b7a5813788438425ba5ff76","unresolved":true,"context_lines":[{"line_number":29,"context_line":"        super(TestUtils, self).setUp()"},{"line_number":30,"context_line":"        self._cells \u003d [objects.CellMapping(uuid\u003dgetattr(uuids, \u0027cell%i\u0027 % i),"},{"line_number":31,"context_line":"                                    name\u003d\u0027cell%i\u0027 % i)"},{"line_number":32,"context_line":"                       for i in range(0, 10)]"},{"line_number":33,"context_line":""},{"line_number":34,"context_line":"    def test_compare_simple(self):"},{"line_number":35,"context_line":"        dt1 \u003d datetime.datetime(2015, 11, 5, 20, 30, 00)"}],"source_content_type":"text/x-python","patch_set":1,"id":"166c2e4c_89f1f311","line":32,"updated":"2025-07-14 18:16:08.000000000","message":"Oops, this is left over from a previous refactor attempt and not required.","commit_id":"fc1a1f8a024b9202aabbf5a6c4112e500b612dbe"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"7457280f4c672516315f16b8fa65586ef635b99d","unresolved":false,"context_lines":[{"line_number":29,"context_line":"        super(TestUtils, self).setUp()"},{"line_number":30,"context_line":"        self._cells \u003d [objects.CellMapping(uuid\u003dgetattr(uuids, \u0027cell%i\u0027 % i),"},{"line_number":31,"context_line":"                                    name\u003d\u0027cell%i\u0027 % i)"},{"line_number":32,"context_line":"                       for i in range(0, 10)]"},{"line_number":33,"context_line":""},{"line_number":34,"context_line":"    def test_compare_simple(self):"},{"line_number":35,"context_line":"        dt1 \u003d datetime.datetime(2015, 11, 5, 20, 30, 00)"}],"source_content_type":"text/x-python","patch_set":1,"id":"3e9e18a1_3c75b647","line":32,"in_reply_to":"166c2e4c_89f1f311","updated":"2025-07-14 18:16:26.000000000","message":"Done","commit_id":"fc1a1f8a024b9202aabbf5a6c4112e500b612dbe"}]}
