)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"48414b61ac071ecb654a4439510b575e7f313320","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"3efc2289_ab7571ef","updated":"2026-04-17 14:43:24.000000000","message":"@whalbawi@nvidia.com I\u0027d be grateful if you could double-check my reasoning w.r.t. the implications for rate limiting. I\u0027m concerned that we\u0027re going to iterate many more locations via the rate limiter than before i.e. make slower progress.\n\nI wonder if we have test (or could add a test) that verifies the ratio of on disk locations to processed locations i.e. the degree of hash filtering?\n\nOtherwise, I like moving the hash filtering into the context of the relinker where it is more explicit/more obvious in the execution path.","commit_id":"488f8bd8d5a5f86dd699cb4889ca031fde18864d"},{"author":{"_account_id":38767,"name":"Wael Halbawi","display_name":"Wael Halbawi","email":"whalbawi@nvidia.com","username":"whalbawi"},"change_message_id":"744f7f1c9980e5172b9c0e4e2b2ab8069b0b0307","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"0dea6626_1237dc25","updated":"2026-04-20 21:35:48.000000000","message":"That\u0027s for the careful look @alistairncoles@gmail.com. I think we need to discuss a few things:\n1- Deeper analysis of no-op to op ratio computation and impact on rate limiting.\n2- Reinstating the hashes_filter hook (fixed, of course) to keep this change small.\n3- Adjust the rate-limiting system to be less sensitive to no-ops (possibly a bigger, later patch).","commit_id":"488f8bd8d5a5f86dd699cb4889ca031fde18864d"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"aed9e343c08c40170a55af77ef4838471bc185f2","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"3711a01e_26f45e5b","updated":"2026-04-28 13:57:48.000000000","message":"LGTM. Focussed bug fix with unit tests.\n\nI spotted another gap in relinker tests, but outside the scope of this patch, https://review.opendev.org/c/openstack/swift/+/986537","commit_id":"66af9d2ab0387d1e138b3f39aa4573678b05d0ab"}],"swift/cli/relinker.py":[{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"48414b61ac071ecb654a4439510b575e7f313320","unresolved":true,"context_lines":[{"line_number":392,"context_line":"            fname \u003d os.path.join(suff_path, hsh)"},{"line_number":393,"context_line":"            if fname \u003d\u003d replace_partition_in_path("},{"line_number":394,"context_line":"                    self.conf[\u0027devices\u0027], fname, self.next_part_power):"},{"line_number":395,"context_line":"                hashes.remove(hsh)"},{"line_number":396,"context_line":"        return hashes"},{"line_number":397,"context_line":""},{"line_number":398,"context_line":"    def do_relink(self, device, hash_path, new_hash_path, filename,"}],"source_content_type":"text/x-python","patch_set":1,"id":"9895d514_f8b345c2","side":"PARENT","line":395,"updated":"2026-04-17 14:43:24.000000000","message":"Once it is pointed out, this mutation-while-modifying is clearly wrong","commit_id":"aa186378d238e384e7898a4536b2b9db89690334"},{"author":{"_account_id":38767,"name":"Wael Halbawi","display_name":"Wael Halbawi","email":"whalbawi@nvidia.com","username":"whalbawi"},"change_message_id":"744f7f1c9980e5172b9c0e4e2b2ab8069b0b0307","unresolved":false,"context_lines":[{"line_number":392,"context_line":"            fname \u003d os.path.join(suff_path, hsh)"},{"line_number":393,"context_line":"            if fname \u003d\u003d replace_partition_in_path("},{"line_number":394,"context_line":"                    self.conf[\u0027devices\u0027], fname, self.next_part_power):"},{"line_number":395,"context_line":"                hashes.remove(hsh)"},{"line_number":396,"context_line":"        return hashes"},{"line_number":397,"context_line":""},{"line_number":398,"context_line":"    def do_relink(self, device, hash_path, new_hash_path, filename,"}],"source_content_type":"text/x-python","patch_set":1,"id":"0ed6b2b0_b8796e11","side":"PARENT","line":395,"in_reply_to":"9895d514_f8b345c2","updated":"2026-04-20 21:35:48.000000000","message":"Acknowledged","commit_id":"aa186378d238e384e7898a4536b2b9db89690334"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"48414b61ac071ecb654a4439510b575e7f313320","unresolved":true,"context_lines":[{"line_number":393,"context_line":"            if fname \u003d\u003d replace_partition_in_path("},{"line_number":394,"context_line":"                    self.conf[\u0027devices\u0027], fname, self.next_part_power):"},{"line_number":395,"context_line":"                hashes.remove(hsh)"},{"line_number":396,"context_line":"        return hashes"},{"line_number":397,"context_line":""},{"line_number":398,"context_line":"    def do_relink(self, device, hash_path, new_hash_path, filename,"},{"line_number":399,"context_line":"                  already_quarantined\u003dFalse):"}],"source_content_type":"text/x-python","patch_set":1,"id":"d9869622_cc560697","side":"PARENT","line":396,"updated":"2026-04-17 14:43:24.000000000","message":"It\u0027s perhaps interesting to note that hashes_filter does not have to return a list. The audit_location_generator says it should return a list but AFAICT the implementation only requires it to be an iterable, which would allow correct filtering without copying to another list here.","commit_id":"aa186378d238e384e7898a4536b2b9db89690334"},{"author":{"_account_id":38767,"name":"Wael Halbawi","display_name":"Wael Halbawi","email":"whalbawi@nvidia.com","username":"whalbawi"},"change_message_id":"744f7f1c9980e5172b9c0e4e2b2ab8069b0b0307","unresolved":false,"context_lines":[{"line_number":393,"context_line":"            if fname \u003d\u003d replace_partition_in_path("},{"line_number":394,"context_line":"                    self.conf[\u0027devices\u0027], fname, self.next_part_power):"},{"line_number":395,"context_line":"                hashes.remove(hsh)"},{"line_number":396,"context_line":"        return hashes"},{"line_number":397,"context_line":""},{"line_number":398,"context_line":"    def do_relink(self, device, hash_path, new_hash_path, filename,"},{"line_number":399,"context_line":"                  already_quarantined\u003dFalse):"}],"source_content_type":"text/x-python","patch_set":1,"id":"80edccbd_394eb688","side":"PARENT","line":396,"in_reply_to":"d9869622_cc560697","updated":"2026-04-20 21:35:48.000000000","message":"Acknowledged","commit_id":"aa186378d238e384e7898a4536b2b9db89690334"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"48414b61ac071ecb654a4439510b575e7f313320","unresolved":true,"context_lines":[{"line_number":645,"context_line":"            hook_post_partition\u003dself.hook_post_partition,"},{"line_number":646,"context_line":"            logger\u003dself.logger,"},{"line_number":647,"context_line":"            error_counter\u003daudit_stats,"},{"line_number":648,"context_line":"            yield_hash_dirs\u003dTrue"},{"line_number":649,"context_line":"        )"},{"line_number":650,"context_line":"        if self.conf[\u0027files_per_second\u0027] \u003e 0:"},{"line_number":651,"context_line":"            locations \u003d RateLimitedIterator("}],"source_content_type":"text/x-python","patch_set":1,"id":"01a64eea_da51681e","line":648,"range":{"start_line":648,"start_character":12,"end_line":648,"end_character":32},"updated":"2026-04-17 14:43:24.000000000","message":"Given that the generator is yielding hashes without doing any more work per-hash, I do find it more appealing to have the hash filtering be explicitly implemented in the context of the relinker rather than delegated to the generator.","commit_id":"488f8bd8d5a5f86dd699cb4889ca031fde18864d"},{"author":{"_account_id":38767,"name":"Wael Halbawi","display_name":"Wael Halbawi","email":"whalbawi@nvidia.com","username":"whalbawi"},"change_message_id":"744f7f1c9980e5172b9c0e4e2b2ab8069b0b0307","unresolved":false,"context_lines":[{"line_number":645,"context_line":"            hook_post_partition\u003dself.hook_post_partition,"},{"line_number":646,"context_line":"            logger\u003dself.logger,"},{"line_number":647,"context_line":"            error_counter\u003daudit_stats,"},{"line_number":648,"context_line":"            yield_hash_dirs\u003dTrue"},{"line_number":649,"context_line":"        )"},{"line_number":650,"context_line":"        if self.conf[\u0027files_per_second\u0027] \u003e 0:"},{"line_number":651,"context_line":"            locations \u003d RateLimitedIterator("}],"source_content_type":"text/x-python","patch_set":1,"id":"d3663653_9fa296c4","line":648,"range":{"start_line":648,"start_character":12,"end_line":648,"end_character":32},"in_reply_to":"01a64eea_da51681e","updated":"2026-04-20 21:35:48.000000000","message":"Acknowledged","commit_id":"488f8bd8d5a5f86dd699cb4889ca031fde18864d"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"48414b61ac071ecb654a4439510b575e7f313320","unresolved":true,"context_lines":[{"line_number":649,"context_line":"        )"},{"line_number":650,"context_line":"        if self.conf[\u0027files_per_second\u0027] \u003e 0:"},{"line_number":651,"context_line":"            locations \u003d RateLimitedIterator("},{"line_number":652,"context_line":"                locations, self.conf[\u0027files_per_second\u0027])"},{"line_number":653,"context_line":"        for hash_path, device, _ in locations:"},{"line_number":654,"context_line":"            self.process_location(device, hash_path)"},{"line_number":655,"context_line":""}],"source_content_type":"text/x-python","patch_set":1,"id":"c081644d_3f11fb57","line":652,"updated":"2026-04-17 14:43:24.000000000","message":"An alternative change would be to wrap locations in a filtering iterator prior to the rate limiter:\n\n```\ndef hash_filtering_iter(locations):\n    for hash_path, device, _ in locations:\n        new_hash_path \u003d replace_partition_in_path(\n            self.conf[\u0027devices\u0027], hash_path, self.next_part_power)\n    \tif new_hash_path !\u003d hash_path:\n    \t    yield device, hash_path, new_hash_path\n    \t    \n        \nlocations \u003d hash_filtering_iter(locations)\nif self.conf[\u0027files_per_second\u0027] \u003e 0:\n            locations \u003d RateLimitedIterator(\n                locations, self.conf[\u0027files_per_second\u0027])\n```\n\ndisclaimer: I didn\u0027t run that code ^^^^","commit_id":"488f8bd8d5a5f86dd699cb4889ca031fde18864d"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"48414b61ac071ecb654a4439510b575e7f313320","unresolved":true,"context_lines":[{"line_number":651,"context_line":"            locations \u003d RateLimitedIterator("},{"line_number":652,"context_line":"                locations, self.conf[\u0027files_per_second\u0027])"},{"line_number":653,"context_line":"        for hash_path, device, _ in locations:"},{"line_number":654,"context_line":"            self.process_location(device, hash_path)"},{"line_number":655,"context_line":""},{"line_number":656,"context_line":"        # any unmounted devices don\u0027t trigger the pre_device trigger."},{"line_number":657,"context_line":"        # so we\u0027ll deal with them here."}],"source_content_type":"text/x-python","patch_set":1,"id":"3fdb7c16_ebcd5a53","line":654,"updated":"2026-04-17 14:43:24.000000000","message":"The filtering that was in hashes_filter now happens after the rate limiting, so no-op locations now consume from ``files_per_second``.\n\nMy reasoning is that this is NOT significant for the relink step but could be significant for the cleanup step:\n\nIn the relink step:\n\n- the partitions_filter is already filtering out the upper half of the partition space where no relinking is expected. \n- in a lower half partition, each partition will *trend towards* having ~2/3 locations for which this is an old partition (so work to be done) and ~1/3 locations re-linked from a lower-numbered old partition for which this is the new partition (no work to be done).  \n- however, the partitions are processed in reverse order, so at the time of *processing a partition* it should have only not-yet-relinked locations, all of which need work to be done.\n\nIn the cleanup step:\n\n- all partitions will be included by the audit_location_generator\n- in the lower half partitions ~1/3 location will be in the correct partition (no work to be done)\n- in the upper half partitions all location should be in the correct partition (no work to be done)[*]\n- so there are approx 4/6 locations that on master would have been skipped *before* the rate limiter, but they will now consume from the rate limit.\n- BUT the hashes_filter bug meant that not all of the 4/6 locations were skipped in hashes_filter; maybe only half? still significant though.\n\nIf my reasoning is correct then previously the relinker would fly through the upper-half partitions during cleanup but will now be (unnecessarily) rate limited.\n\n[*] I wonder, why do we include these partitions for cleanup?","commit_id":"488f8bd8d5a5f86dd699cb4889ca031fde18864d"},{"author":{"_account_id":38767,"name":"Wael Halbawi","display_name":"Wael Halbawi","email":"whalbawi@nvidia.com","username":"whalbawi"},"change_message_id":"744f7f1c9980e5172b9c0e4e2b2ab8069b0b0307","unresolved":true,"context_lines":[{"line_number":651,"context_line":"            locations \u003d RateLimitedIterator("},{"line_number":652,"context_line":"                locations, self.conf[\u0027files_per_second\u0027])"},{"line_number":653,"context_line":"        for hash_path, device, _ in locations:"},{"line_number":654,"context_line":"            self.process_location(device, hash_path)"},{"line_number":655,"context_line":""},{"line_number":656,"context_line":"        # any unmounted devices don\u0027t trigger the pre_device trigger."},{"line_number":657,"context_line":"        # so we\u0027ll deal with them here."}],"source_content_type":"text/x-python","patch_set":1,"id":"e5aba78a_536bdf81","line":654,"in_reply_to":"3fdb7c16_ebcd5a53","updated":"2026-04-20 21:35:48.000000000","message":"I think we should talk about cleanup filtering first - \n`partitions_filter` should filter out the upper half during the cleanup phase when `prev_part_power` is set in `hook_pre_device`. Having said that, there is at least one case where `prev_part_power` does not get repopulated, e.g. when the state file does not exist.\n\nThe subsequent [patch](https://review.opendev.org/c/openstack/swift/+/966980/10/swift/cli/relinker.py#269) will fix partition filtering to always exclude the upper half during cleanup (and newly added audit).\n\nHaving said that, I do agree that no-ops eat from the rate limiter\u0027s budget, and the no-op to op ratio during the clean up phase is ~1/3 (at best). For the relink phase I think the ratio computed is a worst case upper bound.","commit_id":"488f8bd8d5a5f86dd699cb4889ca031fde18864d"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"aed9e343c08c40170a55af77ef4838471bc185f2","unresolved":false,"context_lines":[{"line_number":651,"context_line":"            locations \u003d RateLimitedIterator("},{"line_number":652,"context_line":"                locations, self.conf[\u0027files_per_second\u0027])"},{"line_number":653,"context_line":"        for hash_path, device, _ in locations:"},{"line_number":654,"context_line":"            self.process_location(device, hash_path)"},{"line_number":655,"context_line":""},{"line_number":656,"context_line":"        # any unmounted devices don\u0027t trigger the pre_device trigger."},{"line_number":657,"context_line":"        # so we\u0027ll deal with them here."}],"source_content_type":"text/x-python","patch_set":1,"id":"43f58e0d_d9952732","line":654,"in_reply_to":"e5aba78a_536bdf81","updated":"2026-04-28 13:57:48.000000000","message":"Done","commit_id":"488f8bd8d5a5f86dd699cb4889ca031fde18864d"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"aed9e343c08c40170a55af77ef4838471bc185f2","unresolved":true,"context_lines":[{"line_number":652,"context_line":"            yield_hash_dirs\u003dTrue"},{"line_number":653,"context_line":"        )"},{"line_number":654,"context_line":"        if self.conf[\u0027files_per_second\u0027] \u003e 0:"},{"line_number":655,"context_line":"            locations \u003d RateLimitedIterator("},{"line_number":656,"context_line":"                locations, self.conf[\u0027files_per_second\u0027])"},{"line_number":657,"context_line":"        for hash_path, device, _part_num in locations:"},{"line_number":658,"context_line":"            # note, in cleanup step next_part_power \u003d\u003d part_power"}],"source_content_type":"text/x-python","patch_set":3,"id":"328dfd56_4471ee77","line":655,"range":{"start_line":655,"start_character":12,"end_line":655,"end_character":21},"updated":"2026-04-28 13:57:48.000000000","message":"off-topic: while testing this patch I noticed that there is no unit test that verifies that *this* ``locations`` is used. Delete ``locations \u003d`` here and all tests still pass!\n\nSee https://review.opendev.org/c/openstack/swift/+/986537 for more test coverage.","commit_id":"66af9d2ab0387d1e138b3f39aa4573678b05d0ab"}],"test/unit/cli/test_relinker.py":[{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"aed9e343c08c40170a55af77ef4838471bc185f2","unresolved":true,"context_lines":[{"line_number":2628,"context_line":"        self.assertTrue(os.path.isfile(path1))"},{"line_number":2629,"context_line":"        self.assertTrue(os.path.isfile(path2))"},{"line_number":2630,"context_line":""},{"line_number":2631,"context_line":"    def test_cleanup_noops_rate_limiter(self):"},{"line_number":2632,"context_line":"        hash1 \u003d \"027ecbf9027e96e1fe83e6154e1a8380\""},{"line_number":2633,"context_line":"        hash2 \u003d \"024f19a8347495adc3cfa845f725e380\""},{"line_number":2634,"context_line":"        self.assertNotEqual(hash1, hash2)"}],"source_content_type":"text/x-python","patch_set":3,"id":"23deb7ed_8e2727f0","line":2631,"updated":"2026-04-28 13:57:48.000000000","message":"+1: this test fails when I revert the fix:\n\n```\nFAILED\ntest/unit/cli/test_relinker.py:2630 (TestRelinker.test_cleanup_noops_rate_limiter)\n[(\u0027/var/folders/5n/jty8zbg15rjfj5gqbkm3fkv00000gp/T/tmpskg3ek2q/node/sda1/objects/4/380/024f19a8347495adc3cfa845f725e380\u0027,\n  \u0027sda1\u0027,\n  \u00274\u0027)] !\u003d []\n```\n\n...which is telling me that previously we\u0027d erroneously process object files that were in the correct location","commit_id":"66af9d2ab0387d1e138b3f39aa4573678b05d0ab"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"aed9e343c08c40170a55af77ef4838471bc185f2","unresolved":true,"context_lines":[{"line_number":2644,"context_line":"        self.assertLess(part1, 2 ** (self.rb.part_power - 1))"},{"line_number":2645,"context_line":""},{"line_number":2646,"context_line":"        # objects are created in the partition they are expected to be in so"},{"line_number":2647,"context_line":"        # so they are not actually selected for cleanup"},{"line_number":2648,"context_line":"        policy \u003d 0"},{"line_number":2649,"context_line":"        self._recreate_objects_dir(policy)"},{"line_number":2650,"context_line":"        objdir1, fname1, _ \u003d self._create_object(policy, part1, hash1)"}],"source_content_type":"text/x-python","patch_set":3,"id":"2bbb22d4_19b85913","line":2647,"range":{"start_line":2647,"start_character":10,"end_line":2647,"end_character":12},"updated":"2026-04-28 13:57:48.000000000","message":"nit: s/so so/so/","commit_id":"66af9d2ab0387d1e138b3f39aa4573678b05d0ab"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"aed9e343c08c40170a55af77ef4838471bc185f2","unresolved":true,"context_lines":[{"line_number":2645,"context_line":""},{"line_number":2646,"context_line":"        # objects are created in the partition they are expected to be in so"},{"line_number":2647,"context_line":"        # so they are not actually selected for cleanup"},{"line_number":2648,"context_line":"        policy \u003d 0"},{"line_number":2649,"context_line":"        self._recreate_objects_dir(policy)"},{"line_number":2650,"context_line":"        objdir1, fname1, _ \u003d self._create_object(policy, part1, hash1)"},{"line_number":2651,"context_line":"        objdir2, fname2, _ \u003d self._create_object(policy, part2, hash2)"}],"source_content_type":"text/x-python","patch_set":3,"id":"3e89af96_cdc6624c","line":2648,"range":{"start_line":2648,"start_character":8,"end_line":2648,"end_character":18},"updated":"2026-04-28 13:57:48.000000000","message":"nit: this can be ``self.policy``","commit_id":"66af9d2ab0387d1e138b3f39aa4573678b05d0ab"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"aed9e343c08c40170a55af77ef4838471bc185f2","unresolved":true,"context_lines":[{"line_number":2669,"context_line":"                \u0027--files-per-second\u0027, \u00271\u0027,  # Enable rate limiting"},{"line_number":2670,"context_line":"            ]))"},{"line_number":2671,"context_line":""},{"line_number":2672,"context_line":"        self.assertEqual(list(), captured)"},{"line_number":2673,"context_line":""},{"line_number":2674,"context_line":"    def test_relink_noops_rate_limiter(self):"},{"line_number":2675,"context_line":"        hash1 \u003d \"027ecbf9027e96e1fe83e6154e1a8380\""}],"source_content_type":"text/x-python","patch_set":3,"id":"a06b5075_6b38ffe8","line":2672,"updated":"2026-04-28 13:57:48.000000000","message":"nit: could be ``self.assertFalse(captured)``\n\nBTW, I much prefer this pattern of capturing what happened and then making assertions after the test code has executed, rather than making an assertion inside the capturing function.","commit_id":"66af9d2ab0387d1e138b3f39aa4573678b05d0ab"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"aed9e343c08c40170a55af77ef4838471bc185f2","unresolved":true,"context_lines":[{"line_number":2671,"context_line":""},{"line_number":2672,"context_line":"        self.assertEqual(list(), captured)"},{"line_number":2673,"context_line":""},{"line_number":2674,"context_line":"    def test_relink_noops_rate_limiter(self):"},{"line_number":2675,"context_line":"        hash1 \u003d \"027ecbf9027e96e1fe83e6154e1a8380\""},{"line_number":2676,"context_line":"        hash2 \u003d \"024f19a8347495adc3cfa845f725e380\""},{"line_number":2677,"context_line":"        self.assertNotEqual(hash1, hash2)"}],"source_content_type":"text/x-python","patch_set":3,"id":"c648c417_5b31d696","line":2674,"updated":"2026-04-28 13:57:48.000000000","message":"+1 this test also catches a regression to the fix\n\n```\nFAILED\ntest/unit/cli/test_relinker.py:2673 (TestRelinker.test_relink_noops_rate_limiter)\n[(\u0027/var/folders/5n/jty8zbg15rjfj5gqbkm3fkv00000gp/T/tmp8_ms8bcg/node/sda1/objects/4/380/024f19a8347495adc3cfa845f725e380\u0027,\n  \u0027sda1\u0027,\n  \u00274\u0027)] !\u003d []\n```","commit_id":"66af9d2ab0387d1e138b3f39aa4573678b05d0ab"}]}
