)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"0e86025c223c5aa9e1c1007bc0e9281e202992ec","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"bf9e75d2_6fc24047","updated":"2021-10-18 22:53:27.000000000","message":"I understand now why status_changed_at has to go backwards - that makes way more sense than making up a new timestamp\n\nI\u0027m less sure now why/if we always need to reset the sync-point","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7a0932eb4fd84f2a5ce19e49016b443f1509c28a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"f3affa50_491bafb3","updated":"2021-11-12 18:50:10.000000000","message":"recheck","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"}],"swift/container/backend.py":[{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"43376b6aaeecdc03a9f3c6759bc29ca5696006d0","unresolved":true,"context_lines":[{"line_number":1024,"context_line":"                UPDATE container_stat"},{"line_number":1025,"context_line":"                SET storage_policy_index \u003d ?,"},{"line_number":1026,"context_line":"                    status_changed_at \u003d ?,"},{"line_number":1027,"context_line":"                    reconciler_sync_point \u003d 0"},{"line_number":1028,"context_line":"                WHERE storage_policy_index \u003c\u003e ?"},{"line_number":1029,"context_line":"            \u0027\u0027\u0027, (policy_index, timestamp, policy_index))"},{"line_number":1030,"context_line":"            conn.commit()"}],"source_content_type":"text/x-python","patch_set":1,"id":"0a14e54c_71060138","line":1027,"updated":"2021-09-24 14:44:13.000000000","message":"I lifted this from https://review.opendev.org/c/openstack/swift/+/809317 and TBH I\u0027m not yet sure what the significance of the reconciler_sync_point is - perhaps someone can suggest a comment to help understanding?","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"12e560f4f8a216cf1e0a89a984e919e4f7b8f498","unresolved":true,"context_lines":[{"line_number":1024,"context_line":"                UPDATE container_stat"},{"line_number":1025,"context_line":"                SET storage_policy_index \u003d ?,"},{"line_number":1026,"context_line":"                    status_changed_at \u003d ?,"},{"line_number":1027,"context_line":"                    reconciler_sync_point \u003d 0"},{"line_number":1028,"context_line":"                WHERE storage_policy_index \u003c\u003e ?"},{"line_number":1029,"context_line":"            \u0027\u0027\u0027, (policy_index, timestamp, policy_index))"},{"line_number":1030,"context_line":"            conn.commit()"}],"source_content_type":"text/x-python","patch_set":1,"id":"3dd3402a_b5a3840d","line":1027,"in_reply_to":"0a14e54c_71060138","updated":"2021-09-24 18:42:40.000000000","message":"My understanding is that the trouble is something like this:\n\nShards get created on the wrong side of a split-brain, so they have the wrong policy. But they all *agree* on the wrong policy, so they all slurp up a bunch of rows and work to ensure those objects are in the wrong policy, and all replicas get their reconciler_sync_point close to MAX_ROW.\n\nWith Matt\u0027s patch, the *root* can now come along and push the shards over to the *right* policy. In the unsharded case, we\u0027d rely on the one replica that had the right policy receiving all the bad rows from the other replicas and enqueuing a bunch of reconciler work. With this externally-driven change, nobody will look at the old rows unless we reset the sync point.\n\nMakes me worry that we might have a lurking bug today on master if the one guy that knows about the right policy winds up receiving an rsync-then-merge... I think we might get the rsynced rows and reconciler_sync_point, but the corrected policy index -- which could mean some rows that are never considered for reconciler work.\n\n(Now I kind of want a container watcher that tallies up misplaced objects since the sync point and compares against the totals in the policy_stat table. If they don\u0027t match, log about it -- and maybe, optionally, go ahead and reset the sync point?)\n\nAll that said, it sucks that the reset will kick off a full table scan to look for reconciler work on the next replication cycle. Makes me glad we\u0027ve already got the `WHERE storage_policy_index \u003c\u003e ?` below!","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bcbb5224c3f728a2717f76b12f4f43587467f4a2","unresolved":true,"context_lines":[{"line_number":1024,"context_line":"                UPDATE container_stat"},{"line_number":1025,"context_line":"                SET storage_policy_index \u003d ?,"},{"line_number":1026,"context_line":"                    status_changed_at \u003d ?,"},{"line_number":1027,"context_line":"                    reconciler_sync_point \u003d 0"},{"line_number":1028,"context_line":"                WHERE storage_policy_index \u003c\u003e ?"},{"line_number":1029,"context_line":"            \u0027\u0027\u0027, (policy_index, timestamp, policy_index))"},{"line_number":1030,"context_line":"            conn.commit()"}],"source_content_type":"text/x-python","patch_set":1,"id":"99f9a189_281561b7","line":1027,"in_reply_to":"0a14e54c_71060138","updated":"2021-09-24 18:43:19.000000000","message":"do we really want the status_changed_at to go backwards?\n\nthe sync_point resets the rows to be evaluated by the _post_replicate_hook so dump_to_reconciler can re-evaluate all the rows given the new correct policy index - i don\u0027t really understand yet why it ever worked?","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"},{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"4a7fd350654c9c0b04436657858843751a83f48f","unresolved":true,"context_lines":[{"line_number":1024,"context_line":"                UPDATE container_stat"},{"line_number":1025,"context_line":"                SET storage_policy_index \u003d ?,"},{"line_number":1026,"context_line":"                    status_changed_at \u003d ?,"},{"line_number":1027,"context_line":"                    reconciler_sync_point \u003d 0"},{"line_number":1028,"context_line":"                WHERE storage_policy_index \u003c\u003e ?"},{"line_number":1029,"context_line":"            \u0027\u0027\u0027, (policy_index, timestamp, policy_index))"},{"line_number":1030,"context_line":"            conn.commit()"}],"source_content_type":"text/x-python","patch_set":1,"id":"19ec85f7_f2c7b04f","line":1027,"in_reply_to":"99f9a189_281561b7","updated":"2021-09-27 01:02:19.000000000","message":"Clay we need to go backwards otherwise we can\u0027t pull in the root\u0027s status_changed_at time which will make the SPI checks fail in known bad cases. In fact it\u0027s what I had to do to get your follow up of  https://review.opendev.org/c/openstack/swift/+/809317 to actaully work. Really it\u0027s just a shame we call it last_modified_at and not something like last_state_change to indicate when we last PUT or DELETEd the container.\n\nOr maybe we loose it all together and just just having a virtual property in the broker that is:\n\n  @property\n  last_state_change_at(self):\n     info \u003d self.get_info()\n     return max(info[\u0027put_timestamp\u0027], info[\u0027delete_timestamp\u0027]","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"}],"swift/container/replicator.py":[{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"43376b6aaeecdc03a9f3c6759bc29ca5696006d0","unresolved":true,"context_lines":[{"line_number":70,"context_line":"            if incorrect_policy_index(info, remote_info):"},{"line_number":71,"context_line":"                broker.set_storage_policy_index("},{"line_number":72,"context_line":"                    remote_info[\u0027storage_policy_index\u0027],"},{"line_number":73,"context_line":"                    timestamp\u003dremote_info[\u0027status_changed_at\u0027])"},{"line_number":74,"context_line":"            sync_timestamps \u003d (\u0027created_at\u0027, \u0027put_timestamp\u0027,"},{"line_number":75,"context_line":"                               \u0027delete_timestamp\u0027)"},{"line_number":76,"context_line":"            if any(info[key] !\u003d remote_info[key] for key in sync_timestamps):"}],"source_content_type":"text/x-python","patch_set":1,"id":"1c97a57a_c53c13ef","line":73,"updated":"2021-09-24 14:44:13.000000000","message":"this is the key change\n\nneed unit test coverage for this","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"12e560f4f8a216cf1e0a89a984e919e4f7b8f498","unresolved":true,"context_lines":[{"line_number":70,"context_line":"            if incorrect_policy_index(info, remote_info):"},{"line_number":71,"context_line":"                broker.set_storage_policy_index("},{"line_number":72,"context_line":"                    remote_info[\u0027storage_policy_index\u0027],"},{"line_number":73,"context_line":"                    timestamp\u003dremote_info[\u0027status_changed_at\u0027])"},{"line_number":74,"context_line":"            sync_timestamps \u003d (\u0027created_at\u0027, \u0027put_timestamp\u0027,"},{"line_number":75,"context_line":"                               \u0027delete_timestamp\u0027)"},{"line_number":76,"context_line":"            if any(info[key] !\u003d remote_info[key] for key in sync_timestamps):"}],"source_content_type":"text/x-python","patch_set":1,"id":"8115617d_209e2ad5","line":73,"in_reply_to":"1c97a57a_c53c13ef","updated":"2021-09-24 18:42:40.000000000","message":"So who is responsible for bumping status_changed_at now? When do they do it?","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"6ecd4b5e1e3e28ee7f1fd6f5cc9c47d34a2bed3e","unresolved":true,"context_lines":[{"line_number":70,"context_line":"            if incorrect_policy_index(info, remote_info):"},{"line_number":71,"context_line":"                broker.set_storage_policy_index("},{"line_number":72,"context_line":"                    remote_info[\u0027storage_policy_index\u0027],"},{"line_number":73,"context_line":"                    timestamp\u003dremote_info[\u0027status_changed_at\u0027])"},{"line_number":74,"context_line":"            sync_timestamps \u003d (\u0027created_at\u0027, \u0027put_timestamp\u0027,"},{"line_number":75,"context_line":"                               \u0027delete_timestamp\u0027)"},{"line_number":76,"context_line":"            if any(info[key] !\u003d remote_info[key] for key in sync_timestamps):"}],"source_content_type":"text/x-python","patch_set":1,"id":"f6a470c5_6daab607","line":73,"in_reply_to":"8115617d_209e2ad5","updated":"2021-09-27 09:26:23.000000000","message":"status_changed_at is set:\n\n1) when the container db is created (\u003d\u003dput_timestamp)\n2) when the container is re-created (\u003d\u003dput_timestamp)\n3) when the container is deleted (\u003d\u003ddelete_timestamp)\n\nduring replication:\n4) if incorrect_policy_index then during set_storage_policy_index\n5) during merge_timestamps, if delete status changes, then set to \u0027now\u0027 (and this happens after 4 so could override 4!?!)","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"43376b6aaeecdc03a9f3c6759bc29ca5696006d0","unresolved":true,"context_lines":[{"line_number":358,"context_line":"        if incorrect_policy_index(info, remote_info):"},{"line_number":359,"context_line":"            broker.set_storage_policy_index("},{"line_number":360,"context_line":"                remote_info[\u0027storage_policy_index\u0027],"},{"line_number":361,"context_line":"                timestamp\u003dremote_info[\u0027status_changed_at\u0027]"},{"line_number":362,"context_line":"            )"},{"line_number":363,"context_line":"            info \u003d broker.get_replication_info()"},{"line_number":364,"context_line":"        return info"}],"source_content_type":"text/x-python","patch_set":1,"id":"a12a5a5e_c3c51ecc","line":361,"updated":"2021-09-24 14:44:13.000000000","message":"ditto - https://review.opendev.org/c/openstack/swift/+/809317\n\nneed unit test coverage for this","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"}],"test/probe/test_container_merge_policy_index.py":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bcbb5224c3f728a2717f76b12f4f43587467f4a2","unresolved":true,"context_lines":[{"line_number":120,"context_line":"            metadata[\u0027X-Backend-Storage-Policy-Index\u0027]"},{"line_number":121,"context_line":"            for node, metadata in head_responses]"},{"line_number":122,"context_line":"        self.assertEqual([correct_spi, correct_spi, correct_spi],"},{"line_number":123,"context_line":"                         found_spi)"},{"line_number":124,"context_line":""},{"line_number":125,"context_line":"    def test_merge_storage_policy_index(self):"},{"line_number":126,"context_line":"        # generic split brain"}],"source_content_type":"text/x-python","patch_set":1,"id":"7ea95764_58d35658","line":123,"updated":"2021-09-24 18:43:19.000000000","message":"this is good stuff\n\n\tvagrant@saio:~/swift$ nosetests test/probe/test_container_merge_policy_index.py:TestContainerMergePolicyIndex.test_replicate_storage_policy_index\n\ttest_replicate_storage_policy_index (test.probe.test_container_merge_policy_index.TestContainerMergePolicyIndex) ... FAIL\n\n\t\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\tFAIL: test_replicate_storage_policy_index (test.probe.test_container_merge_policy_index.TestContainerMergePolicyIndex)\n\t----------------------------------------------------------------------\n\tTraceback (most recent call last):\n\t  File \"/vagrant/swift/test/probe/test_container_merge_policy_index.py\", line 123, in test_replicate_storage_policy_index\n\t    found_spi)\n\tAssertionError: Lists differ: [\u00271\u0027, \u00271\u0027, \u00271\u0027] !\u003d [\u00271\u0027, \u00270\u0027, \u00270\u0027]\n\n\tFirst differing element 1:\n\t\u00271\u0027\n\t\u00270\u0027\n\n\t- [\u00271\u0027, \u00271\u0027, \u00271\u0027]\n\t?        ^    ^\n\n\t+ [\u00271\u0027, \u00270\u0027, \u00270\u0027]\n\t?        ^    ^","commit_id":"7046989b5cde53049b4030dc73d9b92b0599a059"}]}
