)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"bb230319be0076c15ac1268336abf66e02c8942e","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"b74da9f0_52c2433f","updated":"2022-09-29 17:43:36.000000000","message":"Just a quick first review. This is heading in the right direction, thanks.\n\nlooking at this has made me think that _record_sharding_progress needs some attention (why does it not cover shrinking), but don\u0027t allow my comments to distract you from the *focus* of this patch...we can fix other things in other patches 😊\n\n-1 because of the concern over epoch being None and having the elapsed time in the warning.","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"73ec61fe99b6212c14eb1cdfc2cfb19f4745276f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"2b831059_67c6cc3e","updated":"2022-09-27 04:51:54.000000000","message":"still need to add unit tests and fix one test test_sharder.py:TestSharder.test_one_shard_cycle, because this test manually modify data structure internals. But would like to get feedback whether production code is correct.","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"b03ef10088c1996e9ec896962a55d9f7e95c0c53","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"9fe32f44_58bec3bd","updated":"2022-09-27 15:50:21.000000000","message":"thanks for getting this started - I think the warning/log is nice.\n\nMaybe we also need a stat we could watch to estimate what the correct timeout value should be?  How long are things takin - how long should they take.","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"b340ead2d941d6b7bdcacb875c0317605a38eb61","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"68be3b05_ebc8fbfc","updated":"2022-10-14 14:49:16.000000000","message":"I think this is a good addition. I\u0027ve pushed an extra unit test to cover the shrinking case:\nhttps://review.opendev.org/c/openstack/swift/+/861395 - please squash in to here if it looks OK","commit_id":"ee50a598df65697f6ae59a8f34f1f2d706a8c490"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"3178087d6e07ea5d1a8da0718c9a09dbe1c1ef58","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":5,"id":"f78e21b3_68876530","updated":"2022-10-14 18:50:15.000000000","message":"recheck\nThere was a \"socket.timeout: timed out\" error in tempest.api.compute.servers.test_server_actions.ServerActionsTestJSON during previous run, this is not related to swift. ","commit_id":"4ed2b89cb78f06cbd08b8a3f94745613def12e97"}],"swift/container/sharder.py":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"b03ef10088c1996e9ec896962a55d9f7e95c0c53","unresolved":true,"context_lines":[{"line_number":710,"context_line":"        self.recon_sharded_timeout \u003d get_val("},{"line_number":711,"context_line":"            \u0027recon_sharded_timeout\u0027, int, 43200)"},{"line_number":712,"context_line":"        self.container_sharding_timeout \u003d get_val("},{"line_number":713,"context_line":"            \u0027container_sharding_timeout\u0027, int, 172800)"},{"line_number":714,"context_line":"        self.conn_timeout \u003d get_val("},{"line_number":715,"context_line":"            \u0027conn_timeout\u0027, float, 5)"},{"line_number":716,"context_line":"        self.auto_shard \u003d get_val("}],"source_content_type":"text/x-python","patch_set":1,"id":"36fa92df_b59f25ed","line":713,"updated":"2022-09-27 15:50:21.000000000","message":"2 days","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":710,"context_line":"        self.recon_sharded_timeout \u003d get_val("},{"line_number":711,"context_line":"            \u0027recon_sharded_timeout\u0027, int, 43200)"},{"line_number":712,"context_line":"        self.container_sharding_timeout \u003d get_val("},{"line_number":713,"context_line":"            \u0027container_sharding_timeout\u0027, int, 172800)"},{"line_number":714,"context_line":"        self.conn_timeout \u003d get_val("},{"line_number":715,"context_line":"            \u0027conn_timeout\u0027, float, 5)"},{"line_number":716,"context_line":"        self.auto_shard \u003d get_val("}],"source_content_type":"text/x-python","patch_set":1,"id":"dbc2e2b1_fb5d0a08","line":713,"in_reply_to":"36fa92df_b59f25ed","updated":"2022-10-13 23:10:45.000000000","message":"Ack","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"bb230319be0076c15ac1268336abf66e02c8942e","unresolved":true,"context_lines":[{"line_number":911,"context_line":"        else:"},{"line_number":912,"context_line":"            category[\u0027top\u0027] \u003d candidates"},{"line_number":913,"context_line":""},{"line_number":914,"context_line":"    def _record_sharding_progress(self, broker, node, error):"},{"line_number":915,"context_line":"        own_shard_range \u003d broker.get_own_shard_range()"},{"line_number":916,"context_line":"        db_state \u003d broker.get_db_state()"},{"line_number":917,"context_line":"        if (db_state in (UNSHARDED, SHARDING, SHARDED)"}],"source_content_type":"text/x-python","patch_set":1,"id":"22ae1070_9266a149","line":914,"range":{"start_line":914,"start_character":8,"end_line":914,"end_character":33},"updated":"2022-09-29 17:43:36.000000000","message":"I need to remind myself why this method is NOT called from closer in the loop to where cleaving happens","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":911,"context_line":"        else:"},{"line_number":912,"context_line":"            category[\u0027top\u0027] \u003d candidates"},{"line_number":913,"context_line":""},{"line_number":914,"context_line":"    def _record_sharding_progress(self, broker, node, error):"},{"line_number":915,"context_line":"        own_shard_range \u003d broker.get_own_shard_range()"},{"line_number":916,"context_line":"        db_state \u003d broker.get_db_state()"},{"line_number":917,"context_line":"        if (db_state in (UNSHARDED, SHARDING, SHARDED)"}],"source_content_type":"text/x-python","patch_set":1,"id":"9a5fb423_7f0f667e","line":914,"range":{"start_line":914,"start_character":8,"end_line":914,"end_character":33},"in_reply_to":"22ae1070_9266a149","updated":"2022-10-13 23:10:45.000000000","message":"I was thinking same question too, could you please drop a comment later when you figure it out again?","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"bb230319be0076c15ac1268336abf66e02c8942e","unresolved":true,"context_lines":[{"line_number":912,"context_line":"            category[\u0027top\u0027] \u003d candidates"},{"line_number":913,"context_line":""},{"line_number":914,"context_line":"    def _record_sharding_progress(self, broker, node, error):"},{"line_number":915,"context_line":"        own_shard_range \u003d broker.get_own_shard_range()"},{"line_number":916,"context_line":"        db_state \u003d broker.get_db_state()"},{"line_number":917,"context_line":"        if (db_state in (UNSHARDED, SHARDING, SHARDED)"},{"line_number":918,"context_line":"                and own_shard_range.state in (ShardRange.SHARDING,"}],"source_content_type":"text/x-python","patch_set":1,"id":"8609f2ab_7f88ff9f","line":915,"updated":"2022-09-29 17:43:36.000000000","message":"off-topic: we could write:\n\n  if own_shard_range.state not in (ShardRange.SHARDING,\n                                   ShardRange.SHARDED):\n      return\n      \n      \noff-topic: But also, this method seems to have been missed shrinking! At first glance I don\u0027t see why we do not report progress for shrinking (which is pretty similar to sharding - in both cases we are cleaving shard ranges in order). Should it actually be:\n\n  if own_shard_range.state not in (ShardRange.SHARDING,\n                                   ShardRange.SHARDED,\n                                   ShardRange.SHRINKING,\n                                   ShardRange.SHRUNK):\n      return","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":912,"context_line":"            category[\u0027top\u0027] \u003d candidates"},{"line_number":913,"context_line":""},{"line_number":914,"context_line":"    def _record_sharding_progress(self, broker, node, error):"},{"line_number":915,"context_line":"        own_shard_range \u003d broker.get_own_shard_range()"},{"line_number":916,"context_line":"        db_state \u003d broker.get_db_state()"},{"line_number":917,"context_line":"        if (db_state in (UNSHARDED, SHARDING, SHARDED)"},{"line_number":918,"context_line":"                and own_shard_range.state in (ShardRange.SHARDING,"}],"source_content_type":"text/x-python","patch_set":1,"id":"9ff06087_3fd29817","line":915,"in_reply_to":"8609f2ab_7f88ff9f","updated":"2022-10-13 23:10:45.000000000","message":"Done","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"bb230319be0076c15ac1268336abf66e02c8942e","unresolved":true,"context_lines":[{"line_number":923,"context_line":"                    return"},{"line_number":924,"context_line":"                context_ts \u003d max(float(ts) for c, ts in contexts)"},{"line_number":925,"context_line":"                if context_ts + self.recon_sharded_timeout \\"},{"line_number":926,"context_line":"                        \u003c float(Timestamp.now()):"},{"line_number":927,"context_line":"                    # last context timestamp too old for the"},{"line_number":928,"context_line":"                    # broker to be recorded"},{"line_number":929,"context_line":"                    return"}],"source_content_type":"text/x-python","patch_set":1,"id":"7bbbe62e_442df481","line":926,"range":{"start_line":926,"start_character":26,"end_line":926,"end_character":48},"updated":"2022-09-29 17:43:36.000000000","message":"IDK why this is not simply time.time()","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":923,"context_line":"                    return"},{"line_number":924,"context_line":"                context_ts \u003d max(float(ts) for c, ts in contexts)"},{"line_number":925,"context_line":"                if context_ts + self.recon_sharded_timeout \\"},{"line_number":926,"context_line":"                        \u003c float(Timestamp.now()):"},{"line_number":927,"context_line":"                    # last context timestamp too old for the"},{"line_number":928,"context_line":"                    # broker to be recorded"},{"line_number":929,"context_line":"                    return"}],"source_content_type":"text/x-python","patch_set":1,"id":"aee856ca_a8bd38ea","line":926,"range":{"start_line":926,"start_character":26,"end_line":926,"end_character":48},"in_reply_to":"7bbbe62e_442df481","updated":"2022-10-13 23:10:45.000000000","message":"Done","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"bb230319be0076c15ac1268336abf66e02c8942e","unresolved":true,"context_lines":[{"line_number":928,"context_line":"                    # broker to be recorded"},{"line_number":929,"context_line":"                    return"},{"line_number":930,"context_line":""},{"line_number":931,"context_line":"            if ((own_shard_range.state \u003d\u003d ShardRange.SHARDING or"},{"line_number":932,"context_line":"                 db_state \u003d\u003d SHARDING)"},{"line_number":933,"context_line":"                    and (float(own_shard_range.epoch) +"},{"line_number":934,"context_line":"                         self.container_sharding_timeout"}],"source_content_type":"text/x-python","patch_set":1,"id":"2eb71508_b0b37d04","line":931,"updated":"2022-09-29 17:43:36.000000000","message":"IDK, but does this clause actually need to be within the condition at line 917? it looks to be a subset","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":928,"context_line":"                    # broker to be recorded"},{"line_number":929,"context_line":"                    return"},{"line_number":930,"context_line":""},{"line_number":931,"context_line":"            if ((own_shard_range.state \u003d\u003d ShardRange.SHARDING or"},{"line_number":932,"context_line":"                 db_state \u003d\u003d SHARDING)"},{"line_number":933,"context_line":"                    and (float(own_shard_range.epoch) +"},{"line_number":934,"context_line":"                         self.container_sharding_timeout"}],"source_content_type":"text/x-python","patch_set":1,"id":"d8d410af_dcb5891b","line":931,"in_reply_to":"2eb71508_b0b37d04","updated":"2022-10-13 23:10:45.000000000","message":"yes, it\u0027s a subset. I have changed it to reuse sharding_required().","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"bb230319be0076c15ac1268336abf66e02c8942e","unresolved":true,"context_lines":[{"line_number":930,"context_line":""},{"line_number":931,"context_line":"            if ((own_shard_range.state \u003d\u003d ShardRange.SHARDING or"},{"line_number":932,"context_line":"                 db_state \u003d\u003d SHARDING)"},{"line_number":933,"context_line":"                    and (float(own_shard_range.epoch) +"},{"line_number":934,"context_line":"                         self.container_sharding_timeout"},{"line_number":935,"context_line":"                         \u003c float(Timestamp.now()))):"},{"line_number":936,"context_line":"                self.logger.warning("}],"source_content_type":"text/x-python","patch_set":1,"id":"8024d9f8_90b2aee0","line":933,"range":{"start_line":933,"start_character":46,"end_line":933,"end_character":52},"updated":"2022-09-29 17:43:36.000000000","message":"PITA but should probably check epoch is not None, see sharder.py line 1166: \n\n  # We\u0027ve seen a case in production where the roots own_shard_range\n  # epoch is reset to None, and state set to ACTIVE (like re-defaulted)","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":930,"context_line":""},{"line_number":931,"context_line":"            if ((own_shard_range.state \u003d\u003d ShardRange.SHARDING or"},{"line_number":932,"context_line":"                 db_state \u003d\u003d SHARDING)"},{"line_number":933,"context_line":"                    and (float(own_shard_range.epoch) +"},{"line_number":934,"context_line":"                         self.container_sharding_timeout"},{"line_number":935,"context_line":"                         \u003c float(Timestamp.now()))):"},{"line_number":936,"context_line":"                self.logger.warning("}],"source_content_type":"text/x-python","patch_set":1,"id":"ee8f3474_8e75da2b","line":933,"range":{"start_line":933,"start_character":46,"end_line":933,"end_character":52},"in_reply_to":"8024d9f8_90b2aee0","updated":"2022-10-13 23:10:45.000000000","message":"Done","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"b03ef10088c1996e9ec896962a55d9f7e95c0c53","unresolved":true,"context_lines":[{"line_number":933,"context_line":"                    and (float(own_shard_range.epoch) +"},{"line_number":934,"context_line":"                         self.container_sharding_timeout"},{"line_number":935,"context_line":"                         \u003c float(Timestamp.now()))):"},{"line_number":936,"context_line":"                self.logger.warning("},{"line_number":937,"context_line":"                    \u0027Container db is stuck waiting for sharding done \u0027"},{"line_number":938,"context_line":"                    \u0027since %s. Container DB file and path: %s (%s), \u0027,"},{"line_number":939,"context_line":"                    \u0027own_shard_range state: %s, DB state: %s\u0027,"}],"source_content_type":"text/x-python","patch_set":1,"id":"6f8d3438_07888779","line":936,"updated":"2022-09-27 15:50:21.000000000","message":"osr.epoch can apparently be None in tests\n\n\tsharder-test ERROR: Unhandled exception while dumping progress for /mnt/tmp/tmp42yq622r/sdb/containers/0/ash/c1hash/c1hash.db: float() argument must be a string or a real number, not \u0027NoneType\u0027: \n\tTraceback (most recent call last):\n\t  File \"/home/vagrant/swift/swift/container/sharder.py\", line 2252, in _one_shard_cycle\n\t    self._record_sharding_progress(broker, node, error)\n\t  File \"/home/vagrant/swift/swift/container/sharder.py\", line 933, in _record_sharding_progress\n\t    and (float(own_shard_range.epoch) +","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":933,"context_line":"                    and (float(own_shard_range.epoch) +"},{"line_number":934,"context_line":"                         self.container_sharding_timeout"},{"line_number":935,"context_line":"                         \u003c float(Timestamp.now()))):"},{"line_number":936,"context_line":"                self.logger.warning("},{"line_number":937,"context_line":"                    \u0027Container db is stuck waiting for sharding done \u0027"},{"line_number":938,"context_line":"                    \u0027since %s. Container DB file and path: %s (%s), \u0027,"},{"line_number":939,"context_line":"                    \u0027own_shard_range state: %s, DB state: %s\u0027,"}],"source_content_type":"text/x-python","patch_set":1,"id":"829da121_a45b9ba3","line":936,"in_reply_to":"6f8d3438_07888779","updated":"2022-10-13 23:10:45.000000000","message":"Done","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"bb230319be0076c15ac1268336abf66e02c8942e","unresolved":true,"context_lines":[{"line_number":934,"context_line":"                         self.container_sharding_timeout"},{"line_number":935,"context_line":"                         \u003c float(Timestamp.now()))):"},{"line_number":936,"context_line":"                self.logger.warning("},{"line_number":937,"context_line":"                    \u0027Container db is stuck waiting for sharding done \u0027"},{"line_number":938,"context_line":"                    \u0027since %s. Container DB file and path: %s (%s), \u0027,"},{"line_number":939,"context_line":"                    \u0027own_shard_range state: %s, DB state: %s\u0027,"},{"line_number":940,"context_line":"                    own_shard_range.epoch.isoformat, broker.db_file,"}],"source_content_type":"text/x-python","patch_set":1,"id":"c6968d9e_6383c34d","line":937,"range":{"start_line":937,"start_character":21,"end_line":937,"end_character":68},"updated":"2022-09-29 17:43:36.000000000","message":"This IS in essence what we\u0027re worried about, so the message is fine, \n\n...but being pedantic, there is no requirement that epoch equals the time at which the own_shard_range was merged into the container DB, which predicates sharding starting. (s-m-s-r and auto-sharding DO set epoch and then merge, but someone could do it differently).\n\nSo it is possible that this warning would fire \"too soon\" if epoch was chosen to be in the past w.r.t when the shard ranges got merged and sharding actually began.\n\nIf we really cared, we could extend CleavingContext and embed the actual time the cleaving context was created in the context, and use that value to monitor progress. But I\u0027m not sure I do care enough to add that work!\n\nInstead, the warning could be just boringly factual:\n\n  \u0027Sharding has not completed in %f seconds since epoch\u0027 % (now - epoch)","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":934,"context_line":"                         self.container_sharding_timeout"},{"line_number":935,"context_line":"                         \u003c float(Timestamp.now()))):"},{"line_number":936,"context_line":"                self.logger.warning("},{"line_number":937,"context_line":"                    \u0027Container db is stuck waiting for sharding done \u0027"},{"line_number":938,"context_line":"                    \u0027since %s. Container DB file and path: %s (%s), \u0027,"},{"line_number":939,"context_line":"                    \u0027own_shard_range state: %s, DB state: %s\u0027,"},{"line_number":940,"context_line":"                    own_shard_range.epoch.isoformat, broker.db_file,"}],"source_content_type":"text/x-python","patch_set":1,"id":"9ab30df0_785c4812","line":937,"range":{"start_line":937,"start_character":21,"end_line":937,"end_character":68},"in_reply_to":"c6968d9e_6383c34d","updated":"2022-10-13 23:10:45.000000000","message":"Done","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"bb230319be0076c15ac1268336abf66e02c8942e","unresolved":true,"context_lines":[{"line_number":935,"context_line":"                         \u003c float(Timestamp.now()))):"},{"line_number":936,"context_line":"                self.logger.warning("},{"line_number":937,"context_line":"                    \u0027Container db is stuck waiting for sharding done \u0027"},{"line_number":938,"context_line":"                    \u0027since %s. Container DB file and path: %s (%s), \u0027,"},{"line_number":939,"context_line":"                    \u0027own_shard_range state: %s, DB state: %s\u0027,"},{"line_number":940,"context_line":"                    own_shard_range.epoch.isoformat, broker.db_file,"},{"line_number":941,"context_line":"                    quote(broker.path), own_shard_range.state_text, db_state)"},{"line_number":942,"context_line":"            info \u003d self._make_stats_info(broker, node, own_shard_range)"}],"source_content_type":"text/x-python","patch_set":1,"id":"7210e268_0d7543f6","line":939,"range":{"start_line":938,"start_character":31,"end_line":939,"end_character":60},"updated":"2022-09-29 17:43:36.000000000","message":"+1 include the path and file, but please also include the elapsed time.\n\n(sharder) logging is an evolved mess when it comes to including DBs and paths :/ One day I\u0027d like to make log messages more uniform, for example by using a helper method that is passed the message and broker and *always* append broker.path, broker.db_file in a uniform way to the log message. But that is way off-topic for this patch!","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":935,"context_line":"                         \u003c float(Timestamp.now()))):"},{"line_number":936,"context_line":"                self.logger.warning("},{"line_number":937,"context_line":"                    \u0027Container db is stuck waiting for sharding done \u0027"},{"line_number":938,"context_line":"                    \u0027since %s. Container DB file and path: %s (%s), \u0027,"},{"line_number":939,"context_line":"                    \u0027own_shard_range state: %s, DB state: %s\u0027,"},{"line_number":940,"context_line":"                    own_shard_range.epoch.isoformat, broker.db_file,"},{"line_number":941,"context_line":"                    quote(broker.path), own_shard_range.state_text, db_state)"},{"line_number":942,"context_line":"            info \u003d self._make_stats_info(broker, node, own_shard_range)"}],"source_content_type":"text/x-python","patch_set":1,"id":"7015e165_bc21d00d","line":939,"range":{"start_line":938,"start_character":31,"end_line":939,"end_character":60},"in_reply_to":"7210e268_0d7543f6","updated":"2022-10-13 23:10:45.000000000","message":"Done","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"bb230319be0076c15ac1268336abf66e02c8942e","unresolved":true,"context_lines":[{"line_number":950,"context_line":"                state_count[ShardRange.STATES[state]] \u003d 0"},{"line_number":951,"context_line":"            for shard_range in shard_ranges:"},{"line_number":952,"context_line":"                state_count[shard_range.state_text] +\u003d 1"},{"line_number":953,"context_line":"            info.update(state_count)"},{"line_number":954,"context_line":"            info[\u0027error\u0027] \u003d error and str(error)"},{"line_number":955,"context_line":"            self._append_stat(\u0027sharding_in_progress\u0027, \u0027all\u0027, info)"},{"line_number":956,"context_line":""}],"source_content_type":"text/x-python","patch_set":1,"id":"96735b26_b6748ad0","line":953,"range":{"start_line":953,"start_character":12,"end_line":953,"end_character":36},"updated":"2022-09-29 17:43:36.000000000","message":"maybe we could include state_count in the warning (if the progress check is moved after here)","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1f833810a6451d812bda053decb356a3a9a008fa","unresolved":false,"context_lines":[{"line_number":950,"context_line":"                state_count[ShardRange.STATES[state]] \u003d 0"},{"line_number":951,"context_line":"            for shard_range in shard_ranges:"},{"line_number":952,"context_line":"                state_count[shard_range.state_text] +\u003d 1"},{"line_number":953,"context_line":"            info.update(state_count)"},{"line_number":954,"context_line":"            info[\u0027error\u0027] \u003d error and str(error)"},{"line_number":955,"context_line":"            self._append_stat(\u0027sharding_in_progress\u0027, \u0027all\u0027, info)"},{"line_number":956,"context_line":""}],"source_content_type":"text/x-python","patch_set":1,"id":"93675fe7_1761b570","line":953,"range":{"start_line":953,"start_character":12,"end_line":953,"end_character":36},"in_reply_to":"96735b26_b6748ad0","updated":"2022-10-13 23:10:45.000000000","message":"Done","commit_id":"c4036abf790470258b67f929dbbba329586cc1b7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"749a28f3bd7186ca6ed3ca2873a64db27fcc0e48","unresolved":false,"context_lines":[{"line_number":540,"context_line":"    * ``ranges_todo``: the number of shard ranges that are yet to be"},{"line_number":541,"context_line":"      cleaved from the retiring DB."},{"line_number":542,"context_line":"    \"\"\""},{"line_number":543,"context_line":""},{"line_number":544,"context_line":"    def __init__(self, ref, cursor\u003d\u0027\u0027, max_row\u003dNone, cleave_to_row\u003dNone,"},{"line_number":545,"context_line":"                 last_cleave_to_row\u003dNone, cleaving_done\u003dFalse,"},{"line_number":546,"context_line":"                 misplaced_done\u003dFalse, ranges_done\u003d0, ranges_todo\u003d0):"}],"source_content_type":"text/x-python","patch_set":3,"id":"a3bc6e0f_8501713a","line":543,"updated":"2022-10-14 00:59:08.000000000","message":"I am going to remove this line of change.","commit_id":"97da75f3e07d4cd27452d7eba22b688f495d9f41"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"b340ead2d941d6b7bdcacb875c0317605a38eb61","unresolved":true,"context_lines":[{"line_number":918,"context_line":"        own_shard_range \u003d broker.get_own_shard_range()"},{"line_number":919,"context_line":"        if own_shard_range.state not in ("},{"line_number":920,"context_line":"                ShardRange.SHARDING, ShardRange.SHARDED,"},{"line_number":921,"context_line":"                ShardRange.SHRINKING, ShardRange.SHRUNK):"},{"line_number":922,"context_line":"            return"},{"line_number":923,"context_line":""},{"line_number":924,"context_line":"        if db_state \u003d\u003d SHARDED:"}],"source_content_type":"text/x-python","patch_set":4,"id":"50404586_8ee5f8c2","line":921,"range":{"start_line":921,"start_character":16,"end_line":921,"end_character":55},"updated":"2022-10-14 14:49:16.000000000","message":"ok, report shrinking as well as sharding\n\nthis change is not unit tested","commit_id":"ee50a598df65697f6ae59a8f34f1f2d706a8c490"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9746dd55dd811ecb327290be3dc46ae93b6f81a3","unresolved":false,"context_lines":[{"line_number":918,"context_line":"        own_shard_range \u003d broker.get_own_shard_range()"},{"line_number":919,"context_line":"        if own_shard_range.state not in ("},{"line_number":920,"context_line":"                ShardRange.SHARDING, ShardRange.SHARDED,"},{"line_number":921,"context_line":"                ShardRange.SHRINKING, ShardRange.SHRUNK):"},{"line_number":922,"context_line":"            return"},{"line_number":923,"context_line":""},{"line_number":924,"context_line":"        if db_state \u003d\u003d SHARDED:"}],"source_content_type":"text/x-python","patch_set":4,"id":"35313b73_5d3e16c8","line":921,"range":{"start_line":921,"start_character":16,"end_line":921,"end_character":55},"in_reply_to":"50404586_8ee5f8c2","updated":"2022-10-14 15:54:38.000000000","message":"Done","commit_id":"ee50a598df65697f6ae59a8f34f1f2d706a8c490"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"b340ead2d941d6b7bdcacb875c0317605a38eb61","unresolved":true,"context_lines":[{"line_number":945,"context_line":"            state_count[shard_range.state_text] +\u003d 1"},{"line_number":946,"context_line":"        info.update(state_count)"},{"line_number":947,"context_line":"        info[\u0027error\u0027] \u003d error and str(error)"},{"line_number":948,"context_line":"        self._append_stat(\u0027sharding_in_progress\u0027, \u0027all\u0027, info)"},{"line_number":949,"context_line":""},{"line_number":950,"context_line":"        if broker.sharding_required() and ("},{"line_number":951,"context_line":"                own_shard_range.epoch is not None) and ("}],"source_content_type":"text/x-python","patch_set":4,"id":"b621ba42_c6e42e3e","line":948,"range":{"start_line":948,"start_character":27,"end_line":948,"end_character":35},"updated":"2022-10-14 14:49:16.000000000","message":"here and below in the Warning log we use the verb \u0027sharding\u0027 when the action could be \u0027sharding\u0027 or \u0027shrinking\u0027. Strictly, the coomon verb for both actions is \u0027cleaving\u0027. But there is plenty of ambiguity in the code: originally the sharder only sharded, and cleaving was only a shrading primitive. But when we added shrinking support and realised that shrinking would re-use the sharder\u0027s cleaving primitives, then the confusion arose.\n\nBoth sharding and shrinking DBs \u0027shard\u0027 (or \u0027cleave\u0027) themselves into shards!\n\nI\u0027m inclined to live with the ambiguity for now since we do so little shrinking.","commit_id":"ee50a598df65697f6ae59a8f34f1f2d706a8c490"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9746dd55dd811ecb327290be3dc46ae93b6f81a3","unresolved":false,"context_lines":[{"line_number":945,"context_line":"            state_count[shard_range.state_text] +\u003d 1"},{"line_number":946,"context_line":"        info.update(state_count)"},{"line_number":947,"context_line":"        info[\u0027error\u0027] \u003d error and str(error)"},{"line_number":948,"context_line":"        self._append_stat(\u0027sharding_in_progress\u0027, \u0027all\u0027, info)"},{"line_number":949,"context_line":""},{"line_number":950,"context_line":"        if broker.sharding_required() and ("},{"line_number":951,"context_line":"                own_shard_range.epoch is not None) and ("}],"source_content_type":"text/x-python","patch_set":4,"id":"f486695d_eca7f009","line":948,"range":{"start_line":948,"start_character":27,"end_line":948,"end_character":35},"in_reply_to":"b621ba42_c6e42e3e","updated":"2022-10-14 15:54:38.000000000","message":"thanks for the explanation.","commit_id":"ee50a598df65697f6ae59a8f34f1f2d706a8c490"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"b340ead2d941d6b7bdcacb875c0317605a38eb61","unresolved":true,"context_lines":[{"line_number":958,"context_line":"            # auto-sharding do set epoch and then merge, so we use it to tell"},{"line_number":959,"context_line":"            # whether sharding has been taking too long or not."},{"line_number":960,"context_line":"            self.logger.warning("},{"line_number":961,"context_line":"                \u0027Sharding has not completed in %.2f seconds since %s.\u0027"},{"line_number":962,"context_line":"                \u0027 Container DB file and path: %s (%s), DB state: %s,\u0027"},{"line_number":963,"context_line":"                \u0027 own_shard_range state: %s, state count of shard ranges: %s\u0027 %"},{"line_number":964,"context_line":"                (time.time() - float(own_shard_range.epoch),"}],"source_content_type":"text/x-python","patch_set":4,"id":"185adf01_9eda8a25","line":961,"range":{"start_line":961,"start_character":17,"end_line":961,"end_character":25},"updated":"2022-10-14 14:49:16.000000000","message":"we could differentiate shrinking vs sharding but actually it is helpful to have only one warning log to pattern match for an alert, and the message includes the own shard range state which *does* differentiate.\n\nIf we were to change this verb I would advocate using \u0027Cleaving\u0027, but I\u0027m not sure it\u0027s necessary.","commit_id":"ee50a598df65697f6ae59a8f34f1f2d706a8c490"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9746dd55dd811ecb327290be3dc46ae93b6f81a3","unresolved":false,"context_lines":[{"line_number":958,"context_line":"            # auto-sharding do set epoch and then merge, so we use it to tell"},{"line_number":959,"context_line":"            # whether sharding has been taking too long or not."},{"line_number":960,"context_line":"            self.logger.warning("},{"line_number":961,"context_line":"                \u0027Sharding has not completed in %.2f seconds since %s.\u0027"},{"line_number":962,"context_line":"                \u0027 Container DB file and path: %s (%s), DB state: %s,\u0027"},{"line_number":963,"context_line":"                \u0027 own_shard_range state: %s, state count of shard ranges: %s\u0027 %"},{"line_number":964,"context_line":"                (time.time() - float(own_shard_range.epoch),"}],"source_content_type":"text/x-python","patch_set":4,"id":"cfe09847_2fcc0506","line":961,"range":{"start_line":961,"start_character":17,"end_line":961,"end_character":25},"in_reply_to":"185adf01_9eda8a25","updated":"2022-10-14 15:54:38.000000000","message":"Let\u0027s change it to \u0027Cleaving\u0027, now I have same feeling as yours.","commit_id":"ee50a598df65697f6ae59a8f34f1f2d706a8c490"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"b340ead2d941d6b7bdcacb875c0317605a38eb61","unresolved":true,"context_lines":[{"line_number":947,"context_line":"        info[\u0027error\u0027] \u003d error and str(error)"},{"line_number":948,"context_line":"        self._append_stat(\u0027sharding_in_progress\u0027, \u0027all\u0027, info)"},{"line_number":949,"context_line":""},{"line_number":950,"context_line":"        if broker.sharding_required() and ("},{"line_number":951,"context_line":"                own_shard_range.epoch is not None) and ("},{"line_number":952,"context_line":"                float(own_shard_range.epoch) +"},{"line_number":953,"context_line":"                self.container_sharding_timeout \u003c"},{"line_number":954,"context_line":"                time.time()):"},{"line_number":955,"context_line":"            # Note: There is no requirement that own_shard_range.epoch equals"},{"line_number":956,"context_line":"            # the time at which the own_shard_range was merged into the"},{"line_number":957,"context_line":"            # container DB, which predicates sharding starting. But s-m-s-r and"},{"line_number":958,"context_line":"            # auto-sharding do set epoch and then merge, so we use it to tell"},{"line_number":959,"context_line":"            # whether sharding has been taking too long or not."},{"line_number":960,"context_line":"            self.logger.warning("},{"line_number":961,"context_line":"                \u0027Sharding has not completed in %.2f seconds since %s.\u0027"},{"line_number":962,"context_line":"                \u0027 Container DB file and path: %s (%s), DB state: %s,\u0027"},{"line_number":963,"context_line":"                \u0027 own_shard_range state: %s, state count of shard ranges: %s\u0027 %"},{"line_number":964,"context_line":"                (time.time() - float(own_shard_range.epoch),"},{"line_number":965,"context_line":"                 own_shard_range.epoch.isoformat, broker.db_file,"},{"line_number":966,"context_line":"                 quote(broker.path), db_state,"},{"line_number":967,"context_line":"                 own_shard_range.state_text, str(state_count)))"},{"line_number":968,"context_line":""},{"line_number":969,"context_line":"    def _report_stats(self):"},{"line_number":970,"context_line":"        # report accumulated stats since start of one sharder cycle"}],"source_content_type":"text/x-python","patch_set":4,"id":"edb007a7_eb2cdb94","line":967,"range":{"start_line":950,"start_character":8,"end_line":967,"end_character":63},"updated":"2022-10-14 14:49:16.000000000","message":"this is the significant new part","commit_id":"ee50a598df65697f6ae59a8f34f1f2d706a8c490"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9746dd55dd811ecb327290be3dc46ae93b6f81a3","unresolved":false,"context_lines":[{"line_number":947,"context_line":"        info[\u0027error\u0027] \u003d error and str(error)"},{"line_number":948,"context_line":"        self._append_stat(\u0027sharding_in_progress\u0027, \u0027all\u0027, info)"},{"line_number":949,"context_line":""},{"line_number":950,"context_line":"        if broker.sharding_required() and ("},{"line_number":951,"context_line":"                own_shard_range.epoch is not None) and ("},{"line_number":952,"context_line":"                float(own_shard_range.epoch) +"},{"line_number":953,"context_line":"                self.container_sharding_timeout \u003c"},{"line_number":954,"context_line":"                time.time()):"},{"line_number":955,"context_line":"            # Note: There is no requirement that own_shard_range.epoch equals"},{"line_number":956,"context_line":"            # the time at which the own_shard_range was merged into the"},{"line_number":957,"context_line":"            # container DB, which predicates sharding starting. But s-m-s-r and"},{"line_number":958,"context_line":"            # auto-sharding do set epoch and then merge, so we use it to tell"},{"line_number":959,"context_line":"            # whether sharding has been taking too long or not."},{"line_number":960,"context_line":"            self.logger.warning("},{"line_number":961,"context_line":"                \u0027Sharding has not completed in %.2f seconds since %s.\u0027"},{"line_number":962,"context_line":"                \u0027 Container DB file and path: %s (%s), DB state: %s,\u0027"},{"line_number":963,"context_line":"                \u0027 own_shard_range state: %s, state count of shard ranges: %s\u0027 %"},{"line_number":964,"context_line":"                (time.time() - float(own_shard_range.epoch),"},{"line_number":965,"context_line":"                 own_shard_range.epoch.isoformat, broker.db_file,"},{"line_number":966,"context_line":"                 quote(broker.path), db_state,"},{"line_number":967,"context_line":"                 own_shard_range.state_text, str(state_count)))"},{"line_number":968,"context_line":""},{"line_number":969,"context_line":"    def _report_stats(self):"},{"line_number":970,"context_line":"        # report accumulated stats since start of one sharder cycle"}],"source_content_type":"text/x-python","patch_set":4,"id":"ab31a935_61959865","line":967,"range":{"start_line":950,"start_character":8,"end_line":967,"end_character":63},"in_reply_to":"edb007a7_eb2cdb94","updated":"2022-10-14 15:54:38.000000000","message":"Done","commit_id":"ee50a598df65697f6ae59a8f34f1f2d706a8c490"}]}
