)]}'
{"/COMMIT_MSG":[{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb52b92714fd5ae23772a5027d03af2ca951a571","unresolved":true,"context_lines":[{"line_number":10,"context_line":"whenever it was streaming the complete object. This minimizes the"},{"line_number":11,"context_line":"possibility of returning corrupted data to clients, but"},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"- Clients that only ever make ranged requests get no benefit and"},{"line_number":14,"context_line":"- MD5 can be rather CPU-intensive; this is especially noticeable"},{"line_number":15,"context_line":"  in all-flash clusters/policies where Swift is not disk-constrained."},{"line_number":16,"context_line":""}],"source_content_type":"text/x-gerrit-commit-message","patch_set":3,"id":"866c681d_5c0b9251","line":13,"updated":"2024-09-19 22:49:39.000000000","message":"this really is a bug I think.","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"a7e7a1db7e1ad63a97ed3f91e5861f1cafa6fa6c","unresolved":false,"context_lines":[{"line_number":10,"context_line":"whenever it was streaming the complete object. This minimizes the"},{"line_number":11,"context_line":"possibility of returning corrupted data to clients, but"},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"- Clients that only ever make ranged requests get no benefit and"},{"line_number":14,"context_line":"- MD5 can be rather CPU-intensive; this is especially noticeable"},{"line_number":15,"context_line":"  in all-flash clusters/policies where Swift is not disk-constrained."},{"line_number":16,"context_line":""}],"source_content_type":"text/x-gerrit-commit-message","patch_set":3,"id":"2b6b28e3_87648372","line":13,"in_reply_to":"15d7f6d2_d4e27386","updated":"2024-12-17 04:54:41.000000000","message":"Acknowledged","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"85a5ad07b0928c6862f2281780b17edafc29f05a","unresolved":true,"context_lines":[{"line_number":10,"context_line":"whenever it was streaming the complete object. This minimizes the"},{"line_number":11,"context_line":"possibility of returning corrupted data to clients, but"},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"- Clients that only ever make ranged requests get no benefit and"},{"line_number":14,"context_line":"- MD5 can be rather CPU-intensive; this is especially noticeable"},{"line_number":15,"context_line":"  in all-flash clusters/policies where Swift is not disk-constrained."},{"line_number":16,"context_line":""}],"source_content_type":"text/x-gerrit-commit-message","patch_set":3,"id":"15d7f6d2_d4e27386","line":13,"in_reply_to":"866c681d_5c0b9251","updated":"2024-10-07 23:22:38.000000000","message":"I don\u0027t know that there\u0027s much we can do for it, though. In the limit, I\u0027m not sure we can do much to help a client that wants to read a whole object one byte at a time, for example.\n\nMaybe we could add some checksum-per-mebibyte or so? Probably wouldn\u0027t be too bad to do that down in liberasurecode, with the per-fragment checksums -- but replicated policies will be more annoying, since we\u0027ll need to add to the storage overhead.","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb52b92714fd5ae23772a5027d03af2ca951a571","unresolved":true,"context_lines":[{"line_number":18,"context_line":"This takes values from 100 (default; all whole-object downloads are"},{"line_number":19,"context_line":"validated) down to 0 (none are)."},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"Note that even with etag validation turned off, the object-auditor"},{"line_number":22,"context_line":"should eventually detect and quarantine corrupted objects."},{"line_number":23,"context_line":""},{"line_number":24,"context_line":"Change-Id: Iae48e8db642f6772114c0ae7c6bdd9c653cd035b"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":3,"id":"6e3551e5_e08c06ed","line":21,"updated":"2024-09-19 22:49:39.000000000","message":"I have some concern on the wording of this sentence. object-auditor won\u0027t help detect those transient read errors during client read path, if MD5 on object-server is skipped. I feel we should warn people clearly that they might read out corrupted data (chances are very low though) if etag validation ratio is less than 100.","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"85a5ad07b0928c6862f2281780b17edafc29f05a","unresolved":false,"context_lines":[{"line_number":18,"context_line":"This takes values from 100 (default; all whole-object downloads are"},{"line_number":19,"context_line":"validated) down to 0 (none are)."},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"Note that even with etag validation turned off, the object-auditor"},{"line_number":22,"context_line":"should eventually detect and quarantine corrupted objects."},{"line_number":23,"context_line":""},{"line_number":24,"context_line":"Change-Id: Iae48e8db642f6772114c0ae7c6bdd9c653cd035b"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":3,"id":"fe537896_888bc5fa","line":21,"in_reply_to":"6e3551e5_e08c06ed","updated":"2024-10-07 23:22:38.000000000","message":"Done","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"a7e7a1db7e1ad63a97ed3f91e5861f1cafa6fa6c","unresolved":true,"context_lines":[{"line_number":20,"context_line":""},{"line_number":21,"context_line":"Note that even with etag validation turned off, the object-auditor"},{"line_number":22,"context_line":"should eventually detect and quarantine corrupted objects. However,"},{"line_number":23,"context_line":"transient read errors may cause clients to download corrupted data."},{"line_number":24,"context_line":""},{"line_number":25,"context_line":"Change-Id: Iae48e8db642f6772114c0ae7c6bdd9c653cd035b"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":4,"id":"f624fb31_68ec089e","line":23,"updated":"2024-12-17 04:54:41.000000000","message":"Worth to mention that, ``etag_validate_pct \u003d 0`` along with other system and OS optimizations contributed to a quite decent performance gain for single object server (from 65Gbps download throughput to 100Gbps NIC limit) in the SSD swift cluster testing, so it\u0027s going to be a great trade-off for some users.\n\nAlso after spending months on this work, profiling, identified MD5 as bottleneck, wrote a change to disable etag validation but didn\u0027t see performance gain, figured out new bottlenecks and got them solved, and setting up benchmark jobs in order to verify this patch, oh my, maybe also worth to mention me as a co-author. 😜","commit_id":"845394b4d2a4ac8db63733de90ff15979a1c30e7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42ea92f040283a4ec3302a6f75913abfbe2b4239","unresolved":false,"context_lines":[{"line_number":20,"context_line":""},{"line_number":21,"context_line":"Note that even with etag validation turned off, the object-auditor"},{"line_number":22,"context_line":"should eventually detect and quarantine corrupted objects. However,"},{"line_number":23,"context_line":"transient read errors may cause clients to download corrupted data."},{"line_number":24,"context_line":""},{"line_number":25,"context_line":"Change-Id: Iae48e8db642f6772114c0ae7c6bdd9c653cd035b"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":4,"id":"cbc18f4c_3a845bc1","line":23,"in_reply_to":"f624fb31_68ec089e","updated":"2025-01-08 16:59:35.000000000","message":"Done","commit_id":"845394b4d2a4ac8db63733de90ff15979a1c30e7"}],"/PATCHSET_LEVEL":[{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb52b92714fd5ae23772a5027d03af2ca951a571","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"929b52e0_9a8ea2e0","updated":"2024-09-19 22:49:39.000000000","message":"This patch will be very useful to bring down CPU usages when etag validation is not strictly required, and the new percentage config is a great idea.\n\nHowever, the current implementation will lost ``self._started_at_0 \u003d True`` when etag validation is skipped, so ``self._handle_close_quarantine`` won\u0027t be called, but the total read bytes check is still valuable in this case, and I feel we should save this check even when etag validation is skipped.","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":6968,"name":"Christian Schwede","email":"cschwede@nvidia.com","username":"cschwede"},"change_message_id":"f41290addeed8439c1fad85d51fa9bb94834aee3","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":4,"id":"c541c117_d1c48e76","updated":"2024-10-22 23:05:23.000000000","message":"In case the object is not verified by the server during a download - does it make sense to add a header to notify the client about this? Clients might expect any downloaded object to be verified and need to know if an object might be corrupted (because it is not validated).","commit_id":"845394b4d2a4ac8db63733de90ff15979a1c30e7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"e5d6a03cff1119ddc2e9a4ec0805ae33da9e1e92","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"1139ebf7_03658c0c","updated":"2024-10-17 15:58:15.000000000","message":"LGTM, I like the idea of new fraction config, then users can set a proper value per their object-server CPU usages.","commit_id":"845394b4d2a4ac8db63733de90ff15979a1c30e7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42ea92f040283a4ec3302a6f75913abfbe2b4239","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"14bde583_3f7ef22a","updated":"2025-01-08 16:59:35.000000000","message":"This patch has been verified on test cluster, let\u0027s get it merged!","commit_id":"845394b4d2a4ac8db63733de90ff15979a1c30e7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"2974ad9f2bb05972b544f13e56482b64c40e6e2b","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"5ac4b269_49de0f93","updated":"2024-12-13 00:08:25.000000000","message":"Verified on a testing cluster, with etag_validate_pct \u003d 0 that there would be no etag MD5 calculation performed and CPU usages got reduced.","commit_id":"845394b4d2a4ac8db63733de90ff15979a1c30e7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9ea493a38e89b4b77ebdca836ca17402d8d8b5d9","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"4d2f2a12_ee360433","in_reply_to":"1106bf3c_49dc4c61","updated":"2025-01-08 19:35:54.000000000","message":"also, this is a new option with default to be 100 (all whole-object downloads are\nvalidated), so if cluster operator is going to tune it down, he/she is expected to notify users.","commit_id":"845394b4d2a4ac8db63733de90ff15979a1c30e7"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"d96d49b5a4ef5d399c24e6f4dff387efcc18c73b","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":4,"id":"1106bf3c_49dc4c61","in_reply_to":"c541c117_d1c48e76","updated":"2024-12-12 17:41:38.000000000","message":"If a client is paying that much attention to corruption detection, it\u0027s probably already performing its own validation. At any rate, it *should* do it -- on-disk corruption is not the only concern: there\u0027s also in-memory corruption and network bit-flips to worry about.","commit_id":"845394b4d2a4ac8db63733de90ff15979a1c30e7"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9ea493a38e89b4b77ebdca836ca17402d8d8b5d9","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":5,"id":"c5e29ee4_2e2ae44a","updated":"2025-01-08 19:35:54.000000000","message":"I forgot to click the \"+1\" on Workflow to kick off zuul merge process, let\u0027s do it again.","commit_id":"3d8fb046cbc439eb5e0b18d97b874c7f7b9ca489"}],"etc/object-server.conf-sample":[{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb52b92714fd5ae23772a5027d03af2ca951a571","unresolved":false,"context_lines":[{"line_number":160,"context_line":"# object-server. Since range-request-heavy clients don\u0027t get these integrity"},{"line_number":161,"context_line":"# checks, it seems reasonable to give operators a chance to tune it down and"},{"line_number":162,"context_line":"# instead rely on the object-auditor to detect and quarantine corrupted objects."},{"line_number":163,"context_line":"# etag_validate_pct \u003d 100"},{"line_number":164,"context_line":"#"},{"line_number":165,"context_line":"# on PUTs, sync data every n MB"},{"line_number":166,"context_line":"# mb_per_sync \u003d 512"}],"source_content_type":"application/octet-stream","patch_set":3,"id":"deb6329a_16851712","line":163,"updated":"2024-09-19 22:49:39.000000000","message":"Percentage is a better idea than a boolean value, user can adjust this value depends on how much free CPU they have on object nodes, it also help reduce the risk of reading corrupted data.","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"}],"swift/obj/diskfile.py":[{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb52b92714fd5ae23772a5027d03af2ca951a571","unresolved":true,"context_lines":[{"line_number":2157,"context_line":"    def _init_checks(self):"},{"line_number":2158,"context_line":"        if self._fp.tell() \u003d\u003d 0 and \\"},{"line_number":2159,"context_line":"                random.random() \u003c self._etag_validate_frac:"},{"line_number":2160,"context_line":"            self._started_at_0 \u003d True"},{"line_number":2161,"context_line":"            self._iter_etag \u003d md5(usedforsecurity\u003dFalse)"},{"line_number":2162,"context_line":""},{"line_number":2163,"context_line":"    def _update_checks(self, chunk):"}],"source_content_type":"text/x-python","patch_set":3,"id":"884d4e75_8f90675f","line":2160,"updated":"2024-09-19 22:49:39.000000000","message":"if ``random.random() \u003c self._etag_validate_frac``, even though ``self._fp.tell() \u003d\u003d 0``, we will lose ``self._started_at_0 \u003d True``. And ``self._started_at_0`` can be used later on to detect below  issue in function ``_handle_close_quarantine``.\n\n        if self._bytes_read !\u003d self._obj_size:\n            self._quarantine(\n                \"Bytes read: %s, does not match metadata: %s\" % (\n                    self._bytes_read, self._obj_size))","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"85a5ad07b0928c6862f2281780b17edafc29f05a","unresolved":false,"context_lines":[{"line_number":2157,"context_line":"    def _init_checks(self):"},{"line_number":2158,"context_line":"        if self._fp.tell() \u003d\u003d 0 and \\"},{"line_number":2159,"context_line":"                random.random() \u003c self._etag_validate_frac:"},{"line_number":2160,"context_line":"            self._started_at_0 \u003d True"},{"line_number":2161,"context_line":"            self._iter_etag \u003d md5(usedforsecurity\u003dFalse)"},{"line_number":2162,"context_line":""},{"line_number":2163,"context_line":"    def _update_checks(self, chunk):"}],"source_content_type":"text/x-python","patch_set":3,"id":"b54aebc6_051cb802","line":2160,"in_reply_to":"884d4e75_8f90675f","updated":"2024-10-07 23:22:38.000000000","message":"Good call! Yeah, we ought to keep the length validation since it\u0027s cheap.","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb52b92714fd5ae23772a5027d03af2ca951a571","unresolved":true,"context_lines":[{"line_number":2377,"context_line":"        if self._iter_etag and not self._md5_of_sent_bytes:"},{"line_number":2378,"context_line":"            self._md5_of_sent_bytes \u003d self._iter_etag.hexdigest()"},{"line_number":2379,"context_line":""},{"line_number":2380,"context_line":"        if self._bytes_read !\u003d self._obj_size:"},{"line_number":2381,"context_line":"            self._quarantine("},{"line_number":2382,"context_line":"                \"Bytes read: %s, does not match metadata: %s\" % ("},{"line_number":2383,"context_line":"                    self._bytes_read, self._obj_size))"}],"source_content_type":"text/x-python","patch_set":3,"id":"22356871_69924a70","line":2380,"updated":"2024-09-19 22:49:39.000000000","message":"this still will provide valuable check even without etag validation","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"85a5ad07b0928c6862f2281780b17edafc29f05a","unresolved":false,"context_lines":[{"line_number":2377,"context_line":"        if self._iter_etag and not self._md5_of_sent_bytes:"},{"line_number":2378,"context_line":"            self._md5_of_sent_bytes \u003d self._iter_etag.hexdigest()"},{"line_number":2379,"context_line":""},{"line_number":2380,"context_line":"        if self._bytes_read !\u003d self._obj_size:"},{"line_number":2381,"context_line":"            self._quarantine("},{"line_number":2382,"context_line":"                \"Bytes read: %s, does not match metadata: %s\" % ("},{"line_number":2383,"context_line":"                    self._bytes_read, self._obj_size))"}],"source_content_type":"text/x-python","patch_set":3,"id":"caf9cd17_c082846e","line":2380,"in_reply_to":"22356871_69924a70","updated":"2024-10-07 23:22:38.000000000","message":"Acknowledged","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb52b92714fd5ae23772a5027d03af2ca951a571","unresolved":true,"context_lines":[{"line_number":2396,"context_line":"        \"\"\""},{"line_number":2397,"context_line":"        if self._fp:"},{"line_number":2398,"context_line":"            try:"},{"line_number":2399,"context_line":"                if self._started_at_0 and self._read_to_eof:"},{"line_number":2400,"context_line":"                    self._handle_close_quarantine()"},{"line_number":2401,"context_line":"            except DiskFileQuarantined:"},{"line_number":2402,"context_line":"                raise"}],"source_content_type":"text/x-python","patch_set":3,"id":"697f2c43_420c1ad3","line":2399,"updated":"2024-09-19 22:49:39.000000000","message":"since we lost ``self._started_at_0 \u003d True`` when etag validation is skipped, ``self._handle_close_quarantine`` won\u0027t be called.","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"85a5ad07b0928c6862f2281780b17edafc29f05a","unresolved":false,"context_lines":[{"line_number":2396,"context_line":"        \"\"\""},{"line_number":2397,"context_line":"        if self._fp:"},{"line_number":2398,"context_line":"            try:"},{"line_number":2399,"context_line":"                if self._started_at_0 and self._read_to_eof:"},{"line_number":2400,"context_line":"                    self._handle_close_quarantine()"},{"line_number":2401,"context_line":"            except DiskFileQuarantined:"},{"line_number":2402,"context_line":"                raise"}],"source_content_type":"text/x-python","patch_set":3,"id":"9d3cf457_f694dfb5","line":2399,"in_reply_to":"697f2c43_420c1ad3","updated":"2024-10-07 23:22:38.000000000","message":"Acknowledged","commit_id":"64d216607f9fb948613ce1d74ecf17ec472c721b"}],"test/unit/obj/test_server.py":[{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"e5d6a03cff1119ddc2e9a4ec0805ae33da9e1e92","unresolved":false,"context_lines":[{"line_number":4315,"context_line":"        resp \u003d req.get_response(object_controller)"},{"line_number":4316,"context_line":"        self.assertEqual(\u0027404 Not Found\u0027, resp.status)"},{"line_number":4317,"context_line":"        self.assertFalse(os.path.exists(disk_file._datadir))"},{"line_number":4318,"context_line":"        self.assertTrue(os.path.exists(quar_dir))"},{"line_number":4319,"context_line":""},{"line_number":4320,"context_line":"    def test_GET_quarantine_zbyte(self):"},{"line_number":4321,"context_line":"        # Test swift.obj.server.ObjectController.GET"}],"source_content_type":"text/x-python","patch_set":4,"id":"c98c8d42_f663a1a2","line":4318,"updated":"2024-10-17 15:58:15.000000000","message":"with etag_validate_pct is set to 0, ETag mismatches are ignored, but other issue (size mismatches) still trigger quarantining.","commit_id":"845394b4d2a4ac8db63733de90ff15979a1c30e7"}]}
