)]}'
{"swift/obj/diskfile.py":[{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"99a06990e0f7df584e305d9690d9dbc1f0bf8c51","unresolved":false,"context_lines":[{"line_number":303,"context_line":""},{"line_number":304,"context_line":"    # Cleanup possible corrupted data"},{"line_number":305,"context_line":"    for suffix in hashes.keys():"},{"line_number":306,"context_line":"        if not isinstance(suffix, str):"},{"line_number":307,"context_line":"            del hashes[suffix]"},{"line_number":308,"context_line":""},{"line_number":309,"context_line":"    # hashes.pkl w/o valid updated key is \"valid\" but \"forever old\""}],"source_content_type":"text/x-python","patch_set":1,"id":"bfb3d3c7_ddcd6485","line":306,"range":{"start_line":306,"start_character":15,"end_line":306,"end_character":38},"updated":"2019-05-29 17:17:33.000000000","message":"So this is one type of possible corruption... what about the other obvious corruption (given the error message and repro in the bug report): having a str that includes a \u0027\\x00\u0027?\n\nWhile tring to repro that specific \"TypeError: must be encoded string without NULL bytes, not unicode\" message, I noticed that the null byte seems like it might be *the only* problem, at least for some os functions:\n\n\u003e\u003e\u003e os.listdir(u\u0027./\\x00\u0027)\nTraceback (most recent call last):\n  File \"\u003cstdin\u003e\", line 1, in \u003cmodule\u003e\nTypeError: listdir() argument 1 must be encoded string without null bytes, not unicode\n\u003e\u003e\u003e os.listdir(\u0027./\\x00\u0027)\nTraceback (most recent call last):\n  File \"\u003cstdin\u003e\", line 1, in \u003cmodule\u003e\nTypeError: listdir() argument 1 must be encoded string without null bytes, not str\n\u003e\u003e\u003e os.listdir(\u0027./\\xff\u0027)\nTraceback (most recent call last):\n  File \"\u003cstdin\u003e\", line 1, in \u003cmodule\u003e\nOSError: [Errno 2] No such file or directory: \u0027./\\xff\u0027\n\u003e\u003e\u003e os.listdir(u\u0027./\\xff\u0027)\nTraceback (most recent call last):\n  File \"\u003cstdin\u003e\", line 1, in \u003cmodule\u003e\nOSError: [Errno 2] No such file or directory: \u0027./\\xc3\\xbf\u0027\n\nI think we could make tighter assertions on those hashes keys, yeah? Maybe something like\n\n for key in hashes.keys():\n     if key not in (\u0027valid\u0027, \u0027updated\u0027) and (len(key) !\u003d 3 or any(c not in \u00270123456789abcdef\u0027 for c in key)):\n         return {\u0027valid\u0027: False}","commit_id":"9fe0ab84b77e9a5dcf0089d5d598bf365500f8dd"},{"author":{"_account_id":6968,"name":"Christian Schwede","email":"cschwede@redhat.com","username":"cschwede"},"change_message_id":"94bf03d4860b7cba6ab29633c84f7c7f634db29c","unresolved":false,"context_lines":[{"line_number":303,"context_line":""},{"line_number":304,"context_line":"    # Cleanup possible corrupted data"},{"line_number":305,"context_line":"    for suffix in hashes.keys():"},{"line_number":306,"context_line":"        if not isinstance(suffix, str):"},{"line_number":307,"context_line":"            del hashes[suffix]"},{"line_number":308,"context_line":""},{"line_number":309,"context_line":"    # hashes.pkl w/o valid updated key is \"valid\" but \"forever old\""}],"source_content_type":"text/x-python","patch_set":1,"id":"bfb3d3c7_68b494f2","line":306,"range":{"start_line":306,"start_character":15,"end_line":306,"end_character":38},"in_reply_to":"bfb3d3c7_ddcd6485","updated":"2019-05-31 08:58:49.000000000","message":"\u003e So this is one type of possible corruption... what about the other\n \u003e obvious corruption (given the error message and repro in the bug\n \u003e report): having a str that includes a \u0027\\x00\u0027?\n\nTrue - I think checking if the key is actually alpha-numeric makes most sense then?\n\n \u003e I think we could make tighter assertions on those hashes keys,\n \u003e yeah? Maybe something like\n \u003e \n \u003e for key in hashes.keys():\n \u003e if key not in (\u0027valid\u0027, \u0027updated\u0027) and (len(key) !\u003d 3 or any(c not\n \u003e in \u00270123456789abcdef\u0027 for c in key)):\n \u003e return {\u0027valid\u0027: False}\n\nThese are already handled today, though not in read_hashes(). I tested various invalid hash entries in the pkl file, and all of them besides the one containing a NULL byte got fixed eventually with todays code.\n\nSo maybe a \"if not key.isalnum()\" is enough? That works fine on py2 and py3 then.","commit_id":"9fe0ab84b77e9a5dcf0089d5d598bf365500f8dd"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"99a06990e0f7df584e305d9690d9dbc1f0bf8c51","unresolved":false,"context_lines":[{"line_number":304,"context_line":"    # Cleanup possible corrupted data"},{"line_number":305,"context_line":"    for suffix in hashes.keys():"},{"line_number":306,"context_line":"        if not isinstance(suffix, str):"},{"line_number":307,"context_line":"            del hashes[suffix]"},{"line_number":308,"context_line":""},{"line_number":309,"context_line":"    # hashes.pkl w/o valid updated key is \"valid\" but \"forever old\""},{"line_number":310,"context_line":"    hashes.setdefault(\u0027valid\u0027, True)"}],"source_content_type":"text/x-python","patch_set":1,"id":"bfb3d3c7_ea5d0047","line":307,"updated":"2019-05-29 17:17:33.000000000","message":"If there\u0027s been some corruption, why are we assuming that any of the rest of it is OK? I was kind of expecting us to just nuke it and\n\n return {\u0027valid\u0027: False}","commit_id":"9fe0ab84b77e9a5dcf0089d5d598bf365500f8dd"},{"author":{"_account_id":6968,"name":"Christian Schwede","email":"cschwede@redhat.com","username":"cschwede"},"change_message_id":"94bf03d4860b7cba6ab29633c84f7c7f634db29c","unresolved":false,"context_lines":[{"line_number":304,"context_line":"    # Cleanup possible corrupted data"},{"line_number":305,"context_line":"    for suffix in hashes.keys():"},{"line_number":306,"context_line":"        if not isinstance(suffix, str):"},{"line_number":307,"context_line":"            del hashes[suffix]"},{"line_number":308,"context_line":""},{"line_number":309,"context_line":"    # hashes.pkl w/o valid updated key is \"valid\" but \"forever old\""},{"line_number":310,"context_line":"    hashes.setdefault(\u0027valid\u0027, True)"}],"source_content_type":"text/x-python","patch_set":1,"id":"bfb3d3c7_2d10ee56","line":307,"in_reply_to":"bfb3d3c7_ea5d0047","updated":"2019-05-31 08:58:49.000000000","message":"Yes, let\u0027s just invalidate it and re-calculate it.","commit_id":"9fe0ab84b77e9a5dcf0089d5d598bf365500f8dd"}],"test/unit/obj/test_diskfile.py":[{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"99a06990e0f7df584e305d9690d9dbc1f0bf8c51","unresolved":false,"context_lines":[{"line_number":8105,"context_line":"        self.assertEqual(hashes, result)"},{"line_number":8106,"context_line":""},{"line_number":8107,"context_line":"    def test_ignore_corrupted_hashes(self):"},{"line_number":8108,"context_line":"        corrupted_hashes \u003d {u\u0027\\x00\\x00\\x00\u0027: False}"},{"line_number":8109,"context_line":"        diskfile.write_hashes(self.testdir, corrupted_hashes)"},{"line_number":8110,"context_line":"        result \u003d diskfile.read_hashes(self.testdir)"},{"line_number":8111,"context_line":"        self.assertNotIn(\u0027\\x00\\x00\\x00\u0027, result)"}],"source_content_type":"text/x-python","patch_set":1,"id":"bfb3d3c7_fda9488f","line":8108,"range":{"start_line":8108,"start_character":28,"end_line":8108,"end_character":42},"updated":"2019-05-29 17:17:33.000000000","message":"But this *is* a str on py3 :-/","commit_id":"9fe0ab84b77e9a5dcf0089d5d598bf365500f8dd"},{"author":{"_account_id":6968,"name":"Christian Schwede","email":"cschwede@redhat.com","username":"cschwede"},"change_message_id":"94bf03d4860b7cba6ab29633c84f7c7f634db29c","unresolved":false,"context_lines":[{"line_number":8105,"context_line":"        self.assertEqual(hashes, result)"},{"line_number":8106,"context_line":""},{"line_number":8107,"context_line":"    def test_ignore_corrupted_hashes(self):"},{"line_number":8108,"context_line":"        corrupted_hashes \u003d {u\u0027\\x00\\x00\\x00\u0027: False}"},{"line_number":8109,"context_line":"        diskfile.write_hashes(self.testdir, corrupted_hashes)"},{"line_number":8110,"context_line":"        result \u003d diskfile.read_hashes(self.testdir)"},{"line_number":8111,"context_line":"        self.assertNotIn(\u0027\\x00\\x00\\x00\u0027, result)"}],"source_content_type":"text/x-python","patch_set":1,"id":"bfb3d3c7_487bd026","line":8108,"range":{"start_line":8108,"start_character":28,"end_line":8108,"end_character":42},"in_reply_to":"bfb3d3c7_fda9488f","updated":"2019-05-31 08:58:49.000000000","message":"Changed the isinstance(str) to check for isalnum(), which catches this as expected on py2 and py3.","commit_id":"9fe0ab84b77e9a5dcf0089d5d598bf365500f8dd"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"99a06990e0f7df584e305d9690d9dbc1f0bf8c51","unresolved":false,"context_lines":[{"line_number":8108,"context_line":"        corrupted_hashes \u003d {u\u0027\\x00\\x00\\x00\u0027: False}"},{"line_number":8109,"context_line":"        diskfile.write_hashes(self.testdir, corrupted_hashes)"},{"line_number":8110,"context_line":"        result \u003d diskfile.read_hashes(self.testdir)"},{"line_number":8111,"context_line":"        self.assertNotIn(\u0027\\x00\\x00\\x00\u0027, result)"},{"line_number":8112,"context_line":""},{"line_number":8113,"context_line":"if __name__ \u003d\u003d \u0027__main__\u0027:"},{"line_number":8114,"context_line":"    unittest.main()"}],"source_content_type":"text/x-python","patch_set":1,"id":"bfb3d3c7_dd924457","line":8111,"updated":"2019-05-29 17:17:33.000000000","message":"I think this test ought to assert whether result[\u0027valid\u0027] is true or false.","commit_id":"9fe0ab84b77e9a5dcf0089d5d598bf365500f8dd"},{"author":{"_account_id":6968,"name":"Christian Schwede","email":"cschwede@redhat.com","username":"cschwede"},"change_message_id":"94bf03d4860b7cba6ab29633c84f7c7f634db29c","unresolved":false,"context_lines":[{"line_number":8108,"context_line":"        corrupted_hashes \u003d {u\u0027\\x00\\x00\\x00\u0027: False}"},{"line_number":8109,"context_line":"        diskfile.write_hashes(self.testdir, corrupted_hashes)"},{"line_number":8110,"context_line":"        result \u003d diskfile.read_hashes(self.testdir)"},{"line_number":8111,"context_line":"        self.assertNotIn(\u0027\\x00\\x00\\x00\u0027, result)"},{"line_number":8112,"context_line":""},{"line_number":8113,"context_line":"if __name__ \u003d\u003d \u0027__main__\u0027:"},{"line_number":8114,"context_line":"    unittest.main()"}],"source_content_type":"text/x-python","patch_set":1,"id":"bfb3d3c7_e89c046b","line":8111,"in_reply_to":"bfb3d3c7_dd924457","updated":"2019-05-31 08:58:49.000000000","message":"Done","commit_id":"9fe0ab84b77e9a5dcf0089d5d598bf365500f8dd"}]}
