)]}' {"/COMMIT_MSG":[{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"21c22828a364ca7ceca1e84b584a78fad440e5ab","unresolved":true,"context_lines":[{"line_number":18,"context_line":"Additionally, add a new tunable, `stale_worker_timeout`. Once this many"},{"line_number":19,"context_line":"seconds pass following a reload, the manager process will issue SIGKILLs"},{"line_number":20,"context_line":"to the remaining stale workers. This ensures there is some limit to how"},{"line_number":21,"context_line":"long old code and configs may still be running."},{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Drive-by: Add logging to ServersPerPortStrategy.register_worker_exit"},{"line_number":24,"context_line":"that\u0027s comparable to what WorkersStrategy does."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":7,"id":"af1f6e1c_2edf4a6d","line":21,"updated":"2023-04-17 17:10:59.000000000","message":"Really, this is the meat of the change -- I should reword this to highlight it, and mention the cleaned up logging only in passing.","commit_id":"ada5d346658347527977c106c3975024d59ae50d"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[{"line_number":18,"context_line":"Additionally, add a new tunable, `stale_worker_timeout`. Once this many"},{"line_number":19,"context_line":"seconds pass following a reload, the manager process will issue SIGKILLs"},{"line_number":20,"context_line":"to the remaining stale workers. This ensures there is some limit to how"},{"line_number":21,"context_line":"long old code and configs may still be running."},{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Drive-by: Add logging to ServersPerPortStrategy.register_worker_exit"},{"line_number":24,"context_line":"that\u0027s comparable to what WorkersStrategy does."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":7,"id":"f90f1106_f0866bd1","line":21,"in_reply_to":"af1f6e1c_2edf4a6d","updated":"2023-11-03 21:32:19.000000000","message":"Ack","commit_id":"ada5d346658347527977c106c3975024d59ae50d"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":true,"context_lines":[{"line_number":8,"context_line":""},{"line_number":9,"context_line":"Add a new tunable, `stale_worker_timeout`, defaulting to 7200 (i.e. two hours)."},{"line_number":10,"context_line":"Once this time elapses following a reload, the manager process will issue"},{"line_number":11,"context_line":"SIGKILLs to any remaining stale workers."},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"This gives operators a way to configure a limit for how long old code and"},{"line_number":14,"context_line":"configs may still be running in their cluster."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":8,"id":"9186c04f_4d84f10e","line":11,"updated":"2023-04-17 15:45:08.000000000","message":"I don\u0027t think i understand the behavior before we had this timeout - maybe the new parent process would leave orphans running until they were cleaned up externally?","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[{"line_number":8,"context_line":""},{"line_number":9,"context_line":"Add a new tunable, `stale_worker_timeout`, defaulting to 7200 (i.e. two hours)."},{"line_number":10,"context_line":"Once this time elapses following a reload, the manager process will issue"},{"line_number":11,"context_line":"SIGKILLs to any remaining stale workers."},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"This gives operators a way to configure a limit for how long old code and"},{"line_number":14,"context_line":"configs may still be running in their cluster."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":8,"id":"aeb893db_72d7b484","line":11,"in_reply_to":"3d8e6842_e75f5d3e","updated":"2023-11-03 21:32:19.000000000","message":"Ack","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"21c22828a364ca7ceca1e84b584a78fad440e5ab","unresolved":true,"context_lines":[{"line_number":8,"context_line":""},{"line_number":9,"context_line":"Add a new tunable, `stale_worker_timeout`, defaulting to 7200 (i.e. two hours)."},{"line_number":10,"context_line":"Once this time elapses following a reload, the manager process will issue"},{"line_number":11,"context_line":"SIGKILLs to any remaining stale workers."},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"This gives operators a way to configure a limit for how long old code and"},{"line_number":14,"context_line":"configs may still be running in their cluster."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":8,"id":"3d8e6842_e75f5d3e","line":11,"in_reply_to":"9186c04f_4d84f10e","updated":"2023-04-17 17:10:59.000000000","message":"Yes -- the assumption was that orphans would gracefully stop after completing their outstanding requests. But we\u0027ve seen ops needing to manually go around killing orphans hours after a reload -- and `swift-orphans` (or other, comparable external-to-the-process orphan checkers) often won\u0027t catch it because the children are still tied to the active manager process.\n\nYou *might* be able to make a better `swift-orphans` by comparing etimes across children... but it seems to me that the manager process is a reasonable locus of responsibility.","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":true,"context_lines":[{"line_number":26,"context_line":""},{"line_number":27,"context_line":"With the improved knowledge of subprocesses, we can kick the log level"},{"line_number":28,"context_line":"for the above message up from info to warning; we no longer expect it to"},{"line_number":29,"context_line":"trigger in practice."},{"line_number":30,"context_line":""},{"line_number":31,"context_line":"Drive-by: Add logging to ServersPerPortStrategy.register_worker_exit"},{"line_number":32,"context_line":"that\u0027s comparable to what WorkersStrategy does."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":8,"id":"f94cc278_33e34a9d","line":29,"updated":"2023-04-17 15:45:08.000000000","message":"that sounds better than what we have","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[{"line_number":26,"context_line":""},{"line_number":27,"context_line":"With the improved knowledge of subprocesses, we can kick the log level"},{"line_number":28,"context_line":"for the above message up from info to warning; we no longer expect it to"},{"line_number":29,"context_line":"trigger in practice."},{"line_number":30,"context_line":""},{"line_number":31,"context_line":"Drive-by: Add logging to ServersPerPortStrategy.register_worker_exit"},{"line_number":32,"context_line":"that\u0027s comparable to what WorkersStrategy does."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":8,"id":"1ce80fca_a604d4f8","line":29,"in_reply_to":"2e1e912e_20e4908f","updated":"2023-11-03 21:32:19.000000000","message":"Ack","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"21c22828a364ca7ceca1e84b584a78fad440e5ab","unresolved":true,"context_lines":[{"line_number":26,"context_line":""},{"line_number":27,"context_line":"With the improved knowledge of subprocesses, we can kick the log level"},{"line_number":28,"context_line":"for the above message up from info to warning; we no longer expect it to"},{"line_number":29,"context_line":"trigger in practice."},{"line_number":30,"context_line":""},{"line_number":31,"context_line":"Drive-by: Add logging to ServersPerPortStrategy.register_worker_exit"},{"line_number":32,"context_line":"that\u0027s comparable to what WorkersStrategy does."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":8,"id":"2e1e912e_20e4908f","line":29,"in_reply_to":"f94cc278_33e34a9d","updated":"2023-04-17 17:10:59.000000000","message":"Yeah, it was stupid -- this was actually where I started on this patch, if you look back through the patchsets, \u0027cause it just seemed *so dumb*.","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":true,"context_lines":[{"line_number":17,"context_line":"to complete then closes the accept socket on all the old workers) has"},{"line_number":18,"context_line":"grown the ability to send state to the re-exec\u0027ed manager. Currently,"},{"line_number":19,"context_line":"this is limited to just the set of pre-re-exec child PIDs and their"},{"line_number":20,"context_line":"reload times, though it was designed to be reasonably extensible."},{"line_number":21,"context_line":""},{"line_number":22,"context_line":"This allows the new manager to recognize stale workers as they exit"},{"line_number":23,"context_line":"instead of logging"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":14,"id":"983fbc71_df595446","line":20,"updated":"2023-11-03 21:32:19.000000000","message":"i need like a diagram - something with crayon - who is the temporary reload child; who is the re-exec\u0027d manager.\n\nwhat is \"pre-re-exec child PIDS\" - do we spawn workers we can\u0027t monitor? Or is this something to do with old code during upgrades?\n\nWith this change, if I SIGUSR1 - I still get a new manager (same pid, exec\u0027d new code) forking off new workers (who tell it when they\u0027re online so he can notify systemd); but the OLD manager (who came out of a fork, so new child pid) stays around until all the children die or it has to kill them?\n\ndo I have any of that right?","commit_id":"c242f1520bc741a24822a712e4a9758a61832227"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"789e9dbdd634756149299b5b52fda7fb079416b1","unresolved":true,"context_lines":[{"line_number":17,"context_line":"to complete then closes the accept socket on all the old workers) has"},{"line_number":18,"context_line":"grown the ability to send state to the re-exec\u0027ed manager. Currently,"},{"line_number":19,"context_line":"this is limited to just the set of pre-re-exec child PIDs and their"},{"line_number":20,"context_line":"reload times, though it was designed to be reasonably extensible."},{"line_number":21,"context_line":""},{"line_number":22,"context_line":"This allows the new manager to recognize stale workers as they exit"},{"line_number":23,"context_line":"instead of logging"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":14,"id":"29052150_8cf3e722","line":20,"in_reply_to":"54193cd2_8198fc97","updated":"2024-12-12 21:16:17.000000000","message":"I\u0027m not sure what to do with this thread. There was some confusion; is it resolved?\n\nI think the root of the confusion was ever introducing the idea of an \"old manager\". There\u0027s _the_ manager, which occasionally re-exec\u0027s itself. Rather than \"old\" and \"new\", it\u0027s probably better to think about it as \"pre-re-exec\" and \"post-re-exec\".\n\nBy and large, though, it\u0027s in the business of forking, and occasionally it does that to help itself through the re-exec. I think the docs at https://docs.openstack.org/swift/latest/overview_wsgi_management.html agree with all of that.","commit_id":"c242f1520bc741a24822a712e4a9758a61832227"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"5eb46e85e443875a642c9fa327b0000ce451cfd3","unresolved":true,"context_lines":[{"line_number":17,"context_line":"to complete then closes the accept socket on all the old workers) has"},{"line_number":18,"context_line":"grown the ability to send state to the re-exec\u0027ed manager. Currently,"},{"line_number":19,"context_line":"this is limited to just the set of pre-re-exec child PIDs and their"},{"line_number":20,"context_line":"reload times, though it was designed to be reasonably extensible."},{"line_number":21,"context_line":""},{"line_number":22,"context_line":"This allows the new manager to recognize stale workers as they exit"},{"line_number":23,"context_line":"instead of logging"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":14,"id":"abb22954_7c5e12ec","line":20,"in_reply_to":"983fbc71_df595446","updated":"2023-11-07 00:02:34.000000000","message":"Would https://review.opendev.org/c/openstack/swift/+/900256 help? [Preview](https://751d0174016ee5b4fc58-b1ef20a786528726723bdd34809a6289.ssl.cf2.rackcdn.com/900256/1/check/openstack-tox-docs/d6874f3/docs/overview_wsgi_reload.html)\n\n\u003e what is \"pre-re-exec child PIDS\"\n\nThe old workers\n\n\u003e do we spawn workers we can\u0027t monitor?\n\nKind of -- we *used* to be minding the old workers, then we re-exec\u0027ed, so we lost track of all that state.\n\n\u003e Or is this something to do with old code during upgrades?\n\nNot just upgrades; it impacts config changes, too.\n\n\u003e the OLD manager (who came out of a fork, so new child pid)\n\nWhat? No -- old manager has to wrap up by re-exec\u0027ing into the new manager -- otherwise we can\u0027t keep the same PID. We fork off a socket-closer, but it won\u0027t\n\n\u003e [stay] around until all the children die or it has to kill them\n\nI can think of a few reason why we *wouldn\u0027t* want it to do that:\n\n- It would split management responsibilities across multiple processes, potentially running different versions of the code.\n- It wouldn\u0027t be able to use `os.wait`, as the socket-closer has no children -- it would need to just periodically poll the old workers.\n- The new manager still reaps the old workers as they exit (because it *is* the parent), so we would continue to get a bunch of\n ```\n Ignoring wait() result from unknown PID ...\n ```\n messages.","commit_id":"c242f1520bc741a24822a712e4a9758a61832227"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"20fcd7921037434fd1541dd4f0388261ff944e26","unresolved":true,"context_lines":[{"line_number":17,"context_line":"to complete then closes the accept socket on all the old workers) has"},{"line_number":18,"context_line":"grown the ability to send state to the re-exec\u0027ed manager. Currently,"},{"line_number":19,"context_line":"this is limited to just the set of pre-re-exec child PIDs and their"},{"line_number":20,"context_line":"reload times, though it was designed to be reasonably extensible."},{"line_number":21,"context_line":""},{"line_number":22,"context_line":"This allows the new manager to recognize stale workers as they exit"},{"line_number":23,"context_line":"instead of logging"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":14,"id":"54193cd2_8198fc97","line":20,"in_reply_to":"abb22954_7c5e12ec","updated":"2023-11-07 21:57:12.000000000","message":"In my reading the manager that calls fork to create the socket closer very quickly reexecs - while the \"clone\" of the \"old manager\" created with fork lives on holding onto the list of pids of the old workers and their sockets in it\u0027s strategy brain... so I think the name \"old manager\" is kind of ambigous. We we fork we have to copies of the exact some process - and at the time of fork it\u0027s the only manager process their is - it doesn\u0027t become \"old\" until one of them reexecs creating a *new* manager leaving behind... the... old... \"socket closer\"\n\nexcept but when you fork you don\u0027t inherit child processes\u0027s to call wait on them!?","commit_id":"c242f1520bc741a24822a712e4a9758a61832227"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"c35c7759f7ea89c9761d8ce53c1d28e8f0e5a91e","unresolved":true,"context_lines":[{"line_number":8,"context_line":""},{"line_number":9,"context_line":"Add a new tunable, `stale_worker_timeout`, defaulting to 86400 (i.e. 24"},{"line_number":10,"context_line":"hours). Once this time elapses following a reload, the manager process"},{"line_number":11,"context_line":"will issue SIGKILLs to any remaining stale workers."},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"This gives operators a way to configure a limit for how long old code"},{"line_number":14,"context_line":"and configs may still be running in their cluster."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":16,"id":"407ec01c_786d8eb2","line":11,"updated":"2023-11-13 18:20:44.000000000","message":"Hmm... I wonder if we should change the `HUP` behavior to deal with long-running requests, too...","commit_id":"9de6a504f40b0523b6413f4e556c99873d234323"}],"/PATCHSET_LEVEL":[{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"c15bb5a2c985782d98447ac711254fd27209186a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":8,"id":"fa871c90_0ca56b5e","updated":"2023-04-18 16:03:18.000000000","message":"If nothing else, I really _should_ re-wrap that commit message.","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":8,"id":"c8f24166_92a10e6a","updated":"2023-04-17 15:45:08.000000000","message":"the commit message line length seems to be wrapping, not sure if that\u0027s a big deal\n\nThere\u0027s some singal handling tests in probe/test_signals.py - but I don\u0027t think any of them handle SIGUSR1. I\u0027m not exactly sure what a good test for this change should look like. Can I repro a orphan worker, maybe by holding a connection? Is this related to https://bugs.launchpad.net/swift/+bug/1792615","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"50326a2eb0d19986323078a783a604f20e3e7b94","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":9,"id":"6ec26c2b_f7d75fed","updated":"2023-04-28 22:00:06.000000000","message":"recheck\n\nFailure was caused by https://bugs.launchpad.net/swift/+bug/2017021 -- now resolved.","commit_id":"72add5ee9b47593fd1887ba4bd4a730406cb5e6c"},{"author":{"_account_id":22042,"name":"Chris Smart","email":"distroguy@gmail.com","username":"csmart"},"change_message_id":"969e935521bd2d1d458cbb30caaa02a5df72e596","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":11,"id":"7fae4ebc_b35ac8ae","updated":"2023-06-09 06:47:48.000000000","message":"The approach looks good to me, although maybe the default of 7200 sec is a bit on the short side in that there could be many users who are on slow connections? And there might not be much harm in letting them finish that off, even though it\u0027s on older Swift code? FWIW we currently have monitoring that alerts on orphaned pids after 24 hours and then kills them with Ansible.","commit_id":"1cabdd0aea4ab847c35e5d5d8643c07bbec18730"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"8f3731b120e045980e6b5c5633c1084359ccf668","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":11,"id":"81b1535c_7f6cf1cb","in_reply_to":"7fae4ebc_b35ac8ae","updated":"2023-06-09 21:55:47.000000000","message":"24h seems reasonable; it\u0027d match the default for `max_get_time` in slo and dlo. I might look at preserving the scheduled kill time across reloads -- I wasn\u0027t too worried about it with the lower time, but I could kinda see someone (either by chance or policy) reloading their proxies at least once a day for a week or something and being confused about why there\u0027s longer-living workers than expected.","commit_id":"1cabdd0aea4ab847c35e5d5d8643c07bbec18730"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"17902fe7ab53867324ee4a1cca354dd61ef0708d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":13,"id":"8abc44fe_1a653ac5","updated":"2023-06-27 00:46:46.000000000","message":"I think of all the patches I\u0027m considering carrying this is the one that seems most likely to accidently have some un-expected interaction with our deployment stuff and cause a fire-drill.","commit_id":"4e38af269558bb7f8c3b38f3f1d448172d392d30"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"959f6af57336e73a2726753f5937631c036da643","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":13,"id":"5c6291b1_16692c55","in_reply_to":"8abc44fe_1a653ac5","updated":"2023-08-21 22:26:27.000000000","message":"We\u0027ve had this running a couple months; did it cause a fire-drill? 😜","commit_id":"4e38af269558bb7f8c3b38f3f1d448172d392d30"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"5eb46e85e443875a642c9fa327b0000ce451cfd3","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":14,"id":"a51b1ee2_18b05707","updated":"2023-11-07 00:02:34.000000000","message":"Will respin.","commit_id":"c242f1520bc741a24822a712e4a9758a61832227"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":14,"id":"d5975550_0dc88f2b","updated":"2023-11-03 21:32:19.000000000","message":"we\u0027ve been running this in prod for a LONG time and SRE isn\u0027t complaining about stuck orphan processes TO ME...\n\n... but I can\u0027t merge this - I don\u0027t know how it works! I promise I\u0027m trying.","commit_id":"c242f1520bc741a24822a712e4a9758a61832227"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"1ff9b803cf4dd8cadf536746d2d1652a7e8a18cd","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":16,"id":"26d289e5_9bce5464","updated":"2023-11-08 17:41:17.000000000","message":"I realized I\u0027ve got this nagging feeling like we could still drop some connections during a reload -- like, what happens to all the connections in the listen socket queue (waiting for us to call `accept`) when you close the listen socket? I\u0027d be surprised if the kernel had some way to redistribute those across whatever other listen sockets have been opened...","commit_id":"9de6a504f40b0523b6413f4e556c99873d234323"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"20fcd7921037434fd1541dd4f0388261ff944e26","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":16,"id":"1ab7af1e_f959a9a8","updated":"2023-11-07 21:57:12.000000000","message":"I\u0027m not sure where I got the idea that the socket-closer-fork-pattern wasn\u0027t something that was already happening on master. I REALLY appreciate the docs; but I\u0027m still getting up to speed and I don\u0027t think I can review this properly until I better understand how it works today.\n\nIs the fork/IPC model necessary for *correctness* when the manager recieves SIGUSR1 and wants to reexec and spawn new workers with new code then signal the old workers listen socket to close and wait for them to finish? Or is the fork/IPC model mostly tactical to separate concerns, i.e. split the running process in two: one copy of the process will keep the existing pid, spawn new workers and live on - while the copy will shutdown the old workers and die.","commit_id":"9de6a504f40b0523b6413f4e556c99873d234323"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"390551729fb1d65df5c5726b4f80b931729ab7d1","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":17,"id":"f6a59710_1950cbde","updated":"2023-11-10 17:43:19.000000000","message":"I keep breaking this function in super obvious ways everytime I try to work on it and find the manual testing loop somewhat tedius for dumb stuff like type errors. I really think we should shore up testing so we can get more serious about testing reload from legacy environment on upgrade.\n\nhttps://review.opendev.org/c/openstack/swift/+/900054\n\nI have a pretty solid handle on what this change is doing and how. I couldn\u0027t have done it without https://review.opendev.org/c/openstack/swift/+/900256/3 and a great deal of patient explination on your part. I would like to undrestand more about the motivations behind the pipe/binary/json message protcol - where are you going with this?\n\nIf the implementation is hard to explain, it\u0027s a bad idea.\nIf the implementation is easy to explain, it may be a good idea.\nhttps://github.com/NVIDIA/swift/blob/master/CONTRIBUTING.rst#swift-design-principles","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"789e9dbdd634756149299b5b52fda7fb079416b1","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":20,"id":"c73b9d1a_7e5191d9","updated":"2024-12-12 21:16:17.000000000","message":"Added some more logging.\n\nWe\u0027ve been running with this for months; are we content to merge it yet?","commit_id":"76ef9c3d7fe24787139d4b2ebcd22e8bb9c33cef"},{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"d30b4d3d27ac431deba320efee4f1b1abedbe1eb","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":20,"id":"cebff6e2_6c80811d","updated":"2024-12-12 21:52:40.000000000","message":"Yeah, I think we\u0027ve been running this long enough and does what it\u0027s suppse to do.","commit_id":"76ef9c3d7fe24787139d4b2ebcd22e8bb9c33cef"}],"doc/source/overview_wsgi_reload.rst":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"20fcd7921037434fd1541dd4f0388261ff944e26","unresolved":true,"context_lines":[{"line_number":39,"context_line":" connections are picked up by new workers. Once an old worker completes"},{"line_number":40,"context_line":" all of its oustanding requests, it exits. Beginning with Swift 2.33.0,"},{"line_number":41,"context_line":" if any workers persist beyond ``stale_worker_timeout``, the new manager"},{"line_number":42,"context_line":" will clean them up with ``KILL`` signals."},{"line_number":43,"context_line":""},{"line_number":44,"context_line":".. image:: images/reload_process_tree_6.svg"},{"line_number":45,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"b5b13c43_3b20b0d0","line":42,"updated":"2023-11-07 21:57:12.000000000","message":"wait... so this change is only adding this small new behavior?","commit_id":"9de6a504f40b0523b6413f4e556c99873d234323"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"789e9dbdd634756149299b5b52fda7fb079416b1","unresolved":true,"context_lines":[{"line_number":39,"context_line":" connections are picked up by new workers. Once an old worker completes"},{"line_number":40,"context_line":" all of its oustanding requests, it exits. Beginning with Swift 2.33.0,"},{"line_number":41,"context_line":" if any workers persist beyond ``stale_worker_timeout``, the new manager"},{"line_number":42,"context_line":" will clean them up with ``KILL`` signals."},{"line_number":43,"context_line":""},{"line_number":44,"context_line":".. image:: images/reload_process_tree_6.svg"},{"line_number":45,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"6fa24a87_152a9ec4","line":42,"in_reply_to":"b5b13c43_3b20b0d0","updated":"2024-12-12 21:16:17.000000000","message":"Yes, that\u0027s it: pass the list of known processes to the newly-re-exec\u0027d manager, and have it start killing them after a (large) timeout.","commit_id":"9de6a504f40b0523b6413f4e556c99873d234323"}],"etc/proxy-server.conf-sample":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":true,"context_lines":[{"line_number":328,"context_line":"# configure the grace period (in seconds), after which the reloaded server"},{"line_number":329,"context_line":"# will issue SIGKILLs to remaining stale workers."},{"line_number":330,"context_line":"# stale_worker_timeout \u003d 7200"},{"line_number":331,"context_line":"#"},{"line_number":332,"context_line":"# When upgrading from liberasurecode\u003c\u003d1.5.0, you may want to continue writing"},{"line_number":333,"context_line":"# legacy CRCs until all nodes are upgraded and capabale of reading fragments"},{"line_number":334,"context_line":"# with zlib CRCs. liberasurecode\u003e\u003d1.6.2 checks for the environment variable"}],"source_content_type":"application/octet-stream","patch_set":8,"id":"a66689a8_04234b69","line":331,"updated":"2023-04-17 15:45:08.000000000","message":"should the default to max_get_time? what are these stale workers doing if they\u0027re not still handling client requests?","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"21c22828a364ca7ceca1e84b584a78fad440e5ab","unresolved":true,"context_lines":[{"line_number":328,"context_line":"# configure the grace period (in seconds), after which the reloaded server"},{"line_number":329,"context_line":"# will issue SIGKILLs to remaining stale workers."},{"line_number":330,"context_line":"# stale_worker_timeout \u003d 7200"},{"line_number":331,"context_line":"#"},{"line_number":332,"context_line":"# When upgrading from liberasurecode\u003c\u003d1.5.0, you may want to continue writing"},{"line_number":333,"context_line":"# legacy CRCs until all nodes are upgraded and capabale of reading fragments"},{"line_number":334,"context_line":"# with zlib CRCs. liberasurecode\u003e\u003d1.6.2 checks for the environment variable"}],"source_content_type":"application/octet-stream","patch_set":8,"id":"ccb65d23_b770f4c6","line":331,"in_reply_to":"a66689a8_04234b69","updated":"2023-04-17 17:10:59.000000000","message":"`max_get_time` is sneaky, though -- it only affects xLOs! If you\u0027ve got a 5GB plain-old swift object and a client slowly but surely pulling down data (100kB/min would probably do it with default configs), that connection could be held for days and days. On a similar note: it only affects `GET`s; any in-progress uploads will also go basically indefinitely.","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"17902fe7ab53867324ee4a1cca354dd61ef0708d","unresolved":true,"context_lines":[{"line_number":328,"context_line":"# configure the grace period (in seconds), after which the reloaded server"},{"line_number":329,"context_line":"# will issue SIGKILLs to remaining stale workers."},{"line_number":330,"context_line":"# stale_worker_timeout \u003d 7200"},{"line_number":331,"context_line":"#"},{"line_number":332,"context_line":"# When upgrading from liberasurecode\u003c\u003d1.5.0, you may want to continue writing"},{"line_number":333,"context_line":"# legacy CRCs until all nodes are upgraded and capabale of reading fragments"},{"line_number":334,"context_line":"# with zlib CRCs. liberasurecode\u003e\u003d1.6.2 checks for the environment variable"}],"source_content_type":"application/octet-stream","patch_set":8,"id":"d055d291_0b7f6b9d","line":331,"in_reply_to":"ccb65d23_b770f4c6","updated":"2023-06-27 00:46:46.000000000","message":"i don\u0027t think this example config matches the default value in code anymore","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"959f6af57336e73a2726753f5937631c036da643","unresolved":false,"context_lines":[{"line_number":328,"context_line":"# configure the grace period (in seconds), after which the reloaded server"},{"line_number":329,"context_line":"# will issue SIGKILLs to remaining stale workers."},{"line_number":330,"context_line":"# stale_worker_timeout \u003d 7200"},{"line_number":331,"context_line":"#"},{"line_number":332,"context_line":"# When upgrading from liberasurecode\u003c\u003d1.5.0, you may want to continue writing"},{"line_number":333,"context_line":"# legacy CRCs until all nodes are upgraded and capabale of reading fragments"},{"line_number":334,"context_line":"# with zlib CRCs. liberasurecode\u003e\u003d1.6.2 checks for the environment variable"}],"source_content_type":"application/octet-stream","patch_set":8,"id":"4c28fda4_e29f3c8f","line":331,"in_reply_to":"d055d291_0b7f6b9d","updated":"2023-08-21 22:26:27.000000000","message":"Done","commit_id":"5d1395585f005c90533fe052f75038846008e16c"}],"swift/common/utils/__init__.py":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":true,"context_lines":[{"line_number":7092,"context_line":" try:"},{"line_number":7093,"context_line":" with open(\u0027/proc/%d/stat\u0027 % pid) as fp:"},{"line_number":7094,"context_line":" stats \u003d fp.read().split()"},{"line_number":7095,"context_line":" return int(stats[3])"},{"line_number":7096,"context_line":" except IOError as e:"},{"line_number":7097,"context_line":" if e.errno \u003d\u003d errno.ENOENT:"},{"line_number":7098,"context_line":" raise OSError(errno.ESRCH, \u0027No such process\u0027)"}],"source_content_type":"text/x-python","patch_set":8,"id":"18d7f72e_9e3b8c78","line":7095,"updated":"2023-04-17 15:45:08.000000000","message":"ok, so index 3\n\n pid, comm, state, ppid, ...","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[{"line_number":7092,"context_line":" try:"},{"line_number":7093,"context_line":" with open(\u0027/proc/%d/stat\u0027 % pid) as fp:"},{"line_number":7094,"context_line":" stats \u003d fp.read().split()"},{"line_number":7095,"context_line":" return int(stats[3])"},{"line_number":7096,"context_line":" except IOError as e:"},{"line_number":7097,"context_line":" if e.errno \u003d\u003d errno.ENOENT:"},{"line_number":7098,"context_line":" raise OSError(errno.ESRCH, \u0027No such process\u0027)"}],"source_content_type":"text/x-python","patch_set":8,"id":"4519bb60_1858561a","line":7095,"in_reply_to":"18d7f72e_9e3b8c78","updated":"2023-11-03 21:32:19.000000000","message":"Ack","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":true,"context_lines":[{"line_number":7096,"context_line":" except IOError as e:"},{"line_number":7097,"context_line":" if e.errno \u003d\u003d errno.ENOENT:"},{"line_number":7098,"context_line":" raise OSError(errno.ESRCH, \u0027No such process\u0027)"},{"line_number":7099,"context_line":" raise"}],"source_content_type":"text/x-python","patch_set":8,"id":"d98f9e22_1639adbd","line":7099,"updated":"2023-04-17 15:45:08.000000000","message":"how is this different than os.getppid()\n\nhttps://docs.python.org/3/library/os.html#os.getppid","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[{"line_number":7096,"context_line":" except IOError as e:"},{"line_number":7097,"context_line":" if e.errno \u003d\u003d errno.ENOENT:"},{"line_number":7098,"context_line":" raise OSError(errno.ESRCH, \u0027No such process\u0027)"},{"line_number":7099,"context_line":" raise"}],"source_content_type":"text/x-python","patch_set":8,"id":"e36d34a7_ffa97d2a","line":7099,"in_reply_to":"a987cf52_74462164","updated":"2023-11-03 21:32:19.000000000","message":"Ack","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"21c22828a364ca7ceca1e84b584a78fad440e5ab","unresolved":true,"context_lines":[{"line_number":7096,"context_line":" except IOError as e:"},{"line_number":7097,"context_line":" if e.errno \u003d\u003d errno.ENOENT:"},{"line_number":7098,"context_line":" raise OSError(errno.ESRCH, \u0027No such process\u0027)"},{"line_number":7099,"context_line":" raise"}],"source_content_type":"text/x-python","patch_set":8,"id":"a987cf52_74462164","line":7099,"in_reply_to":"d98f9e22_1639adbd","updated":"2023-04-17 17:10:59.000000000","message":"`os.getppid()` takes no args -- it can only give the current process\u0027s parent PID.","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"390551729fb1d65df5c5726b4f80b931729ab7d1","unresolved":true,"context_lines":[{"line_number":6444,"context_line":""},{"line_number":6445,"context_line":"def get_ppid(pid):"},{"line_number":6446,"context_line":" \"\"\""},{"line_number":6447,"context_line":" Get the parent process\u0027s PID given a child pid."},{"line_number":6448,"context_line":""},{"line_number":6449,"context_line":" :raises OSError: if the child pid cannot be found"},{"line_number":6450,"context_line":" \"\"\""}],"source_content_type":"text/x-python","patch_set":17,"id":"518dfdd1_19d248c6","line":6447,"updated":"2023-11-10 17:43:19.000000000","message":"reminder to self:\n\nhttps://docs.python.org/3/library/os.html#os.getppid\n\ndespite the similar name, stdlib doesn\u0027t have this function, os.getppid takes no arguments\n\npsutils has this:\n\nhttps://psutil.readthedocs.io/en/latest/#psutil.Process.ppid\n\nand a similar implementation: https://github.com/giampaolo/psutil/blob/master/psutil/_pslinux.py#L2260","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"}],"swift/common/wsgi.py":[{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"55c5bb87492d780612441ba191f074c0766e2044","unresolved":true,"context_lines":[{"line_number":766,"context_line":" reexec_signal_fd \u003d os.getenv(NOTIFY_FD_ENV_KEY)"},{"line_number":767,"context_line":" if reexec_signal_fd:"},{"line_number":768,"context_line":" worker_state_fd \u003d None"},{"line_number":769,"context_line":" if \u0027,\u0027 in reexec_signal_fd:"},{"line_number":770,"context_line":" reexec_signal_fd, worker_state_fd \u003d reexec_signal_fd.split(\u0027,\u0027)"},{"line_number":771,"context_line":" reexec_signal_fd \u003d int(reexec_signal_fd)"},{"line_number":772,"context_line":" os.write(reexec_signal_fd, str(os.getpid()).encode(\u0027utf8\u0027))"}],"source_content_type":"text/x-python","patch_set":1,"id":"c2e161f1_10cddc20","line":769,"updated":"2021-04-30 21:33:45.000000000","message":"Smooth upgrade path.","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"5eb46e85e443875a642c9fa327b0000ce451cfd3","unresolved":false,"context_lines":[{"line_number":766,"context_line":" reexec_signal_fd \u003d os.getenv(NOTIFY_FD_ENV_KEY)"},{"line_number":767,"context_line":" if reexec_signal_fd:"},{"line_number":768,"context_line":" worker_state_fd \u003d None"},{"line_number":769,"context_line":" if \u0027,\u0027 in reexec_signal_fd:"},{"line_number":770,"context_line":" reexec_signal_fd, worker_state_fd \u003d reexec_signal_fd.split(\u0027,\u0027)"},{"line_number":771,"context_line":" reexec_signal_fd \u003d int(reexec_signal_fd)"},{"line_number":772,"context_line":" os.write(reexec_signal_fd, str(os.getpid()).encode(\u0027utf8\u0027))"}],"source_content_type":"text/x-python","patch_set":1,"id":"264b518b_aad01ecc","line":769,"in_reply_to":"bef702e7_a5cb63b9","updated":"2023-11-07 00:02:34.000000000","message":"Sure, fair enough.","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":true,"context_lines":[{"line_number":766,"context_line":" reexec_signal_fd \u003d os.getenv(NOTIFY_FD_ENV_KEY)"},{"line_number":767,"context_line":" if reexec_signal_fd:"},{"line_number":768,"context_line":" worker_state_fd \u003d None"},{"line_number":769,"context_line":" if \u0027,\u0027 in reexec_signal_fd:"},{"line_number":770,"context_line":" reexec_signal_fd, worker_state_fd \u003d reexec_signal_fd.split(\u0027,\u0027)"},{"line_number":771,"context_line":" reexec_signal_fd \u003d int(reexec_signal_fd)"},{"line_number":772,"context_line":" os.write(reexec_signal_fd, str(os.getpid()).encode(\u0027utf8\u0027))"}],"source_content_type":"text/x-python","patch_set":1,"id":"bef702e7_a5cb63b9","line":769,"in_reply_to":"c2e161f1_10cddc20","updated":"2023-11-03 21:32:19.000000000","message":"I don\u0027t really understand why we cram both FDs into the same environ key:\n\nhttps://review.opendev.org/c/openstack/swift/+/900088","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"8d1e65ac80c8d69c453ca867fae34e07cd395534","unresolved":true,"context_lines":[{"line_number":773,"context_line":" os.close(reexec_signal_fd)"},{"line_number":774,"context_line":" if worker_state_fd:"},{"line_number":775,"context_line":" worker_state_fd \u003d int(worker_state_fd)"},{"line_number":776,"context_line":" data_len \u003d os.read(worker_state_fd, 2)"},{"line_number":777,"context_line":" if len(data_len) !\u003d 2:"},{"line_number":778,"context_line":" self.logger.warning("},{"line_number":779,"context_line":" \u0027Invalid worker state received; expected 2 bytes \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"2b6cbce3_41440e28","line":776,"range":{"start_line":776,"start_character":52,"end_line":776,"end_character":53},"updated":"2021-05-02 04:07:07.000000000","message":"4, not 2 -- changed my mind about H vs I.","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"ded8482a9e7d2c4aa8b25435b790466968202871","unresolved":false,"context_lines":[{"line_number":773,"context_line":" os.close(reexec_signal_fd)"},{"line_number":774,"context_line":" if worker_state_fd:"},{"line_number":775,"context_line":" worker_state_fd \u003d int(worker_state_fd)"},{"line_number":776,"context_line":" data_len \u003d os.read(worker_state_fd, 2)"},{"line_number":777,"context_line":" if len(data_len) !\u003d 2:"},{"line_number":778,"context_line":" self.logger.warning("},{"line_number":779,"context_line":" \u0027Invalid worker state received; expected 2 bytes \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"a3f7a554_c874850f","line":776,"range":{"start_line":776,"start_character":52,"end_line":776,"end_character":53},"in_reply_to":"2b6cbce3_41440e28","updated":"2022-11-02 19:52:36.000000000","message":"Done","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"55c5bb87492d780612441ba191f074c0766e2044","unresolved":true,"context_lines":[{"line_number":792,"context_line":" \u0027Incomplete worker state received; expected %d \u0027"},{"line_number":793,"context_line":" \u0027bytes but only received %d\u0027, data_len, len(data))"},{"line_number":794,"context_line":" else:"},{"line_number":795,"context_line":" self.reload_pids.update(json.loads(data))"},{"line_number":796,"context_line":" os.close(worker_state_fd)"},{"line_number":797,"context_line":""},{"line_number":798,"context_line":" # Finally, signal systemd (if appropriate) that process started"}],"source_content_type":"text/x-python","patch_set":1,"id":"de06ea1e_59ec5311","line":795,"range":{"start_line":795,"start_character":48,"end_line":795,"end_character":58},"updated":"2021-04-30 21:33:45.000000000","message":"Some small measure of validation -- though maybe I should have some error handling around it...","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"ded8482a9e7d2c4aa8b25435b790466968202871","unresolved":false,"context_lines":[{"line_number":792,"context_line":" \u0027Incomplete worker state received; expected %d \u0027"},{"line_number":793,"context_line":" \u0027bytes but only received %d\u0027, data_len, len(data))"},{"line_number":794,"context_line":" else:"},{"line_number":795,"context_line":" self.reload_pids.update(json.loads(data))"},{"line_number":796,"context_line":" os.close(worker_state_fd)"},{"line_number":797,"context_line":""},{"line_number":798,"context_line":" # Finally, signal systemd (if appropriate) that process started"}],"source_content_type":"text/x-python","patch_set":1,"id":"fd281e80_abde1562","line":795,"range":{"start_line":795,"start_character":48,"end_line":795,"end_character":58},"in_reply_to":"de06ea1e_59ec5311","updated":"2022-11-02 19:52:36.000000000","message":"Done","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"55c5bb87492d780612441ba191f074c0766e2044","unresolved":true,"context_lines":[{"line_number":1310,"context_line":" stale_pids.extend(strategy.get_worker_pids())"},{"line_number":1311,"context_line":" stale_pids.append(os.getpid())"},{"line_number":1312,"context_line":" data \u003d json.dumps(stale_pids).encode(\u0027ascii\u0027)"},{"line_number":1313,"context_line":" os.write(state_wfd, struct.pack(\u0027!I\u0027, len(data)) + data)"},{"line_number":1314,"context_line":" os.close(state_wfd)"},{"line_number":1315,"context_line":" else:"},{"line_number":1316,"context_line":" logger.warning(\u0027Old server temporary child PID\u003d%d *NOT* \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"8d218cad_65414b0f","line":1313,"range":{"start_line":1313,"start_character":54,"end_line":1313,"end_character":55},"updated":"2021-04-30 21:33:45.000000000","message":"Figured 16 bits may not quite be enough if we decided we wanted to expand how much state we\u0027re passing along.","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"5eb46e85e443875a642c9fa327b0000ce451cfd3","unresolved":true,"context_lines":[{"line_number":1310,"context_line":" stale_pids.extend(strategy.get_worker_pids())"},{"line_number":1311,"context_line":" stale_pids.append(os.getpid())"},{"line_number":1312,"context_line":" data \u003d json.dumps(stale_pids).encode(\u0027ascii\u0027)"},{"line_number":1313,"context_line":" os.write(state_wfd, struct.pack(\u0027!I\u0027, len(data)) + data)"},{"line_number":1314,"context_line":" os.close(state_wfd)"},{"line_number":1315,"context_line":" else:"},{"line_number":1316,"context_line":" logger.warning(\u0027Old server temporary child PID\u003d%d *NOT* \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"dc112fd8_82aee998","line":1313,"range":{"start_line":1313,"start_character":54,"end_line":1313,"end_character":55},"in_reply_to":"11ba79d5_6f7d0f68","updated":"2023-11-07 00:02:34.000000000","message":"The socket-closer (being the only process with the old worker list in its head) is passing state to the new (re-exec\u0027ed) manager.","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"1ff9b803cf4dd8cadf536746d2d1652a7e8a18cd","unresolved":true,"context_lines":[{"line_number":1310,"context_line":" stale_pids.extend(strategy.get_worker_pids())"},{"line_number":1311,"context_line":" stale_pids.append(os.getpid())"},{"line_number":1312,"context_line":" data \u003d json.dumps(stale_pids).encode(\u0027ascii\u0027)"},{"line_number":1313,"context_line":" os.write(state_wfd, struct.pack(\u0027!I\u0027, len(data)) + data)"},{"line_number":1314,"context_line":" os.close(state_wfd)"},{"line_number":1315,"context_line":" else:"},{"line_number":1316,"context_line":" logger.warning(\u0027Old server temporary child PID\u003d%d *NOT* \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"c49c1543_7455a75e","line":1313,"range":{"start_line":1313,"start_character":54,"end_line":1313,"end_character":55},"in_reply_to":"26451f08_207fc68f","updated":"2023-11-08 17:41:17.000000000","message":"There are two messages being passed:\n\n- new manager tells socket-closer it\u0027s ready, so close the sockets -- this is already happening on master\n- socket-closer tells new manager about the old children, so it can `kill -9` them later -- this is new in this patch\n\nArguably, we could have the new manager look at `/proc/\u003cpid\u003e/task/*/children` to determine old children before the new code calls `fork`... but we\u0027d lose out on the \"time of first reload\" that we\u0027re also passing, which I wanted for consistent behavior in case an old worker survives multiple reloads.","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":true,"context_lines":[{"line_number":1310,"context_line":" stale_pids.extend(strategy.get_worker_pids())"},{"line_number":1311,"context_line":" stale_pids.append(os.getpid())"},{"line_number":1312,"context_line":" data \u003d json.dumps(stale_pids).encode(\u0027ascii\u0027)"},{"line_number":1313,"context_line":" os.write(state_wfd, struct.pack(\u0027!I\u0027, len(data)) + data)"},{"line_number":1314,"context_line":" os.close(state_wfd)"},{"line_number":1315,"context_line":" else:"},{"line_number":1316,"context_line":" logger.warning(\u0027Old server temporary child PID\u003d%d *NOT* \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"11ba79d5_6f7d0f68","line":1313,"range":{"start_line":1313,"start_character":54,"end_line":1313,"end_character":55},"in_reply_to":"8d218cad_65414b0f","updated":"2023-11-03 21:32:19.000000000","message":"WHO is passing this state to WHO!? I don\u0027t even know how this WORKS!?","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"789e9dbdd634756149299b5b52fda7fb079416b1","unresolved":true,"context_lines":[{"line_number":1310,"context_line":" stale_pids.extend(strategy.get_worker_pids())"},{"line_number":1311,"context_line":" stale_pids.append(os.getpid())"},{"line_number":1312,"context_line":" data \u003d json.dumps(stale_pids).encode(\u0027ascii\u0027)"},{"line_number":1313,"context_line":" os.write(state_wfd, struct.pack(\u0027!I\u0027, len(data)) + data)"},{"line_number":1314,"context_line":" os.close(state_wfd)"},{"line_number":1315,"context_line":" else:"},{"line_number":1316,"context_line":" logger.warning(\u0027Old server temporary child PID\u003d%d *NOT* \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"3f2cb52c_c46484e2","line":1313,"range":{"start_line":1313,"start_character":54,"end_line":1313,"end_character":55},"in_reply_to":"a74d9ce9_c472fb23","updated":"2024-12-12 21:16:17.000000000","message":"Is the real beef the existence of the socket-closer? https://review.opendev.org/c/openstack/swift/+/900957","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"390551729fb1d65df5c5726b4f80b931729ab7d1","unresolved":true,"context_lines":[{"line_number":1310,"context_line":" stale_pids.extend(strategy.get_worker_pids())"},{"line_number":1311,"context_line":" stale_pids.append(os.getpid())"},{"line_number":1312,"context_line":" data \u003d json.dumps(stale_pids).encode(\u0027ascii\u0027)"},{"line_number":1313,"context_line":" os.write(state_wfd, struct.pack(\u0027!I\u0027, len(data)) + data)"},{"line_number":1314,"context_line":" os.close(state_wfd)"},{"line_number":1315,"context_line":" else:"},{"line_number":1316,"context_line":" logger.warning(\u0027Old server temporary child PID\u003d%d *NOT* \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"a74d9ce9_c472fb23","line":1313,"range":{"start_line":1313,"start_character":54,"end_line":1313,"end_character":55},"in_reply_to":"c49c1543_7455a75e","updated":"2023-11-10 17:43:19.000000000","message":"i\u0027m fine with passing state explicitly from the old-manager to the new-manager - we already do that with the notify_fd\n\nwhat I could NOT wrap my head around was why we we\u0027re doing it through another process like a game of telephone:\n\nhttps://review.opendev.org/c/openstack/swift/+/900638","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"20fcd7921037434fd1541dd4f0388261ff944e26","unresolved":true,"context_lines":[{"line_number":1310,"context_line":" stale_pids.extend(strategy.get_worker_pids())"},{"line_number":1311,"context_line":" stale_pids.append(os.getpid())"},{"line_number":1312,"context_line":" data \u003d json.dumps(stale_pids).encode(\u0027ascii\u0027)"},{"line_number":1313,"context_line":" os.write(state_wfd, struct.pack(\u0027!I\u0027, len(data)) + data)"},{"line_number":1314,"context_line":" os.close(state_wfd)"},{"line_number":1315,"context_line":" else:"},{"line_number":1316,"context_line":" logger.warning(\u0027Old server temporary child PID\u003d%d *NOT* \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"26451f08_207fc68f","line":1313,"range":{"start_line":1313,"start_character":54,"end_line":1313,"end_character":55},"in_reply_to":"dc112fd8_82aee998","updated":"2023-11-07 21:57:12.000000000","message":"WHAT? I thought it was the other way around - the new manager signals the socket closer once he has all the new workers up and listening on so that the socker-closre can shut kill the listen sockets on the old workers?","commit_id":"d07a7f8a38176769fc51644df021ac560cf394cc"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"dcd87aa6362b45cc35429ae106f650614c5d03a2","unresolved":true,"context_lines":[{"line_number":771,"context_line":" reexec_signal_fd \u003d int(reexec_signal_fd)"},{"line_number":772,"context_line":" os.write(reexec_signal_fd, str(os.getpid()).encode(\u0027utf8\u0027))"},{"line_number":773,"context_line":" os.close(reexec_signal_fd)"},{"line_number":774,"context_line":" self.read_state_from_old_manager(worker_state_fd)"},{"line_number":775,"context_line":""},{"line_number":776,"context_line":" # Finally, signal systemd (if appropriate) that process started"},{"line_number":777,"context_line":" # properly."}],"source_content_type":"text/x-python","patch_set":3,"id":"5aed8edc_7e0aa791","line":774,"updated":"2022-03-25 19:33:42.000000000","message":"This is all opportunistic -- may want to wrap all this in something like\n\n try:\n self.read_state_from_old_manager(worker_state_fd)\n except Exception as e:\n self.logger.warning(\u0027Failed to read state from the old manager: %r\u0027, e)\n\nso we don\u0027t go killing ourselves if something goes sideways (say, during a rollback from some future code change).","commit_id":"fa6c6b4efaed74543076f7a9124e3e9467fd65c0"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"21c22828a364ca7ceca1e84b584a78fad440e5ab","unresolved":false,"context_lines":[{"line_number":771,"context_line":" reexec_signal_fd \u003d int(reexec_signal_fd)"},{"line_number":772,"context_line":" os.write(reexec_signal_fd, str(os.getpid()).encode(\u0027utf8\u0027))"},{"line_number":773,"context_line":" os.close(reexec_signal_fd)"},{"line_number":774,"context_line":" self.read_state_from_old_manager(worker_state_fd)"},{"line_number":775,"context_line":""},{"line_number":776,"context_line":" # Finally, signal systemd (if appropriate) that process started"},{"line_number":777,"context_line":" # properly."}],"source_content_type":"text/x-python","patch_set":3,"id":"33056d81_a98c091f","line":774,"in_reply_to":"5aed8edc_7e0aa791","updated":"2023-04-17 17:10:59.000000000","message":"Done","commit_id":"fa6c6b4efaed74543076f7a9124e3e9467fd65c0"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"421e054efe8375a6ce08afd168871353d6408734","unresolved":true,"context_lines":[{"line_number":819,"context_line":" return"},{"line_number":820,"context_line":""},{"line_number":821,"context_line":" try:"},{"line_number":822,"context_line":" self.reload_pids.update(old_state[\"old_pids\"])"},{"line_number":823,"context_line":" except (KeyError, TypeError) as err:"},{"line_number":824,"context_line":" self.logger.error("},{"line_number":825,"context_line":" \u0027Invalid worker state received; \u0027"}],"source_content_type":"text/x-python","patch_set":3,"id":"99d5aae0_93a134e2","line":822,"updated":"2022-10-17 04:34:18.000000000","message":"Idea: Also send these off to a greenthread that sleeps for some configurable amount of time (default to, say, an hour?) then starts handing out SIGKILLs.","commit_id":"fa6c6b4efaed74543076f7a9124e3e9467fd65c0"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"21c22828a364ca7ceca1e84b584a78fad440e5ab","unresolved":false,"context_lines":[{"line_number":819,"context_line":" return"},{"line_number":820,"context_line":""},{"line_number":821,"context_line":" try:"},{"line_number":822,"context_line":" self.reload_pids.update(old_state[\"old_pids\"])"},{"line_number":823,"context_line":" except (KeyError, TypeError) as err:"},{"line_number":824,"context_line":" self.logger.error("},{"line_number":825,"context_line":" \u0027Invalid worker state received; \u0027"}],"source_content_type":"text/x-python","patch_set":3,"id":"9c7bb730_7d5c54e0","line":822,"in_reply_to":"99d5aae0_93a134e2","updated":"2023-04-17 17:10:59.000000000","message":"Done","commit_id":"fa6c6b4efaed74543076f7a9124e3e9467fd65c0"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":true,"context_lines":[{"line_number":554,"context_line":" len(data_len))"},{"line_number":555,"context_line":" return"},{"line_number":556,"context_line":""},{"line_number":557,"context_line":" data_len \u003d struct.unpack(\u0027!I\u0027, data_len)[0]"},{"line_number":558,"context_line":" data \u003d b\u0027\u0027"},{"line_number":559,"context_line":" while len(data) \u003c data_len:"},{"line_number":560,"context_line":" chunk \u003d os.read(worker_state_fd, data_len - len(data))"}],"source_content_type":"text/x-python","patch_set":8,"id":"f96ed846_41e221fb","line":557,"updated":"2023-04-17 15:45:08.000000000","message":"oh, com\u0027n a new binary protocol?","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"789e9dbdd634756149299b5b52fda7fb079416b1","unresolved":false,"context_lines":[{"line_number":554,"context_line":" len(data_len))"},{"line_number":555,"context_line":" return"},{"line_number":556,"context_line":""},{"line_number":557,"context_line":" data_len \u003d struct.unpack(\u0027!I\u0027, data_len)[0]"},{"line_number":558,"context_line":" data \u003d b\u0027\u0027"},{"line_number":559,"context_line":" while len(data) \u003c data_len:"},{"line_number":560,"context_line":" chunk \u003d os.read(worker_state_fd, data_len - len(data))"}],"source_content_type":"text/x-python","patch_set":8,"id":"9a255ffa_2fae7350","line":557,"in_reply_to":"f95544c8_6c2d44f0","updated":"2024-12-12 21:16:17.000000000","message":"Acknowledged","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":true,"context_lines":[{"line_number":554,"context_line":" len(data_len))"},{"line_number":555,"context_line":" return"},{"line_number":556,"context_line":""},{"line_number":557,"context_line":" data_len \u003d struct.unpack(\u0027!I\u0027, data_len)[0]"},{"line_number":558,"context_line":" data \u003d b\u0027\u0027"},{"line_number":559,"context_line":" while len(data) \u003c data_len:"},{"line_number":560,"context_line":" chunk \u003d os.read(worker_state_fd, data_len - len(data))"}],"source_content_type":"text/x-python","patch_set":8,"id":"f95544c8_6c2d44f0","line":557,"in_reply_to":"f96ed846_41e221fb","updated":"2023-11-03 21:32:19.000000000","message":"fwiw, it\u0027s length prefixed json \n\n\t\u003e\u003e\u003e struct.pack(\u0027!I\u0027, len(data)) + data.encode(\u0027ascii\u0027)\n\tb\u0027\\x00\\x00\\x00\\x1d{\"old_pids\": [123, 234, 345]}\u0027","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":true,"context_lines":[{"line_number":602,"context_line":" except OSError as e:"},{"line_number":603,"context_line":" if e.errno !\u003d errno.ESRCH:"},{"line_number":604,"context_line":" self.logger.error("},{"line_number":605,"context_line":" \"Could not kill stale pid %d: %s\", pid, e)"},{"line_number":606,"context_line":" # else, pid got re-used?"},{"line_number":607,"context_line":""},{"line_number":608,"context_line":" eventlet.spawn_n(smother)"}],"source_content_type":"text/x-python","patch_set":8,"id":"1c81aebc_0cba663a","line":605,"updated":"2023-04-17 15:45:08.000000000","message":"i forget how linux pid groups work - I think this is saying we expect all the orphans to know they are owned by the new proxy parent process, but I also thought what we\u0027d seen is that stale workers get owned by init/pid1","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"50326a2eb0d19986323078a783a604f20e3e7b94","unresolved":true,"context_lines":[{"line_number":602,"context_line":" except OSError as e:"},{"line_number":603,"context_line":" if e.errno !\u003d errno.ESRCH:"},{"line_number":604,"context_line":" self.logger.error("},{"line_number":605,"context_line":" \"Could not kill stale pid %d: %s\", pid, e)"},{"line_number":606,"context_line":" # else, pid got re-used?"},{"line_number":607,"context_line":""},{"line_number":608,"context_line":" eventlet.spawn_n(smother)"}],"source_content_type":"text/x-python","patch_set":8,"id":"aedde7d8_9f4d66bb","line":605,"in_reply_to":"1c81aebc_0cba663a","updated":"2023-04-28 22:00:06.000000000","message":"Child processes get -- IDK, fostered? to pid1 if their parent goes away. We\u0027d see that during a restart, but not a reload.","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[{"line_number":602,"context_line":" except OSError as e:"},{"line_number":603,"context_line":" if e.errno !\u003d errno.ESRCH:"},{"line_number":604,"context_line":" self.logger.error("},{"line_number":605,"context_line":" \"Could not kill stale pid %d: %s\", pid, e)"},{"line_number":606,"context_line":" # else, pid got re-used?"},{"line_number":607,"context_line":""},{"line_number":608,"context_line":" eventlet.spawn_n(smother)"}],"source_content_type":"text/x-python","patch_set":8,"id":"374efc63_6988bf3d","line":605,"in_reply_to":"aedde7d8_9f4d66bb","updated":"2023-11-03 21:32:19.000000000","message":"Ack","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":true,"context_lines":[{"line_number":1096,"context_line":" # parent; set env var for fds and reexec ourselves"},{"line_number":1097,"context_line":" os.close(read_fd)"},{"line_number":1098,"context_line":" os.close(state_wfd)"},{"line_number":1099,"context_line":" os.putenv(NOTIFY_FD_ENV_KEY, \u0027%s,%s\u0027 % (write_fd, state_rfd))"},{"line_number":1100,"context_line":" myself \u003d os.path.realpath(sys.argv[0])"},{"line_number":1101,"context_line":" logger.info(\"Old server PID\u003d%d re\u0027execing as: %r\","},{"line_number":1102,"context_line":" orig_server_pid, [myself] + list(sys.argv))"}],"source_content_type":"text/x-python","patch_set":8,"id":"bbbfd703_d92ed2d8","line":1099,"updated":"2023-04-17 15:45:08.000000000","message":"and we don\u0027t have to worry about old code seeing the new csv and expecting it to be an int?","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[{"line_number":1096,"context_line":" # parent; set env var for fds and reexec ourselves"},{"line_number":1097,"context_line":" os.close(read_fd)"},{"line_number":1098,"context_line":" os.close(state_wfd)"},{"line_number":1099,"context_line":" os.putenv(NOTIFY_FD_ENV_KEY, \u0027%s,%s\u0027 % (write_fd, state_rfd))"},{"line_number":1100,"context_line":" myself \u003d os.path.realpath(sys.argv[0])"},{"line_number":1101,"context_line":" logger.info(\"Old server PID\u003d%d re\u0027execing as: %r\","},{"line_number":1102,"context_line":" orig_server_pid, [myself] + list(sys.argv))"}],"source_content_type":"text/x-python","patch_set":8,"id":"f6ebf317_b895db9b","line":1099,"in_reply_to":"1b1680ec_8a7bf11c","updated":"2023-11-03 21:32:19.000000000","message":"Ack","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"c15bb5a2c985782d98447ac711254fd27209186a","unresolved":true,"context_lines":[{"line_number":1096,"context_line":" # parent; set env var for fds and reexec ourselves"},{"line_number":1097,"context_line":" os.close(read_fd)"},{"line_number":1098,"context_line":" os.close(state_wfd)"},{"line_number":1099,"context_line":" os.putenv(NOTIFY_FD_ENV_KEY, \u0027%s,%s\u0027 % (write_fd, state_rfd))"},{"line_number":1100,"context_line":" myself \u003d os.path.realpath(sys.argv[0])"},{"line_number":1101,"context_line":" logger.info(\"Old server PID\u003d%d re\u0027execing as: %r\","},{"line_number":1102,"context_line":" orig_server_pid, [myself] + list(sys.argv))"}],"source_content_type":"text/x-python","patch_set":8,"id":"1b1680ec_8a7bf11c","line":1099,"in_reply_to":"61a12947_92b00a74","updated":"2023-04-18 16:03:18.000000000","message":"Bah, that was something different I was thinking of.\n\nBut no, we don\u0027t have to worry about old code here; we\u0027re putting things into our own environment before re-exec\u0027ing -- the only way we could end up throwing off old code is during a rollback (i.e. we have new code already running, go back to old code, then reload). I can add an `UpgradeImpact` about how you may need to do a full restart rather than reload for a rollback if you like, but in general I\u0027m not *that* worried about making rollbacks bullet-proof -- new swift is the best swift we know how to make, and if there are issues, they\u0027ll get fixed in even-newer swift.\n\nCommitting to a rollback is hairy, especially if there\u0027s a chance newer features are already in use. (Imagine going back to pre-null-namespace swift after one of your users enabled s3-style versioning on a container!)","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"21c22828a364ca7ceca1e84b584a78fad440e5ab","unresolved":true,"context_lines":[{"line_number":1096,"context_line":" # parent; set env var for fds and reexec ourselves"},{"line_number":1097,"context_line":" os.close(read_fd)"},{"line_number":1098,"context_line":" os.close(state_wfd)"},{"line_number":1099,"context_line":" os.putenv(NOTIFY_FD_ENV_KEY, \u0027%s,%s\u0027 % (write_fd, state_rfd))"},{"line_number":1100,"context_line":" myself \u003d os.path.realpath(sys.argv[0])"},{"line_number":1101,"context_line":" logger.info(\"Old server PID\u003d%d re\u0027execing as: %r\","},{"line_number":1102,"context_line":" orig_server_pid, [myself] + list(sys.argv))"}],"source_content_type":"text/x-python","patch_set":8,"id":"61a12947_92b00a74","line":1099,"in_reply_to":"bbbfd703_d92ed2d8","updated":"2023-04-17 17:10:59.000000000","message":"Old code only ever looks to see that something was sent: https://github.com/openstack/swift/commit/1107f24179c0c6fdcb58771f3a6e6c025352b5d3#diff-18e64892103b77e3f608c7e1fc47d2acd7349e791253f42c97f5462d4aaa173dR1301","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"17902fe7ab53867324ee4a1cca354dd61ef0708d","unresolved":true,"context_lines":[{"line_number":470,"context_line":" self.reload_pids \u003d dict()"},{"line_number":471,"context_line":" # If they don\u0027t cleanup quickly, we\u0027ll start killing them after this"},{"line_number":472,"context_line":" self.stale_worker_timeout \u003d utils.non_negative_float("},{"line_number":473,"context_line":" conf.get(\u0027stale_worker_timeout\u0027, 86400))"},{"line_number":474,"context_line":""},{"line_number":475,"context_line":" def post_fork_hook(self):"},{"line_number":476,"context_line":" \"\"\""}],"source_content_type":"text/x-python","patch_set":13,"id":"cf8e2bb9_d19865e0","line":473,"updated":"2023-06-27 00:46:46.000000000","message":"i guess chris thinks this is a reasonable default","commit_id":"4e38af269558bb7f8c3b38f3f1d448172d392d30"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[{"line_number":470,"context_line":" self.reload_pids \u003d dict()"},{"line_number":471,"context_line":" # If they don\u0027t cleanup quickly, we\u0027ll start killing them after this"},{"line_number":472,"context_line":" self.stale_worker_timeout \u003d utils.non_negative_float("},{"line_number":473,"context_line":" conf.get(\u0027stale_worker_timeout\u0027, 86400))"},{"line_number":474,"context_line":""},{"line_number":475,"context_line":" def post_fork_hook(self):"},{"line_number":476,"context_line":" \"\"\""}],"source_content_type":"text/x-python","patch_set":13,"id":"0955eb13_3ff4f6da","line":473,"in_reply_to":"cf8e2bb9_d19865e0","updated":"2023-11-03 21:32:19.000000000","message":"Ack","commit_id":"4e38af269558bb7f8c3b38f3f1d448172d392d30"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":true,"context_lines":[{"line_number":544,"context_line":""},{"line_number":545,"context_line":" self.signaled_ready \u003d True"},{"line_number":546,"context_line":""},{"line_number":547,"context_line":" def read_state_from_old_manager(self, worker_state_fd):"},{"line_number":548,"context_line":" if not worker_state_fd:"},{"line_number":549,"context_line":" return"},{"line_number":550,"context_line":" worker_state_fd \u003d int(worker_state_fd)"}],"source_content_type":"text/x-python","patch_set":14,"id":"4f719c2f_de77e20d","line":547,"updated":"2023-11-03 21:32:19.000000000","message":"like... there\u0027s not even a doc string. What does all this code do and how does it do it?","commit_id":"c242f1520bc741a24822a712e4a9758a61832227"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"20fcd7921037434fd1541dd4f0388261ff944e26","unresolved":false,"context_lines":[{"line_number":544,"context_line":""},{"line_number":545,"context_line":" self.signaled_ready \u003d True"},{"line_number":546,"context_line":""},{"line_number":547,"context_line":" def read_state_from_old_manager(self, worker_state_fd):"},{"line_number":548,"context_line":" if not worker_state_fd:"},{"line_number":549,"context_line":" return"},{"line_number":550,"context_line":" worker_state_fd \u003d int(worker_state_fd)"}],"source_content_type":"text/x-python","patch_set":14,"id":"de88ed8b_afb199b2","line":547,"in_reply_to":"4f719c2f_de77e20d","updated":"2023-11-07 21:57:12.000000000","message":"Done","commit_id":"c242f1520bc741a24822a712e4a9758a61832227"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"390551729fb1d65df5c5726b4f80b931729ab7d1","unresolved":true,"context_lines":[{"line_number":540,"context_line":" reexec_signal_fd \u003d os.getenv(NOTIFY_FD_ENV_KEY)"},{"line_number":541,"context_line":" if reexec_signal_fd:"},{"line_number":542,"context_line":" if \u0027,\u0027 in reexec_signal_fd:"},{"line_number":543,"context_line":" reexec_signal_fd, worker_state_fd \u003d reexec_signal_fd.split(\u0027,\u0027)"},{"line_number":544,"context_line":" reexec_signal_fd \u003d int(reexec_signal_fd)"},{"line_number":545,"context_line":" os.write(reexec_signal_fd, str(os.getpid()).encode(\u0027utf8\u0027))"},{"line_number":546,"context_line":" os.close(reexec_signal_fd)"}],"source_content_type":"text/x-python","patch_set":17,"id":"19fc06fb_7e5235ea","line":543,"updated":"2023-11-10 17:43:19.000000000","message":"this is probably useful to carry for a release - otherwise we orphan processes at best and explod-on-reload at worst just after initial upgrade\n\n... but if we end up using the json-binary-msg-over-second-pipe-through-fork-to-reexec implementation we may not have to merge this to master?","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"789e9dbdd634756149299b5b52fda7fb079416b1","unresolved":true,"context_lines":[{"line_number":540,"context_line":" reexec_signal_fd \u003d os.getenv(NOTIFY_FD_ENV_KEY)"},{"line_number":541,"context_line":" if reexec_signal_fd:"},{"line_number":542,"context_line":" if \u0027,\u0027 in reexec_signal_fd:"},{"line_number":543,"context_line":" reexec_signal_fd, worker_state_fd \u003d reexec_signal_fd.split(\u0027,\u0027)"},{"line_number":544,"context_line":" reexec_signal_fd \u003d int(reexec_signal_fd)"},{"line_number":545,"context_line":" os.write(reexec_signal_fd, str(os.getpid()).encode(\u0027utf8\u0027))"},{"line_number":546,"context_line":" os.close(reexec_signal_fd)"}],"source_content_type":"text/x-python","patch_set":17,"id":"e66d35cb_ba5c7ca6","line":543,"in_reply_to":"19fc06fb_7e5235ea","updated":"2024-12-12 21:16:17.000000000","message":"I\u0027m still not convinced we can use the same pipe for both messages; a pipe is not a socket.","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"390551729fb1d65df5c5726b4f80b931729ab7d1","unresolved":true,"context_lines":[{"line_number":939,"context_line":" sock.close()"},{"line_number":940,"context_line":" return"},{"line_number":941,"context_line":""},{"line_number":942,"context_line":" self.logger.warning(\u0027Ignoring wait() result from unknown PID %d\u0027, pid)"},{"line_number":943,"context_line":""},{"line_number":944,"context_line":" def iter_sockets(self):"},{"line_number":945,"context_line":" \"\"\""}],"source_content_type":"text/x-python","patch_set":17,"id":"d57998aa_a5dae1e8","line":942,"updated":"2023-11-10 17:43:19.000000000","message":"cool, we shouldn\u0027t see too many of these... maybe something when the socker closer exits?","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"789e9dbdd634756149299b5b52fda7fb079416b1","unresolved":false,"context_lines":[{"line_number":939,"context_line":" sock.close()"},{"line_number":940,"context_line":" return"},{"line_number":941,"context_line":""},{"line_number":942,"context_line":" self.logger.warning(\u0027Ignoring wait() result from unknown PID %d\u0027, pid)"},{"line_number":943,"context_line":""},{"line_number":944,"context_line":" def iter_sockets(self):"},{"line_number":945,"context_line":" \"\"\""}],"source_content_type":"text/x-python","patch_set":17,"id":"79932cb9_c196ed1b","line":942,"in_reply_to":"5df602d7_4dd90d6a","updated":"2024-12-12 21:16:17.000000000","message":"Acknowledged","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"c35c7759f7ea89c9761d8ce53c1d28e8f0e5a91e","unresolved":true,"context_lines":[{"line_number":939,"context_line":" sock.close()"},{"line_number":940,"context_line":" return"},{"line_number":941,"context_line":""},{"line_number":942,"context_line":" self.logger.warning(\u0027Ignoring wait() result from unknown PID %d\u0027, pid)"},{"line_number":943,"context_line":""},{"line_number":944,"context_line":" def iter_sockets(self):"},{"line_number":945,"context_line":" \"\"\""}],"source_content_type":"text/x-python","patch_set":17,"id":"5df602d7_4dd90d6a","line":942,"in_reply_to":"d57998aa_a5dae1e8","updated":"2023-11-13 18:20:44.000000000","message":"Nope; the socket-closer adds itself to the pids sent back to the new manager.","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"390551729fb1d65df5c5726b4f80b931729ab7d1","unresolved":true,"context_lines":[{"line_number":954,"context_line":" return ["},{"line_number":955,"context_line":" pid"},{"line_number":956,"context_line":" for port_data in self.tracking_data.values()"},{"line_number":957,"context_line":" for pid, _sock in port_data]"},{"line_number":958,"context_line":""},{"line_number":959,"context_line":""},{"line_number":960,"context_line":"def check_config(conf_path, app_section, *args, **kwargs):"}],"source_content_type":"text/x-python","patch_set":17,"id":"e8ac0d36_3c7b1066","line":957,"updated":"2023-11-10 17:43:19.000000000","message":"this is a nice interface, I think server-per-port start has a bug in:\n\nhttps://review.opendev.org/c/openstack/swift/+/900638","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"789e9dbdd634756149299b5b52fda7fb079416b1","unresolved":true,"context_lines":[{"line_number":954,"context_line":" return ["},{"line_number":955,"context_line":" pid"},{"line_number":956,"context_line":" for port_data in self.tracking_data.values()"},{"line_number":957,"context_line":" for pid, _sock in port_data]"},{"line_number":958,"context_line":""},{"line_number":959,"context_line":""},{"line_number":960,"context_line":"def check_config(conf_path, app_section, *args, **kwargs):"}],"source_content_type":"text/x-python","patch_set":17,"id":"6e4407ae_f5b19ddc","line":957,"in_reply_to":"e8ac0d36_3c7b1066","updated":"2024-12-12 21:16:17.000000000","message":"I like https://review.opendev.org/c/openstack/swift/+/900957 even more.","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"390551729fb1d65df5c5726b4f80b931729ab7d1","unresolved":true,"context_lines":[{"line_number":1153,"context_line":" # then the old server can\u0027t actually ever exit."},{"line_number":1154,"context_line":" strategy.set_close_on_exec_on_listen_sockets()"},{"line_number":1155,"context_line":" read_fd, write_fd \u003d os.pipe()"},{"line_number":1156,"context_line":" state_rfd, state_wfd \u003d os.pipe()"},{"line_number":1157,"context_line":" orig_server_pid \u003d os.getpid()"},{"line_number":1158,"context_line":" child_pid \u003d os.fork()"},{"line_number":1159,"context_line":" if child_pid:"}],"source_content_type":"text/x-python","patch_set":17,"id":"f8ca8c70_08c3f5ed","line":1156,"updated":"2023-11-10 17:43:19.000000000","message":"IIUC one may choose to use a pipe for IPC\n\nthere\u0027s some complexity you can avoid if you only ever send ONE message over the pipe, and in this case we have that:\n\n manager -\u003e socket_closer: new workers are ready shut her down\n socket_closer -\u003e manager: here\u0027s your old pids you forgot about\n\nbut I\u0027ve never heard it suggested that pipes shouldn\u0027t be bi-directional?\n\n\tif child_pid:\n\t # after reexec\n\t old_workers \u003d msg_pipe.read()\n\t pop_new_workers()\n\t msg_pipe.send(\u0027ready!\u0027)\n\telse:\n\t msg_pipe.send(old_workers)\n\t when msg_pipe.read() \u003d\u003d \u0027ready\u0027:\n\t\tkill_em_all()\n\nWhy do we want to use TWO pipes? How many IPC pipes between two processes do you thin is too many?","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"c35c7759f7ea89c9761d8ce53c1d28e8f0e5a91e","unresolved":true,"context_lines":[{"line_number":1153,"context_line":" # then the old server can\u0027t actually ever exit."},{"line_number":1154,"context_line":" strategy.set_close_on_exec_on_listen_sockets()"},{"line_number":1155,"context_line":" read_fd, write_fd \u003d os.pipe()"},{"line_number":1156,"context_line":" state_rfd, state_wfd \u003d os.pipe()"},{"line_number":1157,"context_line":" orig_server_pid \u003d os.getpid()"},{"line_number":1158,"context_line":" child_pid \u003d os.fork()"},{"line_number":1159,"context_line":" if child_pid:"}],"source_content_type":"text/x-python","patch_set":17,"id":"702ea1f9_f62f8611","line":1156,"in_reply_to":"f8ca8c70_08c3f5ed","updated":"2023-11-13 18:20:44.000000000","message":"A single process can use a pipe to send data to *itself*, too; what\u0027s stopping the `msg_pipe.read() \u003d\u003d \u0027ready\u0027` check from reading the JSON that it just put in the pipe?","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"}],"test/unit/common/test_utils.py":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"5a77fe5bfe160442b5a76f832ceac0c5829ff477","unresolved":true,"context_lines":[{"line_number":8140,"context_line":"class TestGetPpid(unittest.TestCase):"},{"line_number":8141,"context_line":" def test_happy_path(self, mock_open):"},{"line_number":8142,"context_line":" mock_open.return_value.__enter__().read.return_value \u003d \\"},{"line_number":8143,"context_line":" \u0027pid comm stat 456 see the procfs(5) man page for more info\\n\u0027"},{"line_number":8144,"context_line":" self.assertEqual(utils.get_ppid(123), 456)"},{"line_number":8145,"context_line":" self.assertIn(mock.call(\u0027/proc/123/stat\u0027), mock_open.mock_calls)"},{"line_number":8146,"context_line":""}],"source_content_type":"text/x-python","patch_set":8,"id":"08ef7ed9_524d2265","line":8143,"updated":"2023-04-17 15:45:08.000000000","message":"vagrant@saio:/vagrant/.scratch/nvsts-middleware$ cat /proc/23446/stat\n23446 (swift-proxy-ser) S 23378 23378 23378 0 -1 4194368 2242 0 0 0 4 5 0 0 20 0 1 0 115491 72912896 11430 18446744073709551615 94494784036864 94494786865741 140737313260496 0 0 0 0 16781312 515 1 0 0 17 0 0 0 0 0 0 94494789220464 94494789504216 94494813884416 140737313262842 140737313262936 140737313262936 140737313263574 0\n\n /proc/[pid]/stat\n Status information about the process. This is used by ps(1). It is defined in the kernel source file fs/proc/array.c.\n\n The fields, in order, with their proper scanf(3) format specifiers, are listed below. Whether or not certain of these fields display valid information is governed by a ptrace access\n mode PTRACE_MODE_READ_FSCREDS | PTRACE_MODE_NOAUDIT check (refer to ptrace(2)). If the check denies access, then the field value is displayed as 0. The affected fields are indicated\n with the marking [PT].\n\n (1) pid %d\n The process ID.\n\n (2) comm %s\n The filename of the executable, in parentheses. Strings longer than TASK_COMM_LEN (16) characters (including the terminating null byte) are silently truncated. This is visi‐\n ble whether or not the executable is swapped out.\n\n (3) state %c\n One of the following characters, indicating process state:\n\n R Running\n\n S Sleeping in an interruptible wait\n\n D Waiting in uninterruptible disk sleep\n\n Z Zombie\n\n T Stopped (on a signal) or (before Linux 2.6.33) trace stopped\n\n t Tracing stop (Linux 2.6.33 onward)\n\n W Paging (only before Linux 2.6.0)\n\n X Dead (from Linux 2.6.0 onward)\n\n x Dead (Linux 2.6.33 to 3.13 only)\n\n K Wakekill (Linux 2.6.33 to 3.13 only)\n\n W Waking (Linux 2.6.33 to 3.13 only)\n\n P Parked (Linux 3.9 to 3.13 only)\n\n (4) ppid %d\n The PID of the parent of this process.\n\n (5) pgrp %d\n The process group ID of the process.","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"3f1933f88b26f11c91382bfda04459df83426574","unresolved":false,"context_lines":[{"line_number":8140,"context_line":"class TestGetPpid(unittest.TestCase):"},{"line_number":8141,"context_line":" def test_happy_path(self, mock_open):"},{"line_number":8142,"context_line":" mock_open.return_value.__enter__().read.return_value \u003d \\"},{"line_number":8143,"context_line":" \u0027pid comm stat 456 see the procfs(5) man page for more info\\n\u0027"},{"line_number":8144,"context_line":" self.assertEqual(utils.get_ppid(123), 456)"},{"line_number":8145,"context_line":" self.assertIn(mock.call(\u0027/proc/123/stat\u0027), mock_open.mock_calls)"},{"line_number":8146,"context_line":""}],"source_content_type":"text/x-python","patch_set":8,"id":"3052cfd6_769120c2","line":8143,"in_reply_to":"08ef7ed9_524d2265","updated":"2023-11-03 21:32:19.000000000","message":"Ack","commit_id":"5d1395585f005c90533fe052f75038846008e16c"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"390551729fb1d65df5c5726b4f80b931729ab7d1","unresolved":true,"context_lines":[{"line_number":7278,"context_line":" utils.get_ppid(123)"},{"line_number":7279,"context_line":" self.assertEqual(caught.exception.errno, errno.EPERM)"},{"line_number":7280,"context_line":" self.assertEqual(mock_open.mock_calls[0], mock.call(\u0027/proc/123/stat\u0027))"},{"line_number":7281,"context_line":""},{"line_number":7282,"context_line":""},{"line_number":7283,"context_line":"class TestShardName(unittest.TestCase):"},{"line_number":7284,"context_line":" def test(self):"}],"source_content_type":"text/x-python","patch_set":17,"id":"b1275a46_2300ed62","line":7281,"updated":"2023-11-10 17:43:19.000000000","message":"i was kind of hoping for `assert utils.get_ppid(os.getpid()) \u003d\u003d os.getppid()`\n\nhttps://review.opendev.org/c/openstack/swift/+/900633","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"c35c7759f7ea89c9761d8ce53c1d28e8f0e5a91e","unresolved":true,"context_lines":[{"line_number":7278,"context_line":" utils.get_ppid(123)"},{"line_number":7279,"context_line":" self.assertEqual(caught.exception.errno, errno.EPERM)"},{"line_number":7280,"context_line":" self.assertEqual(mock_open.mock_calls[0], mock.call(\u0027/proc/123/stat\u0027))"},{"line_number":7281,"context_line":""},{"line_number":7282,"context_line":""},{"line_number":7283,"context_line":"class TestShardName(unittest.TestCase):"},{"line_number":7284,"context_line":" def test(self):"}],"source_content_type":"text/x-python","patch_set":17,"id":"b7d57c88_2c65cb70","line":7281,"in_reply_to":"b1275a46_2300ed62","updated":"2023-11-13 18:20:44.000000000","message":"Sure, I can squash it in.","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"789e9dbdd634756149299b5b52fda7fb079416b1","unresolved":false,"context_lines":[{"line_number":7278,"context_line":" utils.get_ppid(123)"},{"line_number":7279,"context_line":" self.assertEqual(caught.exception.errno, errno.EPERM)"},{"line_number":7280,"context_line":" self.assertEqual(mock_open.mock_calls[0], mock.call(\u0027/proc/123/stat\u0027))"},{"line_number":7281,"context_line":""},{"line_number":7282,"context_line":""},{"line_number":7283,"context_line":"class TestShardName(unittest.TestCase):"},{"line_number":7284,"context_line":" def test(self):"}],"source_content_type":"text/x-python","patch_set":17,"id":"1a2ea915_8516c065","line":7281,"in_reply_to":"b7d57c88_2c65cb70","updated":"2024-12-12 21:16:17.000000000","message":"Done","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"}],"test/unit/common/test_wsgi.py":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"390551729fb1d65df5c5726b4f80b931729ab7d1","unresolved":true,"context_lines":[{"line_number":1338,"context_line":" self.assertEqual(mock_kill.mock_calls, ["},{"line_number":1339,"context_line":" mock.call(123, signal.SIGKILL),"},{"line_number":1340,"context_line":" mock.call(90, signal.SIGKILL)])"},{"line_number":1341,"context_line":" self.assertEqual(fake_time.sleeps, [86395, 2])"},{"line_number":1342,"context_line":""},{"line_number":1343,"context_line":""},{"line_number":1344,"context_line":"class TestServersPerPortStrategy(unittest.TestCase, CommonTestMixin):"}],"source_content_type":"text/x-python","patch_set":17,"id":"8573a259_04979fda","line":1341,"updated":"2023-11-10 17:43:19.000000000","message":"intersting....\n\n\tE AssertionError: Lists differ: [86315.0, 2.0] !\u003d [86395, 2]\n\tE \n\tE First differing element 0:\n\tE 86315.0\n\tE 86395\n\tE \n\tE - [86315.0, 2.0]\n\tE ? ^ -- --\n\tE \n\tE + [86395, 2]\n\tE ? ^\n\n\t/home/vagrant/swift/test/unit/common/test_wsgi.py:1341: AssertionError","commit_id":"c8e775a94eb1350c1650e91f7615ae83b2e5081f"}]}