)]}'
{"/COMMIT_MSG":[{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"f229a33521d73bffaf3617a3d33ba54cb3cd2316","unresolved":true,"context_lines":[{"line_number":4,"context_line":"Commit:     Dan Smith \u003cdansmith@redhat.com\u003e"},{"line_number":5,"context_line":"CommitDate: 2022-08-11 10:28:48 -0700"},{"line_number":6,"context_line":""},{"line_number":7,"context_line":"Avoid startup failure for keystone failures"},{"line_number":8,"context_line":""},{"line_number":9,"context_line":"Conductor creates a placement client for the potential case where"},{"line_number":10,"context_line":"it needs to make a call for certain operations. A transient network"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":1,"id":"4838e746_f639deb0","line":7,"range":{"start_line":7,"start_character":14,"end_line":7,"end_character":21},"updated":"2022-08-11 17:51:47.000000000","message":"s/failure/abort/ or something","commit_id":"91c2cf0fe2d13859d02d4313bd1501b547612a3f"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"4e2c41d9ef19e8c01331b3dc1b771a6fe6d2f856","unresolved":true,"context_lines":[{"line_number":16,"context_line":"abort startup on errors that are highly likely to be permanent"},{"line_number":17,"context_line":"configuration errors, and only warn about things like being unable"},{"line_number":18,"context_line":"to contact keystone/placement during initialization. If a non-fatal"},{"line_number":19,"context_line":"error is encountered at startup, later operations needing the"},{"line_number":20,"context_line":"placement client will retry initialization."},{"line_number":21,"context_line":""},{"line_number":22,"context_line":"Closes-Bug: #1846820"},{"line_number":23,"context_line":"Change-Id: Idb7fcbce0c9562e7b9bd3e80f2a6d4b9bc286830"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":3,"id":"a0b2a66b_6284caaf","line":20,"range":{"start_line":19,"start_character":33,"end_line":20,"end_character":43},"updated":"2022-08-16 10:29:23.000000000","message":"could you please provide a test case for this behavior?","commit_id":"2e97c91b4dd64d4295b6facc4f2e99f5d9d1bc98"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"2d276e481711adc27bc083f9033fe82d0f1a4b93","unresolved":false,"context_lines":[{"line_number":16,"context_line":"abort startup on errors that are highly likely to be permanent"},{"line_number":17,"context_line":"configuration errors, and only warn about things like being unable"},{"line_number":18,"context_line":"to contact keystone/placement during initialization. If a non-fatal"},{"line_number":19,"context_line":"error is encountered at startup, later operations needing the"},{"line_number":20,"context_line":"placement client will retry initialization."},{"line_number":21,"context_line":""},{"line_number":22,"context_line":"Closes-Bug: #1846820"},{"line_number":23,"context_line":"Change-Id: Idb7fcbce0c9562e7b9bd3e80f2a6d4b9bc286830"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":3,"id":"ad4c7e73_ad2ebbb4","line":20,"range":{"start_line":19,"start_character":33,"end_line":20,"end_character":43},"in_reply_to":"a0b2a66b_6284caaf","updated":"2022-08-16 18:27:44.000000000","message":"Ack","commit_id":"2e97c91b4dd64d4295b6facc4f2e99f5d9d1bc98"}],"/PATCHSET_LEVEL":[{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"2728973da7915f39e43fc84829bc8b502a6a9425","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"400b55d7_b621def4","updated":"2022-08-11 18:35:58.000000000","message":"This looks reasonable to me. Thoughts on whether to add a couple more non-fatal cases inline.\n\n+1, will want to get gibi\u0027s input when he\u0027s back from PTO.","commit_id":"91c2cf0fe2d13859d02d4313bd1501b547612a3f"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0581ef753dcd8c566f849977af34544065feb734","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"2585a1e7_bb9286c9","in_reply_to":"400b55d7_b621def4","updated":"2022-08-11 18:43:25.000000000","message":"Ack, will add those and push up another rev and then yep, wait for Gibi of course.","commit_id":"91c2cf0fe2d13859d02d4313bd1501b547612a3f"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"fe91286f64758cb830ac41c393e55ad3e1a738d9","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"99e29ca0_db7fc91e","updated":"2022-08-11 19:08:53.000000000","message":"Looks good, thanks!","commit_id":"2e97c91b4dd64d4295b6facc4f2e99f5d9d1bc98"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"4e2c41d9ef19e8c01331b3dc1b771a6fe6d2f856","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"ed80f15c_1aa7b78a","updated":"2022-08-16 10:29:23.000000000","message":"This feels like a reasonable compromise. Thank you for proposing it.","commit_id":"2e97c91b4dd64d4295b6facc4f2e99f5d9d1bc98"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"ff17de88413091fc5026ca1e7fde8067a03bb5f8","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"72ec0f52_0a02bf00","updated":"2022-08-19 00:47:03.000000000","message":"gibi has reviewed and the test addition looks good","commit_id":"232684b44022f1bc4d72b07045900780de456e63"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"843a12e819d4656bc2d8008e1ce9e6a93dd1c32a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"a409fe59_539a580e","updated":"2022-08-18 15:18:18.000000000","message":"thanks for the extra test. looks good","commit_id":"232684b44022f1bc4d72b07045900780de456e63"}],"nova/conductor/manager.py":[{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"2728973da7915f39e43fc84829bc8b502a6a9425","unresolved":true,"context_lines":[{"line_number":253,"context_line":"            self.report_client"},{"line_number":254,"context_line":"        except (ks_exc.EndpointNotFound,"},{"line_number":255,"context_line":"                ks_exc.DiscoveryFailure,"},{"line_number":256,"context_line":"                ks_exc.ConnectFailure) as e:"},{"line_number":257,"context_line":"            # Non-fatal, likely transient (although not definitely);"},{"line_number":258,"context_line":"            # continue startup but log the warning so that when things"},{"line_number":259,"context_line":"            # fail later, it will be clear why we can not do certain"}],"source_content_type":"text/x-python","patch_set":1,"id":"99dda5da_af4e0c45","line":256,"updated":"2022-08-11 18:35:58.000000000","message":"I know they\u0027re not in the existing handling but with the linked bug [1] in mind, I\u0027m thinking we may want to include a few more things in here such as:\n\n* ks_exc.RequestTimeout [2] (server 408) Server closes connection on the client bc client took too long to send a complete request. I have seen this happen when haproxy is involved and things in the environment are slow due to various issues, haproxy responds with 408. Can be transient.\n\nWhether we include this exception depends on whether we are OK to let conductor run in a degraded state with the warning log message being enough to tell the operator they need to investigate their environment. The potential downside of doing that being if the service is \"working\" operators may not feel motivated enough to investigate their environment.\n\n* ks_exc.GatewayTimeout [3] (server 504) The specific fail case from [1], request times out trying to communicate with the gateway. Can be and is usually transient.\n\nI\u0027ve seen this one in the gate enough times (example [4]) and I\u0027ve seen customers hit it. One thing I\u0027ll note is that there are likely gaps where other touch points are not tolerating 504 but some patches have landed to enable these cases to retry (example [5]).\n\n[1] https://bugs.launchpad.net/kolla-ansible/+bug/1846820\n[2] https://github.com/openstack/keystoneauth/blob/2445a5df78d808a773ea820505c7af5215c573cf/keystoneauth1/exceptions/http.py#L190\n[3] https://github.com/openstack/keystoneauth/blob/2445a5df78d808a773ea820505c7af5215c573cf/keystoneauth1/exceptions/http.py#L358\n[4] https://bugs.launchpad.net/openstack-gate/+bug/1912845\n[5] https://github.com/openstack/nova/commit/8f4b740ca5292556f8e953a30f2a11ed4fbc2945","commit_id":"91c2cf0fe2d13859d02d4313bd1501b547612a3f"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"0581ef753dcd8c566f849977af34544065feb734","unresolved":true,"context_lines":[{"line_number":253,"context_line":"            self.report_client"},{"line_number":254,"context_line":"        except (ks_exc.EndpointNotFound,"},{"line_number":255,"context_line":"                ks_exc.DiscoveryFailure,"},{"line_number":256,"context_line":"                ks_exc.ConnectFailure) as e:"},{"line_number":257,"context_line":"            # Non-fatal, likely transient (although not definitely);"},{"line_number":258,"context_line":"            # continue startup but log the warning so that when things"},{"line_number":259,"context_line":"            # fail later, it will be clear why we can not do certain"}],"source_content_type":"text/x-python","patch_set":1,"id":"517c8a7a_fa7eb3c4","line":256,"in_reply_to":"99dda5da_af4e0c45","updated":"2022-08-11 18:43:25.000000000","message":"\u003e * ks_exc.RequestTimeout [2] (server 408) Server closes connection on the client bc client took too long to send a complete request. I have seen this happen when haproxy is involved and things in the environment are slow due to various issues, haproxy responds with 408. Can be transient.\n\u003e \n\u003e Whether we include this exception depends on whether we are OK to let conductor run in a degraded state with the warning log message being enough to tell the operator they need to investigate their environment. The potential downside of doing that being if the service is \"working\" operators may not feel motivated enough to investigate their environment.\n\nIf it\u0027s raised separately, I suspect RequestTimeout is what you\u0027d actually get if your network is down and ConnectFailure comes if your network is up but keystone is down. So, agree, that belongs in the same bucket.\n\n\u003e * ks_exc.GatewayTimeout [3] (server 504) The specific fail case from [1], request times out trying to communicate with the gateway. Can be and is usually transient.\n\nAck, that\u0027s the same as timeout, but as detected from a different vantage point, so agree.","commit_id":"91c2cf0fe2d13859d02d4313bd1501b547612a3f"}]}
