)]}' {"/PATCHSET_LEVEL":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"16af0aa15cbd28009d9a4e2585808d32592e82a5","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"47a6cd07_96df6f33","updated":"2025-01-18 01:42:20.000000000","message":"if we proceed with this direction i woudl like to see a release note to notify operators of this change.","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"}],"nova/compute/manager.py":[{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"4131847631e41a0d17fc5902c16adf756a9b517e","unresolved":true,"context_lines":[{"line_number":1779,"context_line":" mig_id \u003d migration.id"},{"line_number":1780,"context_line":""},{"line_number":1781,"context_line":" # Wait for live migration abort success."},{"line_number":1782,"context_line":" def _wait_for_migration_cleanup(retryctx):"},{"line_number":1783,"context_line":" mig \u003d objects.Migration.get_by_id(context, mig_id)"},{"line_number":1784,"context_line":" retryctx[\u0027retries\u0027] -\u003d 1"},{"line_number":1785,"context_line":" if mig.status !\u003d \u0027running\u0027:"}],"source_content_type":"text/x-python","patch_set":4,"id":"9a88393e_58706aac","line":1782,"updated":"2025-01-21 15:09:25.000000000","message":"Defining an inline function like this inside a wide exception handler is a very bad idea as it could mask all kinds of problems, even for non-deployment testing. Also, this is way way too deeply nested to be readable, especially since this is also defined inside a loop.","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"},{"author":{"_account_id":24243,"name":"benlei","email":"15201676659@163.com","username":"xubenlei","status":"easystack employee"},"change_message_id":"e42181583d8776d5f5437f5c39deb5f1285f9f4f","unresolved":false,"context_lines":[{"line_number":1779,"context_line":" mig_id \u003d migration.id"},{"line_number":1780,"context_line":""},{"line_number":1781,"context_line":" # Wait for live migration abort success."},{"line_number":1782,"context_line":" def _wait_for_migration_cleanup(retryctx):"},{"line_number":1783,"context_line":" mig \u003d objects.Migration.get_by_id(context, mig_id)"},{"line_number":1784,"context_line":" retryctx[\u0027retries\u0027] -\u003d 1"},{"line_number":1785,"context_line":" if mig.status !\u003d \u0027running\u0027:"}],"source_content_type":"text/x-python","patch_set":4,"id":"20350248_123f6690","line":1782,"in_reply_to":"9a88393e_58706aac","updated":"2025-02-06 11:29:25.000000000","message":"Done","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"16af0aa15cbd28009d9a4e2585808d32592e82a5","unresolved":true,"context_lines":[{"line_number":1790,"context_line":" if retryctx[\u0027retries\u0027] \u003c\u003d 0:"},{"line_number":1791,"context_line":" LOG.warning(\"Wait live migration abort operation \""},{"line_number":1792,"context_line":" \"timeout\", instance\u003dinstance)"},{"line_number":1793,"context_line":" raise loopingcall.LoopingCallDone()"},{"line_number":1794,"context_line":""},{"line_number":1795,"context_line":" timer \u003d loopingcall.FixedIntervalLoopingCall("},{"line_number":1796,"context_line":" _wait_for_migration_cleanup, retryctx)"}],"source_content_type":"text/x-python","patch_set":4,"id":"f604b650_7da7d2b9","line":1793,"updated":"2025-01-18 01:42:20.000000000","message":"i dont think loping is really the right approch\n\nif we were to do this i woudl liekly at most issue one abort to the hypervior for each instnace and exit.\n\nwe shoudl also log a warnign notifing hte admin that restarting the agent while live migration are runnign is not supproted and that e are just tryign to minimies the impact.\n\nin other words tellign the operator they shoudl not do this.","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"},{"author":{"_account_id":24243,"name":"benlei","email":"15201676659@163.com","username":"xubenlei","status":"easystack employee"},"change_message_id":"e42181583d8776d5f5437f5c39deb5f1285f9f4f","unresolved":false,"context_lines":[{"line_number":1790,"context_line":" if retryctx[\u0027retries\u0027] \u003c\u003d 0:"},{"line_number":1791,"context_line":" LOG.warning(\"Wait live migration abort operation \""},{"line_number":1792,"context_line":" \"timeout\", instance\u003dinstance)"},{"line_number":1793,"context_line":" raise loopingcall.LoopingCallDone()"},{"line_number":1794,"context_line":""},{"line_number":1795,"context_line":" timer \u003d loopingcall.FixedIntervalLoopingCall("},{"line_number":1796,"context_line":" _wait_for_migration_cleanup, retryctx)"}],"source_content_type":"text/x-python","patch_set":4,"id":"6f204dee_05e77b34","line":1793,"in_reply_to":"ee96d532_47d97f5f","updated":"2025-02-06 11:29:25.000000000","message":"Done","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"4131847631e41a0d17fc5902c16adf756a9b517e","unresolved":true,"context_lines":[{"line_number":1790,"context_line":" if retryctx[\u0027retries\u0027] \u003c\u003d 0:"},{"line_number":1791,"context_line":" LOG.warning(\"Wait live migration abort operation \""},{"line_number":1792,"context_line":" \"timeout\", instance\u003dinstance)"},{"line_number":1793,"context_line":" raise loopingcall.LoopingCallDone()"},{"line_number":1794,"context_line":""},{"line_number":1795,"context_line":" timer \u003d loopingcall.FixedIntervalLoopingCall("},{"line_number":1796,"context_line":" _wait_for_migration_cleanup, retryctx)"}],"source_content_type":"text/x-python","patch_set":4,"id":"ee96d532_47d97f5f","line":1793,"in_reply_to":"f604b650_7da7d2b9","updated":"2025-01-21 15:09:25.000000000","message":"I think this _is_ issuing one abort per instance. However, it issues one abort for an instance, then waits for that to complete, then moves on to the next one. What it should do is issue all the aborts immediately, and wait at the end (if we\u0027re going to wait at all).\n\nIf anything, this seems likely to prevent an operator from getting other cleanup tasks done cleanly before exit. The reason they may be doing a shutdown in the middle of a migration is because it\u0027s stuck or something and so this could further delay them from doing a clean shutdown and/or encourage a sigkill.\n\nIt also doesn\u0027t log much about why it\u0027s delaying as far as I can tell, only about when it fails, which means the operator won\u0027t likely know why nova appears stuck.\n\nI think maybe issuing all the aborts and then waiting a short period of time (10s or less) might be okay, but it should be verbose about how many are pending, how long it\u0027s going to wait, etc.","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"4131847631e41a0d17fc5902c16adf756a9b517e","unresolved":true,"context_lines":[{"line_number":1795,"context_line":" timer \u003d loopingcall.FixedIntervalLoopingCall("},{"line_number":1796,"context_line":" _wait_for_migration_cleanup, retryctx)"},{"line_number":1797,"context_line":" timer.start(interval\u003d1).wait()"},{"line_number":1798,"context_line":" except Exception:"},{"line_number":1799,"context_line":" LOG.exception(\u0027Failed to abort live-migration\u0027,"},{"line_number":1800,"context_line":" instance\u003dinstance)"},{"line_number":1801,"context_line":" # Try to check guest is still on host or not."}],"source_content_type":"text/x-python","patch_set":4,"id":"0afebcbc_66e86e4e","line":1798,"updated":"2025-01-21 15:09:25.000000000","message":"You raise specific exceptions inside the loop, so you should catch those here and only reserve the \"except Exception\" case for a last resort.","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"},{"author":{"_account_id":24243,"name":"benlei","email":"15201676659@163.com","username":"xubenlei","status":"easystack employee"},"change_message_id":"e42181583d8776d5f5437f5c39deb5f1285f9f4f","unresolved":false,"context_lines":[{"line_number":1795,"context_line":" timer \u003d loopingcall.FixedIntervalLoopingCall("},{"line_number":1796,"context_line":" _wait_for_migration_cleanup, retryctx)"},{"line_number":1797,"context_line":" timer.start(interval\u003d1).wait()"},{"line_number":1798,"context_line":" except Exception:"},{"line_number":1799,"context_line":" LOG.exception(\u0027Failed to abort live-migration\u0027,"},{"line_number":1800,"context_line":" instance\u003dinstance)"},{"line_number":1801,"context_line":" # Try to check guest is still on host or not."}],"source_content_type":"text/x-python","patch_set":4,"id":"dd21dcce_182c8a58","line":1798,"in_reply_to":"0afebcbc_66e86e4e","updated":"2025-02-06 11:29:25.000000000","message":"Done","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"3f084d1ac262ee32b55bfa9f41bd8f857279e7b8","unresolved":true,"context_lines":[{"line_number":1791,"context_line":" LOG.warning(\"Live migration abort failed while \""},{"line_number":1792,"context_line":" \"cleanup host\", instance\u003dinstance)"},{"line_number":1793,"context_line":" else:"},{"line_number":1794,"context_line":" # At this point, we think instance process already"},{"line_number":1795,"context_line":" # running on destination node. We just set instance"},{"line_number":1796,"context_line":" # vm state to error."},{"line_number":1797,"context_line":" if migration:"},{"line_number":1798,"context_line":" self._set_migration_status(migration, \u0027error\u0027)"},{"line_number":1799,"context_line":" instance.refresh()"}],"source_content_type":"text/x-python","patch_set":6,"id":"4340430e_515fa43d","line":1796,"range":{"start_line":1794,"start_character":22,"end_line":1796,"end_character":40},"updated":"2025-02-06 15:25:51.000000000","message":"Why? You\u0027re doing this in the not-sure-what-went-wrong exception handler, why would you assume it\u0027s running on the remote node *and* that you should set it to error based on some exception you don\u0027t know the details of? I would expect the diaper case to pretty much just log and move on.","commit_id":"13af296fcf219653db805ed18b9fe011930df8ae"},{"author":{"_account_id":24243,"name":"benlei","email":"15201676659@163.com","username":"xubenlei","status":"easystack employee"},"change_message_id":"a6649024324e71163bad32904dc04c75800832e6","unresolved":true,"context_lines":[{"line_number":1791,"context_line":" LOG.warning(\"Live migration abort failed while \""},{"line_number":1792,"context_line":" \"cleanup host\", instance\u003dinstance)"},{"line_number":1793,"context_line":" else:"},{"line_number":1794,"context_line":" # At this point, we think instance process already"},{"line_number":1795,"context_line":" # running on destination node. We just set instance"},{"line_number":1796,"context_line":" # vm state to error."},{"line_number":1797,"context_line":" if migration:"},{"line_number":1798,"context_line":" self._set_migration_status(migration, \u0027error\u0027)"},{"line_number":1799,"context_line":" instance.refresh()"}],"source_content_type":"text/x-python","patch_set":6,"id":"9764ebdb_997099a4","line":1796,"range":{"start_line":1794,"start_character":22,"end_line":1796,"end_character":40},"in_reply_to":"4340430e_515fa43d","updated":"2025-02-07 07:38:22.000000000","message":"The Exception will capture all abort operation exception and the mostly error is libvirt.libvirtError, so we check instance is running on source host or not firstly.\nMay be there are other reasons to make abort failed and at the same time self.driver.instance_exists return false and the live migration still running, but the nova compute progress is stopping or restarting at this point, so we could set vm state error to remind user or operator that live migration got some exceptions.","commit_id":"13af296fcf219653db805ed18b9fe011930df8ae"},{"author":{"_account_id":24243,"name":"benlei","email":"15201676659@163.com","username":"xubenlei","status":"easystack employee"},"change_message_id":"d46be71f655116b3e299eb35b665eda006a5935f","unresolved":false,"context_lines":[{"line_number":1791,"context_line":" LOG.warning(\"Live migration abort failed while \""},{"line_number":1792,"context_line":" \"cleanup host\", instance\u003dinstance)"},{"line_number":1793,"context_line":" else:"},{"line_number":1794,"context_line":" # At this point, we think instance process already"},{"line_number":1795,"context_line":" # running on destination node. We just set instance"},{"line_number":1796,"context_line":" # vm state to error."},{"line_number":1797,"context_line":" if migration:"},{"line_number":1798,"context_line":" self._set_migration_status(migration, \u0027error\u0027)"},{"line_number":1799,"context_line":" instance.refresh()"}],"source_content_type":"text/x-python","patch_set":6,"id":"653fecc1_3959378b","line":1796,"range":{"start_line":1794,"start_character":22,"end_line":1796,"end_character":40},"in_reply_to":"9764ebdb_997099a4","updated":"2025-02-13 08:23:03.000000000","message":"Done","commit_id":"13af296fcf219653db805ed18b9fe011930df8ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"3f084d1ac262ee32b55bfa9f41bd8f857279e7b8","unresolved":true,"context_lines":[{"line_number":1829,"context_line":" LOG.warning(\"The aborted live migration task status not \""},{"line_number":1830,"context_line":" \"changed to expected status after wait %s \""},{"line_number":1831,"context_line":" \"seconds\", retry_count, instance\u003dinst)"},{"line_number":1832,"context_line":" [mig_inst_mapping.pop(mig_id) for mig_id in completed_check]"},{"line_number":1833,"context_line":" retry_count +\u003d 1"},{"line_number":1834,"context_line":" time.sleep(1)"},{"line_number":1835,"context_line":""}],"source_content_type":"text/x-python","patch_set":6,"id":"1c8f5be7_f391bbfb","line":1832,"updated":"2025-02-06 15:25:51.000000000","message":"Please do not define listcomps purely for their side effects. They\u0027re very easy to break, ignore, and misinterpret.","commit_id":"13af296fcf219653db805ed18b9fe011930df8ae"},{"author":{"_account_id":24243,"name":"benlei","email":"15201676659@163.com","username":"xubenlei","status":"easystack employee"},"change_message_id":"a6649024324e71163bad32904dc04c75800832e6","unresolved":false,"context_lines":[{"line_number":1829,"context_line":" LOG.warning(\"The aborted live migration task status not \""},{"line_number":1830,"context_line":" \"changed to expected status after wait %s \""},{"line_number":1831,"context_line":" \"seconds\", retry_count, instance\u003dinst)"},{"line_number":1832,"context_line":" [mig_inst_mapping.pop(mig_id) for mig_id in completed_check]"},{"line_number":1833,"context_line":" retry_count +\u003d 1"},{"line_number":1834,"context_line":" time.sleep(1)"},{"line_number":1835,"context_line":""}],"source_content_type":"text/x-python","patch_set":6,"id":"a5163ba9_0036b69c","line":1832,"in_reply_to":"1c8f5be7_f391bbfb","updated":"2025-02-07 07:38:22.000000000","message":"Done","commit_id":"13af296fcf219653db805ed18b9fe011930df8ae"}],"nova/conf/compute.py":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"16af0aa15cbd28009d9a4e2585808d32592e82a5","unresolved":true,"context_lines":[{"line_number":744,"context_line":""},{"line_number":745,"context_line":"* Any positive integer representing greenthreads count."},{"line_number":746,"context_line":"\"\"\"),"},{"line_number":747,"context_line":" cfg.IntOpt(\u0027live_migration_cleanup_retry_count\u0027,"},{"line_number":748,"context_line":" default\u003d120,"},{"line_number":749,"context_line":" min\u003d0,"},{"line_number":750,"context_line":" help\u003d\"\"\""}],"source_content_type":"text/x-python","patch_set":4,"id":"117e4cee_72584201","line":747,"updated":"2025-01-18 01:42:20.000000000","message":"so the main issue with this is that graceful shutdown is still expected to exit in a timely manner.\n\nin genreal if sigterm take mroe hten a handful fo secodn e.g. \u003c 10 seconds\nit will be escalated to sig kill so its not really reasoabel to expect nova shutdown to take 2 mins.\n\nthis default is much longer then systemd or similar should ever wait for a deamon to shutdown.","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"4131847631e41a0d17fc5902c16adf756a9b517e","unresolved":true,"context_lines":[{"line_number":744,"context_line":""},{"line_number":745,"context_line":"* Any positive integer representing greenthreads count."},{"line_number":746,"context_line":"\"\"\"),"},{"line_number":747,"context_line":" cfg.IntOpt(\u0027live_migration_cleanup_retry_count\u0027,"},{"line_number":748,"context_line":" default\u003d120,"},{"line_number":749,"context_line":" min\u003d0,"},{"line_number":750,"context_line":" help\u003d\"\"\""}],"source_content_type":"text/x-python","patch_set":4,"id":"dc5b1e0c_5b329b6f","line":747,"in_reply_to":"117e4cee_72584201","updated":"2025-01-21 15:09:25.000000000","message":"Yep, 120s *per instance* is way way too long to wait, IMHO.","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"},{"author":{"_account_id":24243,"name":"benlei","email":"15201676659@163.com","username":"xubenlei","status":"easystack employee"},"change_message_id":"e42181583d8776d5f5437f5c39deb5f1285f9f4f","unresolved":false,"context_lines":[{"line_number":744,"context_line":""},{"line_number":745,"context_line":"* Any positive integer representing greenthreads count."},{"line_number":746,"context_line":"\"\"\"),"},{"line_number":747,"context_line":" cfg.IntOpt(\u0027live_migration_cleanup_retry_count\u0027,"},{"line_number":748,"context_line":" default\u003d120,"},{"line_number":749,"context_line":" min\u003d0,"},{"line_number":750,"context_line":" help\u003d\"\"\""}],"source_content_type":"text/x-python","patch_set":4,"id":"ce322955_e62661ae","line":747,"in_reply_to":"dc5b1e0c_5b329b6f","updated":"2025-02-06 11:29:25.000000000","message":"ok,now leave 10 seconds for all instance live migration task check.","commit_id":"5dd3f2442819afd4574446482ccb1b8f0c16d876"}]}