)]}'
{"/COMMIT_MSG":[{"author":{"_account_id":34858,"name":"J.P.Klippel","email":"openstack@kl1pp3l.de","username":"jklippel"},"change_message_id":"c54a1876ff03d529ab0db957301d2fb8e408a1ca","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":1,"id":"c8324f29_0f6b5d05","line":19,"updated":"2022-12-15 07:05:00.000000000","message":"You could add\n\nCloses-Bug: #1999607\n\nto your commit-message, making it easier to find your bug report.","commit_id":"3d7c59e15f6189aaec801993ddfe15716f0b1532"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"658fb7dbcd642ac70602b8bd2dbf974d4215f824","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":1,"id":"effe2201_126c40d2","line":19,"in_reply_to":"c8324f29_0f6b5d05","updated":"2022-12-15 08:12:43.000000000","message":"Added it in patchset 2.","commit_id":"3d7c59e15f6189aaec801993ddfe15716f0b1532"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"45861599bca3ccad49306072079f3dde625aed76","unresolved":true,"context_lines":[{"line_number":6,"context_line":""},{"line_number":7,"context_line":"libvirt: retry libvirt connection on live_migration_monitor"},{"line_number":8,"context_line":""},{"line_number":9,"context_line":"When libvirtd is restarted, libvirtd aborts live migrations and"},{"line_number":10,"context_line":"disconnect client connection. In this case, current nova fails to"},{"line_number":11,"context_line":"continue live_migration_monitor and fails with unclean state."},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"By this commit, at least for the cases that the domain is still on the"},{"line_number":14,"context_line":"source host, nova can know that the live migration is cancelled and thus"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":3,"id":"ca285878_bbd15189","line":11,"range":{"start_line":9,"start_character":0,"end_line":11,"end_character":61},"updated":"2023-01-20 09:34:00.000000000","message":"i suspect that this is infact correct however that is not a edgecase that we intended to support.\n\nwe can attempt to harden nova for in advertent restart due to an error in libvirt and systemd startign it again via a restart poicly or socket activation but operators should be aware that if they intentionlly restart libvirt while vms are migrating the should expect it to break nova.\n\nso this change is protecting against inadvertnet restart where we are still in the window where a revert is valid but does not make restarting libvirt during a live migration supported.","commit_id":"51d888c053aa38d94937d850fb4c8afc3fddbaaf"}],"/PATCHSET_LEVEL":[{"author":{"_account_id":7730,"name":"Sahid Orentino Ferdjaoui","email":"sahid.ferdjaoui@industrialdiscipline.com","username":"sahid"},"change_message_id":"cce005f23bccc6aadaa95d6c1f140ed859656044","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"2b479cb9_3daa1b66","updated":"2022-12-13 10:39:20.000000000","message":"Looks like a valid case, could you open a bug for it and report details, perhaps logs?","commit_id":"3d7c59e15f6189aaec801993ddfe15716f0b1532"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"d4ab8be234c53fb8c49e77726648a8c26dcbac03","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"304b5de9_ba51b515","updated":"2022-12-12 08:17:07.000000000","message":"recheck","commit_id":"3d7c59e15f6189aaec801993ddfe15716f0b1532"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"5c3f44b849bdebf9712dd2f20ee6becb441fef70","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"91741444_5d884cce","updated":"2022-12-09 07:50:20.000000000","message":"recheck","commit_id":"3d7c59e15f6189aaec801993ddfe15716f0b1532"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"81de568bbffa848c3d73decbf8bbef2e36704c3c","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"e3a47a4e_7f96d1b7","in_reply_to":"2b479cb9_3daa1b66","updated":"2022-12-14 07:22:56.000000000","message":"Thank you for your comment, I wrote a bug report for this.\n\nhttps://bugs.launchpad.net/nova/+bug/1999607","commit_id":"3d7c59e15f6189aaec801993ddfe15716f0b1532"},{"author":{"_account_id":19234,"name":"Alexey Stupnikov","email":"aleksey.stupnikov@gmail.com","username":"astupnikov"},"change_message_id":"9bce16796eb55dbe55cb7dbd3de7c02f7de62d97","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":2,"id":"a0905817_58e7a348","updated":"2023-01-19 11:01:20.000000000","message":"I think that it would be nice to introduce some functional test to reproduce the problem and properly capture VM\u0027s state change before this change.","commit_id":"1680b139050aded74b11e7e3719fef87534777dd"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"13e995c6edd315afb4fab85296f0cee39dbc4fe0","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"29dfdb09_9736bcb1","updated":"2023-01-10 05:59:51.000000000","message":"Sorry maybe I pressed the wrong button, I think I correctly reverted...","commit_id":"1680b139050aded74b11e7e3719fef87534777dd"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"45861599bca3ccad49306072079f3dde625aed76","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"ffba81fc_2a10294c","updated":"2023-01-20 09:34:00.000000000","message":"This need a functional regression test to demonstrate the exsitng live migration bug in a seperate patch and then this patch should be be the second in the seriese which adresses the bug.\n\nhttps://github.com/openstack/nova/blob/master/nova/tests/functional/regressions/test_bug_1983753.py is an example of where to create the test and how to create one.\n\naddtionally this need a release note","commit_id":"51d888c053aa38d94937d850fb4c8afc3fddbaaf"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"06a43505220d3b7b5bcb224131f35ba30d6f7603","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"1ccd562c_5284db2e","in_reply_to":"ffba81fc_2a10294c","updated":"2023-03-16 06:12:47.000000000","message":"I added regression test in other dependent commit.\nhttps://review.opendev.org/c/openstack/nova/+/877582\n\nI added release not in patchset 6.","commit_id":"51d888c053aa38d94937d850fb4c8afc3fddbaaf"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"33ac842b7d86f6c683972cac23a76f22e527354d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"d8f9a57a_b64d29f3","updated":"2023-03-16 06:32:44.000000000","message":"For release note, I could not find the rules for file name but I added random 16 hex letters in the filename. Is it correct?","commit_id":"a5b7a452b23971b1c24e8a3616fc9a7c6d51bb1b"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"06a43505220d3b7b5bcb224131f35ba30d6f7603","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"978fd35e_debef0cb","updated":"2023-03-16 06:12:47.000000000","message":"Sorry for late response, thank you for your comments.\n\nI added new commit adding a test and then applied the suggestions in comments.\n\nhttps://review.opendev.org/c/openstack/nova/+/877582","commit_id":"a5b7a452b23971b1c24e8a3616fc9a7c6d51bb1b"},{"author":{"_account_id":7730,"name":"Sahid Orentino Ferdjaoui","email":"sahid.ferdjaoui@industrialdiscipline.com","username":"sahid"},"change_message_id":"3c914107a3f736e0ddacb31e195632f1022bf317","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"7bbd5fcf_8499056b","in_reply_to":"d8f9a57a_b64d29f3","updated":"2023-03-17 09:45:33.000000000","message":"You should just need to install \u0027reno\u0027 and use \u0027reno new \"blabla\"\u0027","commit_id":"a5b7a452b23971b1c24e8a3616fc9a7c6d51bb1b"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"0e0b3cd7d0e6dbc3f71dd9025e27fddae2cb048d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":8,"id":"cade750a_8459214b","updated":"2023-07-07 04:55:42.000000000","message":"Hi, how is the status of this patch?","commit_id":"e6457fa581142458772bbf1fe0fef9e6240c73d5"},{"author":{"_account_id":7730,"name":"Sahid Orentino Ferdjaoui","email":"sahid.ferdjaoui@industrialdiscipline.com","username":"sahid"},"change_message_id":"3c914107a3f736e0ddacb31e195632f1022bf317","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":8,"id":"988ec748_8f4528f5","updated":"2023-03-17 09:45:33.000000000","message":"Sounds good thank you.","commit_id":"e6457fa581142458772bbf1fe0fef9e6240c73d5"},{"author":{"_account_id":8878,"name":"Masahito Muroi","email":"masahito.muroi@linecorp.com","username":"masa"},"change_message_id":"96ead4b73eb87917a11e19e838a100d80ac69ac0","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":8,"id":"79adf42e_71ae1038","updated":"2024-12-30 16:45:59.000000000","message":"This is weak -1. But if it\u0027s easy to detect the job phase at reconnection, it\u0027s better to check job phase and to handle some extra tasks basing on the phase.","commit_id":"e6457fa581142458772bbf1fe0fef9e6240c73d5"}],"nova/virt/libvirt/driver.py":[{"author":{"_account_id":7730,"name":"Sahid Orentino Ferdjaoui","email":"sahid.ferdjaoui@industrialdiscipline.com","username":"sahid"},"change_message_id":"d54018a2a8c07504012ffd368c29163f7234dd22","unresolved":true,"context_lines":[{"line_number":10362,"context_line":"        monitor_interval_sec \u003d 0.5"},{"line_number":10363,"context_line":"        while True:"},{"line_number":10364,"context_line":"            retries \u003d 120"},{"line_number":10365,"context_line":"            for i in range(retries):"},{"line_number":10366,"context_line":"                try:"},{"line_number":10367,"context_line":"                    if i \u003e 0:"},{"line_number":10368,"context_line":"                        # libvirt connection was lost on previous iteration."}],"source_content_type":"text/x-python","patch_set":2,"id":"240bbc82_06d0e63e","line":10365,"updated":"2023-01-18 12:20:45.000000000","message":"I don\u0027t think that :you need this other for loop. You can reuse the one that is already exist and add a test in it.\n\n  while True:\n    try:\n      info \u003d guest.get_job_info()\n    except libvirtError:\n      LOG....\n      continue\n   \nYou can also add a retry mech so that this is not an infinty loop.","commit_id":"1680b139050aded74b11e7e3719fef87534777dd"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"45861599bca3ccad49306072079f3dde625aed76","unresolved":true,"context_lines":[{"line_number":10397,"context_line":"                    LOG.exception(\"Cannot connect to libvirt, and cannot \""},{"line_number":10398,"context_line":"                                  \"know the result of migration job. \""},{"line_number":10399,"context_line":"                                  \"Giving up.\", instance\u003dinstance)"},{"line_number":10400,"context_line":"                    raise"},{"line_number":10401,"context_line":"                LOG.info(\"Cannot connect to libvirt, but live migration \""},{"line_number":10402,"context_line":"                         \"should be ongoing. Retry %(current)d / %(max)d\","},{"line_number":10403,"context_line":"                         {\u0027current\u0027: get_guest_failure_count, \u0027max\u0027: retries_limit},"}],"source_content_type":"text/x-python","patch_set":3,"id":"a29ceda7_6d4a361b","line":10400,"range":{"start_line":10400,"start_character":20,"end_line":10400,"end_character":25},"updated":"2023-01-20 09:34:00.000000000","message":"there is a specific edgecase that im considerign namely that if we have entered the post copy phase we must not attempt to roolback.\n\nnova only allow live migrtation to rollback while the vm is still running on the source host if there are any failure on the dest host we must not attempt to rollback.\n\nso if libvirt is restarted durign postcopy nova must not attempt to revert.","commit_id":"51d888c053aa38d94937d850fb4c8afc3fddbaaf"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"59db33ed8b5f4a8a8da9bc3a1b2ea05eb8a4fe20","unresolved":true,"context_lines":[{"line_number":10397,"context_line":"                    LOG.exception(\"Cannot connect to libvirt, and cannot \""},{"line_number":10398,"context_line":"                                  \"know the result of migration job. \""},{"line_number":10399,"context_line":"                                  \"Giving up.\", instance\u003dinstance)"},{"line_number":10400,"context_line":"                    raise"},{"line_number":10401,"context_line":"                LOG.info(\"Cannot connect to libvirt, but live migration \""},{"line_number":10402,"context_line":"                         \"should be ongoing. Retry %(current)d / %(max)d\","},{"line_number":10403,"context_line":"                         {\u0027current\u0027: get_guest_failure_count, \u0027max\u0027: retries_limit},"}],"source_content_type":"text/x-python","patch_set":3,"id":"463bad4a_2b6bbb8f","line":10400,"range":{"start_line":10400,"start_character":20,"end_line":10400,"end_character":25},"in_reply_to":"39dbbb88_4cfa711c","updated":"2025-01-07 09:53:02.000000000","message":"Sorry, I\u0027m not sure what the situation means.\n\nIn my understanding, if step 2 means that live-migration in libvirt (or qemu) perspective is completed, then the situation should be as follows:\n\n- nova-compute must not rollback the live migration anymore,  as VM process is running on the destination. Currently, live_migration_abort emits signal to libvirt but it will result that nothing happens.\n- If step 3 means that libvirt daemon is literally down, then information about migration is lost there, regardless of failed or succeeded. Therefore, even if connection revives later, nova-compute will miss to get the corresponding information.\n- If step 3 includes libvirt daemon is alive but connection has error, then new code will just retry until step 6 happens.\n\nAbove is my understanding but I guess this is different from your expectation. Could you explain a bit more about the situation?","commit_id":"51d888c053aa38d94937d850fb4c8afc3fddbaaf"},{"author":{"_account_id":8878,"name":"Masahito Muroi","email":"masahito.muroi@linecorp.com","username":"masa"},"change_message_id":"87d9bab249a3fb131c18ae7c203027789de32b8d","unresolved":true,"context_lines":[{"line_number":10397,"context_line":"                    LOG.exception(\"Cannot connect to libvirt, and cannot \""},{"line_number":10398,"context_line":"                                  \"know the result of migration job. \""},{"line_number":10399,"context_line":"                                  \"Giving up.\", instance\u003dinstance)"},{"line_number":10400,"context_line":"                    raise"},{"line_number":10401,"context_line":"                LOG.info(\"Cannot connect to libvirt, but live migration \""},{"line_number":10402,"context_line":"                         \"should be ongoing. Retry %(current)d / %(max)d\","},{"line_number":10403,"context_line":"                         {\u0027current\u0027: get_guest_failure_count, \u0027max\u0027: retries_limit},"}],"source_content_type":"text/x-python","patch_set":3,"id":"b340ea8e_25b909e4","line":10400,"range":{"start_line":10400,"start_character":20,"end_line":10400,"end_character":25},"in_reply_to":"463bad4a_2b6bbb8f","updated":"2025-01-09 06:52:00.000000000","message":"According to the following comment, I imagine the calling the libvirt job abort API is required to cancel job or clean up any objects in src or dest host after any libvirt API exception. But this change hides all `libvirt.libvirtError` by the retry logic.\n\nIf this behavior change is not an issue, I have +1 for this change. Please correct me if my comment is not correct.\n\n\n```\n        except Exception as ex:\n            LOG.warning(\"Error monitoring migration: %(ex)s\",\n                        {\"ex\": ex}, instance\u003dinstance, exc_info\u003dTrue)\n            # NOTE(aarents): Ensure job is aborted if still running before\n            # raising the exception so this would avoid the migration to be\n            # done and the libvirt guest to be resumed on the target while\n            # the instance record would still related to the source host.\n            try:\n                # If migration is running in post-copy mode and guest\n                # already running on dest host, libvirt will refuse to\n                # cancel migration job.\n                self.live_migration_abort(instance)\n```","commit_id":"51d888c053aa38d94937d850fb4c8afc3fddbaaf"},{"author":{"_account_id":8878,"name":"Masahito Muroi","email":"masahito.muroi@linecorp.com","username":"masa"},"change_message_id":"96ead4b73eb87917a11e19e838a100d80ac69ac0","unresolved":true,"context_lines":[{"line_number":10397,"context_line":"                    LOG.exception(\"Cannot connect to libvirt, and cannot \""},{"line_number":10398,"context_line":"                                  \"know the result of migration job. \""},{"line_number":10399,"context_line":"                                  \"Giving up.\", instance\u003dinstance)"},{"line_number":10400,"context_line":"                    raise"},{"line_number":10401,"context_line":"                LOG.info(\"Cannot connect to libvirt, but live migration \""},{"line_number":10402,"context_line":"                         \"should be ongoing. Retry %(current)d / %(max)d\","},{"line_number":10403,"context_line":"                         {\u0027current\u0027: get_guest_failure_count, \u0027max\u0027: retries_limit},"}],"source_content_type":"text/x-python","patch_set":3,"id":"39dbbb88_4cfa711c","line":10400,"range":{"start_line":10400,"start_character":20,"end_line":10400,"end_character":25},"in_reply_to":"6e16385c_fb9cc08b","updated":"2024-12-30 16:45:59.000000000","message":"I\u0027d say it could trigger the edge case. Of course, it could happen only in a super short period.\n\n1. N times while-loop works fine with `libvirt.VIR_DOMAIN_JOB_UNBOUNDED`\n2. Move migration phase to post-copy\n3. libvirt down somehow\n4. nova-compute reconnects libvirt again and raises libvirt.libvirtError in N+1 loop.\n5. libvirt is revived.\n6. libvirt connection is also revived at N+2 loop.\n\nThen no chance to call `libvit_migration_abort` method. Original code tries to call the `livbirt_migration_abort` at the first exception, at step4 of the example.\n\nIf step6 or info object can detects the libvirt reconnection or migration phase, the change can handle the timing issue.","commit_id":"51d888c053aa38d94937d850fb4c8afc3fddbaaf"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"06a43505220d3b7b5bcb224131f35ba30d6f7603","unresolved":true,"context_lines":[{"line_number":10397,"context_line":"                    LOG.exception(\"Cannot connect to libvirt, and cannot \""},{"line_number":10398,"context_line":"                                  \"know the result of migration job. \""},{"line_number":10399,"context_line":"                                  \"Giving up.\", instance\u003dinstance)"},{"line_number":10400,"context_line":"                    raise"},{"line_number":10401,"context_line":"                LOG.info(\"Cannot connect to libvirt, but live migration \""},{"line_number":10402,"context_line":"                         \"should be ongoing. Retry %(current)d / %(max)d\","},{"line_number":10403,"context_line":"                         {\u0027current\u0027: get_guest_failure_count, \u0027max\u0027: retries_limit},"}],"source_content_type":"text/x-python","patch_set":3,"id":"6e16385c_fb9cc08b","line":10400,"range":{"start_line":10400,"start_character":20,"end_line":10400,"end_character":25},"in_reply_to":"a29ceda7_6d4a361b","updated":"2023-03-16 06:12:47.000000000","message":"I think this raise will not change the behavior for that case.","commit_id":"51d888c053aa38d94937d850fb4c8afc3fddbaaf"},{"author":{"_account_id":26286,"name":"huanhongda","email":"hongda.xun@easystack.cn","username":"huanhongda"},"change_message_id":"546ae279d37f14f93a0a99a535527a5e061d7b03","unresolved":true,"context_lines":[{"line_number":10397,"context_line":"        # vpmem does not support post copy"},{"line_number":10398,"context_line":"        is_post_copy_enabled \u0026\u003d not bool(self._get_vpmems(instance))"},{"line_number":10399,"context_line":"        while True:"},{"line_number":10400,"context_line":"            info \u003d guest.get_job_info()"},{"line_number":10401,"context_line":""},{"line_number":10402,"context_line":"            if info.type \u003d\u003d libvirt.VIR_DOMAIN_JOB_NONE:"},{"line_number":10403,"context_line":"                # Either still running, or failed or completed,"}],"source_content_type":"text/x-python","patch_set":8,"id":"1d7ff6e0_1ac876da","side":"PARENT","line":10400,"range":{"start_line":10400,"start_character":12,"end_line":10400,"end_character":39},"updated":"2025-01-11 14:51:28.000000000","message":"If libvirtd restarted, we can allow this method raise exception. And rollback in manager.py. Such as: https://review.opendev.org/c/openstack/nova/+/938154/3/nova/compute/manager.py#9545","commit_id":"c1a7bac6ff06d1407034e2cc9278ce70b7cf6089"},{"author":{"_account_id":35587,"name":"Hiroki Narukawa","email":"hnarukaw@lycorp.co.jp","username":"nhirokinet"},"change_message_id":"bc20b2c2745cc852ecf6c71edef4685a8fe5606e","unresolved":true,"context_lines":[{"line_number":10397,"context_line":"        # vpmem does not support post copy"},{"line_number":10398,"context_line":"        is_post_copy_enabled \u0026\u003d not bool(self._get_vpmems(instance))"},{"line_number":10399,"context_line":"        while True:"},{"line_number":10400,"context_line":"            info \u003d guest.get_job_info()"},{"line_number":10401,"context_line":""},{"line_number":10402,"context_line":"            if info.type \u003d\u003d libvirt.VIR_DOMAIN_JOB_NONE:"},{"line_number":10403,"context_line":"                # Either still running, or failed or completed,"}],"source_content_type":"text/x-python","patch_set":8,"id":"ec487d99_18a91356","side":"PARENT","line":10400,"range":{"start_line":10400,"start_character":12,"end_line":10400,"end_character":39},"in_reply_to":"1d7ff6e0_1ac876da","updated":"2025-01-24 05:01:39.000000000","message":"Thank you for your comment, the issue looks like the same as mine.\nLooks like your patch is better in a point that it supports rolling back for more problems.","commit_id":"c1a7bac6ff06d1407034e2cc9278ce70b7cf6089"}]}
