)]}'
{"/COMMIT_MSG":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"457986a903792b0e69912ea1acc032732fc35f6e","unresolved":false,"context_lines":[{"line_number":12,"context_line":"this patch proposes such changes:"},{"line_number":13,"context_line":""},{"line_number":14,"context_line":"1.Do not treat VIR_ERR_OPERATION_INVALID as migration job completed."},{"line_number":15,"context_line":"2.`Pretend` the migration job is still running after hitting"},{"line_number":16,"context_line":"  VIR_ERR_OPERATION_INVALID by returning VIR_DOMAIN_JOB_UNBOUNDED for"},{"line_number":17,"context_line":"  a few times."},{"line_number":18,"context_line":"3.Number of retries can be configured by CONF.job_info_retry_count."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":12,"id":"3fa7e38b_f8cbbd6f","line":15,"updated":"2020-01-06 14:33:59.000000000","message":"can we ever complete the migration successfully at this point. if we are doing a live migration and qemu exits on the source or dest then it is going to fail. we should probably try and revert instead if it not a post copy migration. If it is post then we proably should mark the migration as failed.\n\nin a post copy migration if we have already got to the post copy stage then if either qemu exits we cannot recover as the instnace is executing on the dest node at that point paging ram locally and remotely form the source node.\n\nin the non post copy case or if we have not switch to post copy mode then we should be able to revert and continue executing on the source node provided the source vm is not the one that was killed. \n\nif the source vm is the the one that is killed we can only ever complete sucessfully if we are doing a pre copy migration and we have already swapped to executing on the remote node. in all other cases if the source node is killed the we cant complete succesfully as the contents of ram form the source node will be lost.","commit_id":"bf12c9e065458f6919027539236f0f5ad5e267bc"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"7306be0dd8451ce157167b972261057c8eb6177d","unresolved":false,"context_lines":[{"line_number":12,"context_line":"this patch proposes such changes:"},{"line_number":13,"context_line":""},{"line_number":14,"context_line":"1.Do not treat VIR_ERR_OPERATION_INVALID as migration job completed."},{"line_number":15,"context_line":"2.`Pretend` the migration job is still running after hitting"},{"line_number":16,"context_line":"  VIR_ERR_OPERATION_INVALID by returning VIR_DOMAIN_JOB_UNBOUNDED for"},{"line_number":17,"context_line":"  a few times."},{"line_number":18,"context_line":"3.Number of retries can be configured by CONF.job_info_retry_count."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":12,"id":"1f493fa4_3cd4f6bc","line":15,"in_reply_to":"3fa7e38b_f8cbbd6f","updated":"2020-04-27 21:01:14.000000000","message":"From what Alexandre has commented on Feb 4, it is possible to complete the migration successfully at this point. He said that VIR_ERR_OPERATION_INVALID is briefly a valid state during a successful live migration. The state is seen briefly before the virt domain is undefined and the migration is completed.\n\nIf the domain stays in the VIR_ERR_OPERATION_INVALID state \"too long\" and never gets to undefining the virt domain, the live migration has failed. And this patch is returning VIR_DOMAIN_JOB_FAILED if it receives VIR_ERR_OPERATION_INVALID the configured number of times, and that will trigger the recovery/rollback routine for live migration.\n\nSo it seems like this will do the right thing or at the very least would be a big improvement over what we have today.","commit_id":"bf12c9e065458f6919027539236f0f5ad5e267bc"}],"nova/conf/compute.py":[{"author":{"_account_id":7634,"name":"Takashi Natsume","email":"takanattie@gmail.com","username":"natsumet"},"change_message_id":"7762732cb4f65cab4f62c98309bdec2eada00a3f","unresolved":false,"context_lines":[{"line_number":594,"context_line":""},{"line_number":595,"context_line":"* Any positive integer representing retry count."},{"line_number":596,"context_line":"\"\"\"),"},{"line_number":597,"context_line":"    cfg.IntOpt(\u0027job_info_retry_count\u0027,"},{"line_number":598,"context_line":"        default\u003d3,"},{"line_number":599,"context_line":"        min\u003d0,"},{"line_number":600,"context_line":"        help\u003d\"\"\""}],"source_content_type":"text/x-python","patch_set":7,"id":"9fdfeff1_20d441e9","line":597,"range":{"start_line":597,"start_character":15,"end_line":597,"end_character":37},"updated":"2019-03-04 13:15:09.000000000","message":"Why isn\u0027t this option in nova/conf/libvirt.py instead of this file?","commit_id":"06872acc330b9535a94156c957ebd24eb8b59fb9"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"025d8c354b6ea584320beed3c73bec2f3265262f","unresolved":false,"context_lines":[{"line_number":594,"context_line":""},{"line_number":595,"context_line":"* Any positive integer representing retry count."},{"line_number":596,"context_line":"\"\"\"),"},{"line_number":597,"context_line":"    cfg.IntOpt(\u0027job_info_retry_count\u0027,"},{"line_number":598,"context_line":"        default\u003d3,"},{"line_number":599,"context_line":"        min\u003d0,"},{"line_number":600,"context_line":"        help\u003d\"\"\""}],"source_content_type":"text/x-python","patch_set":7,"id":"9fdfeff1_48fcfab4","line":597,"range":{"start_line":597,"start_character":15,"end_line":597,"end_character":37},"in_reply_to":"9fdfeff1_20d441e9","updated":"2019-03-05 03:25:14.000000000","message":"Done","commit_id":"06872acc330b9535a94156c957ebd24eb8b59fb9"},{"author":{"_account_id":7634,"name":"Takashi Natsume","email":"takanattie@gmail.com","username":"natsumet"},"change_message_id":"7762732cb4f65cab4f62c98309bdec2eada00a3f","unresolved":false,"context_lines":[{"line_number":600,"context_line":"        help\u003d\"\"\""},{"line_number":601,"context_line":"Maximum number of retries in guest.get_job_info(). It specifies number"},{"line_number":602,"context_line":"of retries to get domain job stats while hitting libvirt error code"},{"line_number":603,"context_line":"VIR_ERR_OPERATION_INVALID."},{"line_number":604,"context_line":""},{"line_number":605,"context_line":"Possible values:"},{"line_number":606,"context_line":""}],"source_content_type":"text/x-python","patch_set":7,"id":"9fdfeff1_c027751e","line":603,"updated":"2019-03-04 13:15:09.000000000","message":"The description for 0 case should be added. (Retry is disabled?)","commit_id":"06872acc330b9535a94156c957ebd24eb8b59fb9"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"025d8c354b6ea584320beed3c73bec2f3265262f","unresolved":false,"context_lines":[{"line_number":600,"context_line":"        help\u003d\"\"\""},{"line_number":601,"context_line":"Maximum number of retries in guest.get_job_info(). It specifies number"},{"line_number":602,"context_line":"of retries to get domain job stats while hitting libvirt error code"},{"line_number":603,"context_line":"VIR_ERR_OPERATION_INVALID."},{"line_number":604,"context_line":""},{"line_number":605,"context_line":"Possible values:"},{"line_number":606,"context_line":""}],"source_content_type":"text/x-python","patch_set":7,"id":"9fdfeff1_e82a8e2d","line":603,"in_reply_to":"9fdfeff1_c027751e","updated":"2019-03-05 03:25:14.000000000","message":"Done","commit_id":"06872acc330b9535a94156c957ebd24eb8b59fb9"},{"author":{"_account_id":7634,"name":"Takashi Natsume","email":"takanattie@gmail.com","username":"natsumet"},"change_message_id":"7762732cb4f65cab4f62c98309bdec2eada00a3f","unresolved":false,"context_lines":[{"line_number":604,"context_line":""},{"line_number":605,"context_line":"Possible values:"},{"line_number":606,"context_line":""},{"line_number":607,"context_line":"* Any positive integer representing retry count."},{"line_number":608,"context_line":"\"\"\"),"},{"line_number":609,"context_line":"    cfg.BoolOpt(\u0027resume_guests_state_on_host_boot\u0027,"},{"line_number":610,"context_line":"        default\u003dFalse,"}],"source_content_type":"text/x-python","patch_set":7,"id":"9fdfeff1_40f62593","line":607,"range":{"start_line":607,"start_character":0,"end_line":607,"end_character":48},"updated":"2019-03-04 13:15:09.000000000","message":"Zero or any positive integer","commit_id":"06872acc330b9535a94156c957ebd24eb8b59fb9"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"025d8c354b6ea584320beed3c73bec2f3265262f","unresolved":false,"context_lines":[{"line_number":604,"context_line":""},{"line_number":605,"context_line":"Possible values:"},{"line_number":606,"context_line":""},{"line_number":607,"context_line":"* Any positive integer representing retry count."},{"line_number":608,"context_line":"\"\"\"),"},{"line_number":609,"context_line":"    cfg.BoolOpt(\u0027resume_guests_state_on_host_boot\u0027,"},{"line_number":610,"context_line":"        default\u003dFalse,"}],"source_content_type":"text/x-python","patch_set":7,"id":"9fdfeff1_0830329e","line":607,"range":{"start_line":607,"start_character":0,"end_line":607,"end_character":48},"in_reply_to":"9fdfeff1_40f62593","updated":"2019-03-05 03:25:14.000000000","message":"Done","commit_id":"06872acc330b9535a94156c957ebd24eb8b59fb9"}],"nova/conf/libvirt.py":[{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":40,"context_line":"\"\"\")"},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"libvirt_general_opts \u003d ["},{"line_number":43,"context_line":"    cfg.IntOpt(\u0027job_info_retry_count\u0027,"},{"line_number":44,"context_line":"        default\u003d3,"},{"line_number":45,"context_line":"        min\u003d0,"},{"line_number":46,"context_line":"        help\u003d\"\"\""}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_fe3e5289","line":43,"range":{"start_line":43,"start_character":16,"end_line":43,"end_character":36},"updated":"2019-03-13 12:13:15.000000000","message":"So this is going to be used for *all* jobs? Including things like blockRebase jobs? Or is this going to be restricted to just live migration jobs?\n\n(later)\n\nIt looks like this is only being used for live migration, so either we should say something in here about how this is only used during live migration, or you should make this a live migration specific option name, e.g. \"live_migration_job_info_retry_count\".","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":40,"context_line":"\"\"\")"},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"libvirt_general_opts \u003d ["},{"line_number":43,"context_line":"    cfg.IntOpt(\u0027job_info_retry_count\u0027,"},{"line_number":44,"context_line":"        default\u003d3,"},{"line_number":45,"context_line":"        min\u003d0,"},{"line_number":46,"context_line":"        help\u003d\"\"\""}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_fe4cfbc8","line":43,"range":{"start_line":43,"start_character":16,"end_line":43,"end_character":36},"in_reply_to":"5fc1f717_fe3e5289","updated":"2019-03-14 11:38:21.000000000","message":"Yes, it\u0027s only used for live migration. Fixed.","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":46,"context_line":"        help\u003d\"\"\""},{"line_number":47,"context_line":"Maximum number of retries in guest.get_job_info(). It specifies number"},{"line_number":48,"context_line":"of retries to get domain job stats while hitting libvirt error code"},{"line_number":49,"context_line":"VIR_ERR_OPERATION_INVALID. Specially, zero means to disable retry."},{"line_number":50,"context_line":""},{"line_number":51,"context_line":"Possible values:"},{"line_number":52,"context_line":""}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_be4c4a42","line":49,"range":{"start_line":49,"start_character":27,"end_line":49,"end_character":36},"updated":"2019-03-13 12:13:15.000000000","message":"I\u0027m not sure if this should be \"Specifically\" but it doesn\u0027t make much of a difference - I would just remove this so you have \"Zero means retries are disabled.\"","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":46,"context_line":"        help\u003d\"\"\""},{"line_number":47,"context_line":"Maximum number of retries in guest.get_job_info(). It specifies number"},{"line_number":48,"context_line":"of retries to get domain job stats while hitting libvirt error code"},{"line_number":49,"context_line":"VIR_ERR_OPERATION_INVALID. Specially, zero means to disable retry."},{"line_number":50,"context_line":""},{"line_number":51,"context_line":"Possible values:"},{"line_number":52,"context_line":""}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_1e4a27ab","line":49,"range":{"start_line":49,"start_character":27,"end_line":49,"end_character":36},"in_reply_to":"5fc1f717_be4c4a42","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"7306be0dd8451ce157167b972261057c8eb6177d","unresolved":false,"context_lines":[{"line_number":46,"context_line":"        help\u003d\"\"\""},{"line_number":47,"context_line":"Maximum number of retries in guest.get_job_info(). It specifies number"},{"line_number":48,"context_line":"of retries to get domain job stats while hitting libvirt error code"},{"line_number":49,"context_line":"VIR_ERR_OPERATION_INVALID. Zero means to disable retry."},{"line_number":50,"context_line":""},{"line_number":51,"context_line":"Possible values:"},{"line_number":52,"context_line":""}],"source_content_type":"text/x-python","patch_set":12,"id":"1f493fa4_9ccd4a68","line":49,"updated":"2020-04-27 21:01:14.000000000","message":"I think this should contain a bit more info explaining why you would want to retry on VIR_ERR_OPERATION_INVALID. Something to do with how VIR_ERR_OPERATION_INVALID is returned (briefly) during a normal successful live migration. But if it is returned repeatedly and the virt domain does not become undefined, it represents a failure in live migration and thus server cleanup routines should *not* be done, else they result in data loss.","commit_id":"bf12c9e065458f6919027539236f0f5ad5e267bc"}],"nova/virt/libvirt/driver.py":[{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":7356,"context_line":"        n \u003d 0"},{"line_number":7357,"context_line":"        start \u003d time.time()"},{"line_number":7358,"context_line":"        is_post_copy_enabled \u003d self._is_post_copy_enabled(migration_flags)"},{"line_number":7359,"context_line":"        retry \u003d {\"num_attempts\": 0}"},{"line_number":7360,"context_line":"        while True:"},{"line_number":7361,"context_line":"            info \u003d guest.get_job_info(retry\u003dretry)"},{"line_number":7362,"context_line":""}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_befa0aba","line":7359,"range":{"start_line":7359,"start_character":16,"end_line":7359,"end_character":35},"updated":"2019-03-13 12:13:15.000000000","message":"This is kind of clunky, do we need a dict? Why not just pass in a num_attempts parameter? Does it need to be a dict so you can change the value by reference?","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":7356,"context_line":"        n \u003d 0"},{"line_number":7357,"context_line":"        start \u003d time.time()"},{"line_number":7358,"context_line":"        is_post_copy_enabled \u003d self._is_post_copy_enabled(migration_flags)"},{"line_number":7359,"context_line":"        retry \u003d {\"num_attempts\": 0}"},{"line_number":7360,"context_line":"        while True:"},{"line_number":7361,"context_line":"            info \u003d guest.get_job_info(retry\u003dretry)"},{"line_number":7362,"context_line":""}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_fe2b9b91","line":7359,"range":{"start_line":7359,"start_character":16,"end_line":7359,"end_character":35},"in_reply_to":"5fc1f717_befa0aba","updated":"2019-03-14 11:38:21.000000000","message":"Using a dict as key word argument can help reduce unit test failures and yes, I use a dict so I can change the value by reference.","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"}],"nova/virt/libvirt/guest.py":[{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"23d3ddcdb9fac10facc753eea95143df2c8ef862","unresolved":false,"context_lines":[{"line_number":723,"context_line":"                                      num_attempts + 1)"},{"line_number":724,"context_line":"                            retry[\u0027num_attempts\u0027] +\u003d 1"},{"line_number":725,"context_line":"                            return JobInfo("},{"line_number":726,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":727,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":728,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":729,"context_line":"                else:"}],"source_content_type":"text/x-python","patch_set":3,"id":"3f79a3b5_f1d7664f","line":726,"updated":"2018-10-24 20:20:34.000000000","message":"I\u0027m not sure we want to add this retry logic, it\u0027s not directly related to the bug (wrong assumption of migration complete). And I would think, if you get INVALID once, there\u0027s a fair chance you will get it again during your retries.","commit_id":"d6af44fec3e65b13e02ae301a38adcd5f3a1ebc7"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"23d3ddcdb9fac10facc753eea95143df2c8ef862","unresolved":false,"context_lines":[{"line_number":724,"context_line":"                            retry[\u0027num_attempts\u0027] +\u003d 1"},{"line_number":725,"context_line":"                            return JobInfo("},{"line_number":726,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":727,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":728,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":729,"context_line":"                else:"},{"line_number":730,"context_line":"                    LOG.debug(\"Failed to get job stats: %s\", ex)"},{"line_number":731,"context_line":"                    raise"}],"source_content_type":"text/x-python","patch_set":3,"id":"3f79a3b5_9184f25e","line":728,"range":{"start_line":727,"start_character":20,"end_line":728,"end_character":70},"updated":"2018-10-24 20:20:34.000000000","message":"Hm, based on the previous code, I\u0027m not sure we can assume that VIR_ERR_OPERATION_INVALID means that the live migration is not complete. That is, we may not be able to distinguish between migration complete (domain disappeared) or migration failed (qemu process died).\n\nI think we need some investigation into VIR_ERR_OPERATION_INVALID and if we might need to examine the error message to be able to differentiate between fail and complete.","commit_id":"d6af44fec3e65b13e02ae301a38adcd5f3a1ebc7"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"9dda27c10d3831e260a9c890f051480950701234","unresolved":false,"context_lines":[{"line_number":955,"context_line":"            # When migration of a transient guest completes, the guest"},{"line_number":956,"context_line":"            # goes away so we\u0027ll see NO_DOMAIN error code"},{"line_number":957,"context_line":"            #"},{"line_number":958,"context_line":"            # When migration of a persistent guest completes, the guest"},{"line_number":959,"context_line":"            # merely shuts off, but libvirt unhelpfully raises an"},{"line_number":960,"context_line":"            # OPERATION_INVALID error code"},{"line_number":961,"context_line":"            #"},{"line_number":962,"context_line":"            # Lets pretend both of these mean success"},{"line_number":963,"context_line":"            if ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_NO_DOMAIN:"}],"source_content_type":"text/x-python","patch_set":3,"id":"3f79a3b5_b1a32e9c","line":960,"range":{"start_line":958,"start_character":12,"end_line":960,"end_character":42},"updated":"2018-10-24 21:52:33.000000000","message":"This is where the original code came from -- libvirt raises INVALID when migration of a persistent guest completes. So I think we need to dig a little more to hopefully find a difference in the error message that we can use to tell shutoff vs process inaccessible.","commit_id":"d6af44fec3e65b13e02ae301a38adcd5f3a1ebc7"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"9dda27c10d3831e260a9c890f051480950701234","unresolved":false,"context_lines":[{"line_number":963,"context_line":"            if ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_NO_DOMAIN:"},{"line_number":964,"context_line":"                LOG.debug(\"Domain has shutdown/gone away: %s\", ex)"},{"line_number":965,"context_line":"                return cls(type\u003dlibvirt.VIR_DOMAIN_JOB_COMPLETED)"},{"line_number":966,"context_line":"            elif ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_OPERATION_INVALID:"},{"line_number":967,"context_line":"                if retry:"},{"line_number":968,"context_line":"                    num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":969,"context_line":"                    if num_attempts \u003c CONF.job_info_retry_count:"}],"source_content_type":"text/x-python","patch_set":3,"id":"3f79a3b5_dcb36134","line":966,"updated":"2018-10-24 21:52:33.000000000","message":"OK, I found the code where this is returned [1][2] (I\u0027m looking at v1.3.1 because that\u0027s our MIN_LIBVIRT_VERSION in nova/virt/libvirt/driver.py\n\nSo, when libvirt returns this error, the error message will be:\n\n virReportError(VIR_ERR_OPERATION_INVALID, \"%s\",\n _(\"domain is not running\"));\n\nI\u0027m thinking we could check if ex.get_error_message() contains \u0027domain is not running\u0027, we use the old behavior and treat the migration as complete. Else, we treat the migration as failed and return JOB_FAILED. According to the FAQ [3], stopping the daemon process should not shutdown VMs (please test this to be sure, if you can).\n\nWhat do you think?\n\n[1] https://github.com/libvirt/libvirt/blob/v1.3.1/src/qemu/qemu_driver.c#L13058\n[2] https://github.com/libvirt/libvirt/blob/v1.3.1/src/qemu/qemu_driver.c#L12999-L13023\n[3] https://wiki.libvirt.org/page/FAQ#Will_restarting_the_libvirt_daemon_stop_my_virtual_machines.3F","commit_id":"d6af44fec3e65b13e02ae301a38adcd5f3a1ebc7"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"9deb80d20edec924da56de9166785561c88c9307","unresolved":false,"context_lines":[{"line_number":963,"context_line":"            if ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_NO_DOMAIN:"},{"line_number":964,"context_line":"                LOG.debug(\"Domain has shutdown/gone away: %s\", ex)"},{"line_number":965,"context_line":"                return cls(type\u003dlibvirt.VIR_DOMAIN_JOB_COMPLETED)"},{"line_number":966,"context_line":"            elif ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_OPERATION_INVALID:"},{"line_number":967,"context_line":"                if retry:"},{"line_number":968,"context_line":"                    num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":969,"context_line":"                    if num_attempts \u003c CONF.job_info_retry_count:"}],"source_content_type":"text/x-python","patch_set":3,"id":"3f79a3b5_17df7238","line":966,"in_reply_to":"3f79a3b5_dcb36134","updated":"2018-10-24 22:29:55.000000000","message":"Sigh, upon re-reading the bug description, apparently libvirt will say \u0027domain is not running\u0027 even in the case where the qemu process has been killed via OOM killer. So, what I suggested isn\u0027t going to work.\n\nI was concerned that if we change VIR_ERR_OPERATION_INVALID from complete to failed, we\u0027ll report FAILED for migrations that actually completed.\n\nI think that for live migration, it might be OK because IIRC, live migration uses only a transient domain. And looking at the libvirt driver code, it doesn\u0027t look like we use this job info functionality for cold migration at all.","commit_id":"d6af44fec3e65b13e02ae301a38adcd5f3a1ebc7"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"5be4ae8be810e9b3d2b5ac23bbc8957e187bee60","unresolved":false,"context_lines":[{"line_number":723,"context_line":"                                      num_attempts + 1)"},{"line_number":724,"context_line":"                            retry[\u0027num_attempts\u0027] +\u003d 1"},{"line_number":725,"context_line":"                            return JobInfo("},{"line_number":726,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":727,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":728,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":729,"context_line":"                else:"}],"source_content_type":"text/x-python","patch_set":5,"id":"3f79a3b5_b329b63a","line":726,"updated":"2018-11-09 18:03:12.000000000","message":"Can you tell me a bit more about how the retry helps in your bug scenario? If the qemu-kvm process has been killed, are you finding that it is automatically restarted but not killed again? How is the retry helping you?","commit_id":"41011cd233a35bff47e3ca628779a5a3e3f528dc"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"5020f79751ef5e74b95110990f4f124c069c9adb","unresolved":false,"context_lines":[{"line_number":723,"context_line":"                                      num_attempts + 1)"},{"line_number":724,"context_line":"                            retry[\u0027num_attempts\u0027] +\u003d 1"},{"line_number":725,"context_line":"                            return JobInfo("},{"line_number":726,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":727,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":728,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":729,"context_line":"                else:"}],"source_content_type":"text/x-python","patch_set":5,"id":"3f79a3b5_6b19de73","line":726,"in_reply_to":"3f79a3b5_a59f28b0","updated":"2018-11-10 21:12:03.000000000","message":"Thanks for the explanation. That helps me understand your approach in this patch.\n\nSo, the retry is for the case where there is a short window of time the guest is SHUTOFF before it disappears from the source host. You could get a problem if the short window is just a bit too long and would have completed if you had waited more. And live migration would be considered failed. That is why you have the config option.\n\nAccording to the code comment on L958, when a persistent guest completes live migration, it shuts off but does _not_ go away. That is why we can\u0027t get NO_DOMAIN for the persistent guest. It will just remain SHUTOFF when it completes.\n\nSo, in that case, your approach is saying, we should use the jobStats() as the source of truth for completed migration. Persistent domain can SHUTOFF for the configured number of retries before we expect to receive JOB_COMPLETED.\n\nThat does seem like a reasonable way to handle this issue. I\u0027ll ask Kashyap to take a look at his earliest convenience.","commit_id":"41011cd233a35bff47e3ca628779a5a3e3f528dc"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"0f19eb658641c820dee6b82e54b30d77f09f3a7e","unresolved":false,"context_lines":[{"line_number":723,"context_line":"                                      num_attempts + 1)"},{"line_number":724,"context_line":"                            retry[\u0027num_attempts\u0027] +\u003d 1"},{"line_number":725,"context_line":"                            return JobInfo("},{"line_number":726,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":727,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":728,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":729,"context_line":"                else:"}],"source_content_type":"text/x-python","patch_set":5,"id":"3f79a3b5_a59f28b0","line":726,"in_reply_to":"3f79a3b5_b329b63a","updated":"2018-11-10 02:39:45.000000000","message":"While in the end of live migration, looks like the domain on the source host will be shut down and gone. Normally, we treating VIR_ERR_OPERATION_INVALID as success is fine because there surely is a short time SHUTOFF. But in my bug scenario, the qemu-kvm process is killed by OOM killer, and you can see the domain is SHUTOFF by ``virsh list --all``, it\u0027s STILL on the source host, not undefined or gone, but SHUTOFF, in such case, doing self._domain.jobStats() gets error VIR_ERR_OPERATION_INVALID. The qemu-kvm will not be restarted automatically but keep status SHUTOFF until someone finally notices. In such case, you cannot see VIR_ERR_OPERATION_INVALID as success, right?\n\nBy pretending that the migration job is still in progress while hitting VIR_ERR_OPERATION_INVALID and retry to get job stats, we will eventually distinguish whether the migration task is successful:\n* migration succeeds, domain is gone, `self._domain.jobStats()` gets VIR_ERR_NO_DOMAIN or just normal completion job stats.\n* migration fails, domain is SHUTOFF and not gone, `self._domain.jobStats()` gets VIR_ERR_OPERATION_INVALID\n\nIn a nut shell, we treat only the VIR_ERR_NO_DOMAIN exception as VIR_DOMAIN_JOB_COMPELTED. Of course, we won\u0027t mess with normal completion here.","commit_id":"41011cd233a35bff47e3ca628779a5a3e3f528dc"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"5be4ae8be810e9b3d2b5ac23bbc8957e187bee60","unresolved":false,"context_lines":[{"line_number":725,"context_line":"                            return JobInfo("},{"line_number":726,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":727,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":728,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":729,"context_line":"                else:"},{"line_number":730,"context_line":"                    LOG.debug(\"Failed to get job stats: %s\", ex)"},{"line_number":731,"context_line":"                    raise"}],"source_content_type":"text/x-python","patch_set":5,"id":"3f79a3b5_33f4c6cf","line":728,"range":{"start_line":728,"start_character":20,"end_line":728,"end_character":70},"updated":"2018-11-09 18:03:12.000000000","message":"I\u0027m still not sure this is the correct thing to do, considering that persistent guests will return this same error when they shut down, and should be considered \"job completed.\" \n\nWhat I\u0027m wondering is if we ever have persistent guests involved in live migration. I\u0027d like Kashyap to take a look at this review and give his comment.","commit_id":"41011cd233a35bff47e3ca628779a5a3e3f528dc"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"1ce21af534ac9ace3909eda09f409a8e3d95efd3","unresolved":false,"context_lines":[{"line_number":725,"context_line":"                            return JobInfo("},{"line_number":726,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":727,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":728,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":729,"context_line":"                else:"},{"line_number":730,"context_line":"                    LOG.debug(\"Failed to get job stats: %s\", ex)"},{"line_number":731,"context_line":"                    raise"}],"source_content_type":"text/x-python","patch_set":5,"id":"3f79a3b5_9015854e","line":728,"range":{"start_line":728,"start_character":20,"end_line":728,"end_character":70},"in_reply_to":"3f79a3b5_25847856","updated":"2018-11-10 20:31:36.000000000","message":"Even if it is for a short time, if it is possible for a persistent guest to live migrate, if it shuts down as part of the migration, this code will consider it to be \"job failed\" when it should be \"job completed.\"\n\nIf it is not possible for a persistent guest to live migrate, then this would be OK.","commit_id":"41011cd233a35bff47e3ca628779a5a3e3f528dc"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"0f19eb658641c820dee6b82e54b30d77f09f3a7e","unresolved":false,"context_lines":[{"line_number":725,"context_line":"                            return JobInfo("},{"line_number":726,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":727,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":728,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":729,"context_line":"                else:"},{"line_number":730,"context_line":"                    LOG.debug(\"Failed to get job stats: %s\", ex)"},{"line_number":731,"context_line":"                    raise"}],"source_content_type":"text/x-python","patch_set":5,"id":"3f79a3b5_25847856","line":728,"range":{"start_line":728,"start_character":20,"end_line":728,"end_character":70},"in_reply_to":"3f79a3b5_33f4c6cf","updated":"2018-11-10 02:39:45.000000000","message":"Thanks so much for the reply and the comments, Melanie. IMHO, shutoff status only keeps for a short time, the domain is eventually gone from source host and starts on the destination host. Hope someone can find the answer from libvirt or qemu codes. Really looks forwards to Kashyap\u0027s comments. :)","commit_id":"41011cd233a35bff47e3ca628779a5a3e3f528dc"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":693,"context_line":"        \"\"\"Switch running live migration to post-copy mode\"\"\""},{"line_number":694,"context_line":"        self._domain.migrateStartPostCopy()"},{"line_number":695,"context_line":""},{"line_number":696,"context_line":"    def get_job_info(self, retry\u003dNone):"},{"line_number":697,"context_line":"        \"\"\"Get job info for the domain"},{"line_number":698,"context_line":""},{"line_number":699,"context_line":"        Query the libvirt job info for the domain (ie progress"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_5e6766be","line":696,"range":{"start_line":696,"start_character":27,"end_line":696,"end_character":37},"updated":"2019-03-13 12:13:15.000000000","message":"This parameter should be documented, because by just looking at this I wouldn\u0027t know this is a dict initially started with a value of 0, I would think it\u0027s maybe the configured number of retry attempts.","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":693,"context_line":"        \"\"\"Switch running live migration to post-copy mode\"\"\""},{"line_number":694,"context_line":"        self._domain.migrateStartPostCopy()"},{"line_number":695,"context_line":""},{"line_number":696,"context_line":"    def get_job_info(self, retry\u003dNone):"},{"line_number":697,"context_line":"        \"\"\"Get job info for the domain"},{"line_number":698,"context_line":""},{"line_number":699,"context_line":"        Query the libvirt job info for the domain (ie progress"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_9e161755","line":696,"range":{"start_line":696,"start_character":27,"end_line":696,"end_character":37},"in_reply_to":"5fc1f717_5e6766be","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":714,"context_line":"                                                         retry\u003dretry)"},{"line_number":715,"context_line":"                elif ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_NO_DOMAIN:"},{"line_number":716,"context_line":"                    # Transient guest finished migration, so it has gone"},{"line_number":717,"context_line":"                    # away completclsely"},{"line_number":718,"context_line":"                    LOG.debug(\"Domain has gone away: %s\", ex)"},{"line_number":719,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_COMPLETED)"},{"line_number":720,"context_line":"                elif ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_OPERATION_INVALID:"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_de72767b","line":717,"range":{"start_line":717,"start_character":27,"end_line":717,"end_character":40},"updated":"2019-03-13 12:13:15.000000000","message":"nit: can we fix this typo while you\u0027re in here, it should be \"completely\"","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":714,"context_line":"                                                         retry\u003dretry)"},{"line_number":715,"context_line":"                elif ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_NO_DOMAIN:"},{"line_number":716,"context_line":"                    # Transient guest finished migration, so it has gone"},{"line_number":717,"context_line":"                    # away completclsely"},{"line_number":718,"context_line":"                    LOG.debug(\"Domain has gone away: %s\", ex)"},{"line_number":719,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_COMPLETED)"},{"line_number":720,"context_line":"                elif ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_OPERATION_INVALID:"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_e98a0368","line":717,"range":{"start_line":717,"start_character":27,"end_line":717,"end_character":40},"in_reply_to":"5fc1f717_de72767b","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":718,"context_line":"                    LOG.debug(\"Domain has gone away: %s\", ex)"},{"line_number":719,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_COMPLETED)"},{"line_number":720,"context_line":"                elif ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_OPERATION_INVALID:"},{"line_number":721,"context_line":"                    if retry:"},{"line_number":722,"context_line":"                        num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":723,"context_line":"                        if num_attempts \u003c CONF.libvirt.job_info_retry_count:"},{"line_number":724,"context_line":"                            LOG.debug(\"Pretend the migration job is still \""}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_9e686e87","line":721,"updated":"2019-03-13 12:13:15.000000000","message":"Would be good to add a comment here.","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":718,"context_line":"                    LOG.debug(\"Domain has gone away: %s\", ex)"},{"line_number":719,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_COMPLETED)"},{"line_number":720,"context_line":"                elif ex.get_error_code() \u003d\u003d libvirt.VIR_ERR_OPERATION_INVALID:"},{"line_number":721,"context_line":"                    if retry:"},{"line_number":722,"context_line":"                        num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":723,"context_line":"                        if num_attempts \u003c CONF.libvirt.job_info_retry_count:"},{"line_number":724,"context_line":"                            LOG.debug(\"Pretend the migration job is still \""}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_a9807b47","line":721,"in_reply_to":"5fc1f717_9e686e87","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":721,"context_line":"                    if retry:"},{"line_number":722,"context_line":"                        num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":723,"context_line":"                        if num_attempts \u003c CONF.libvirt.job_info_retry_count:"},{"line_number":724,"context_line":"                            LOG.debug(\"Pretend the migration job is still \""},{"line_number":725,"context_line":"                                      \"running and retry %s time(s) after \""},{"line_number":726,"context_line":"                                      \"hitting VIR_ERR_OPERATION_INVALID.\","},{"line_number":727,"context_line":"                                      num_attempts + 1)"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_9eeb8eda","line":724,"range":{"start_line":724,"start_character":51,"end_line":724,"end_character":60},"updated":"2019-03-13 12:13:15.000000000","message":"This job code is generic, e.g. I could be running a blockRebase job with retries. So this message should probably be generic as well.","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":721,"context_line":"                    if retry:"},{"line_number":722,"context_line":"                        num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":723,"context_line":"                        if num_attempts \u003c CONF.libvirt.job_info_retry_count:"},{"line_number":724,"context_line":"                            LOG.debug(\"Pretend the migration job is still \""},{"line_number":725,"context_line":"                                      \"running and retry %s time(s) after \""},{"line_number":726,"context_line":"                                      \"hitting VIR_ERR_OPERATION_INVALID.\","},{"line_number":727,"context_line":"                                      num_attempts + 1)"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_c93867a1","line":724,"range":{"start_line":724,"start_character":51,"end_line":724,"end_character":60},"in_reply_to":"5fc1f717_9eeb8eda","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":727,"context_line":"                                      num_attempts + 1)"},{"line_number":728,"context_line":"                            retry[\u0027num_attempts\u0027] +\u003d 1"},{"line_number":729,"context_line":"                            return JobInfo("},{"line_number":730,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":731,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":732,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":733,"context_line":"                else:"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_5e968641","line":730,"range":{"start_line":730,"start_character":45,"end_line":730,"end_character":69},"updated":"2019-03-13 12:13:15.000000000","message":"These return value semantics should also be documented because this looks somewhat tightly coupled to how the live migration monitor code is handling this to consider the job is still going.","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":729,"context_line":"                            return JobInfo("},{"line_number":730,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":731,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":732,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":733,"context_line":"                else:"},{"line_number":734,"context_line":"                    LOG.debug(\"Failed to get job stats: %s\", ex)"},{"line_number":735,"context_line":"                    raise"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_be23ea55","line":732,"range":{"start_line":732,"start_character":48,"end_line":732,"end_character":69},"updated":"2019-03-13 12:13:15.000000000","message":"Why is this FAILED rather than VIR_DOMAIN_JOB_COMPLETED as before? Is it because if we are retrying and continue to hit VIR_ERR_OPERATION_INVALID that something is broken and we need to consider the job failed?","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":729,"context_line":"                            return JobInfo("},{"line_number":730,"context_line":"                                type\u003dlibvirt.VIR_DOMAIN_JOB_UNBOUNDED)"},{"line_number":731,"context_line":"                    LOG.error(\"Domain has shutdown accidentally: %s\", ex)"},{"line_number":732,"context_line":"                    return JobInfo(type\u003dlibvirt.VIR_DOMAIN_JOB_FAILED)"},{"line_number":733,"context_line":"                else:"},{"line_number":734,"context_line":"                    LOG.debug(\"Failed to get job stats: %s\", ex)"},{"line_number":735,"context_line":"                    raise"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_c96147ab","line":732,"range":{"start_line":732,"start_character":48,"end_line":732,"end_character":69},"in_reply_to":"5fc1f717_be23ea55","updated":"2019-03-14 11:38:21.000000000","message":"Because if we still use VIR_DOMAIN_JOB_COMPLETED as before but the live migration job actually is FAILED, then the code would trigger post_live_migration() to delete instance files. It\u0027s risky.","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":950,"context_line":"        self.comp_overflow \u003d kwargs.get(\"compression_overflow\", 0)"},{"line_number":951,"context_line":""},{"line_number":952,"context_line":"    @classmethod"},{"line_number":953,"context_line":"    def _get_job_stats_compat(cls, dom, retry\u003dNone):"},{"line_number":954,"context_line":"        # Make the old virDomainGetJobInfo method look similar to the"},{"line_number":955,"context_line":"        # modern virDomainGetJobStats method"},{"line_number":956,"context_line":"        try:"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_decef680","line":953,"range":{"start_line":953,"start_character":40,"end_line":953,"end_character":50},"updated":"2019-03-13 12:13:15.000000000","message":"This parameter should be documented in a docstring.","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":950,"context_line":"        self.comp_overflow \u003d kwargs.get(\"compression_overflow\", 0)"},{"line_number":951,"context_line":""},{"line_number":952,"context_line":"    @classmethod"},{"line_number":953,"context_line":"    def _get_job_stats_compat(cls, dom, retry\u003dNone):"},{"line_number":954,"context_line":"        # Make the old virDomainGetJobInfo method look similar to the"},{"line_number":955,"context_line":"        # modern virDomainGetJobStats method"},{"line_number":956,"context_line":"        try:"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_c92f073d","line":953,"range":{"start_line":953,"start_character":40,"end_line":953,"end_character":50},"in_reply_to":"5fc1f717_decef680","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":971,"context_line":"                if retry:"},{"line_number":972,"context_line":"                    num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":973,"context_line":"                    if num_attempts \u003c CONF.libvirt.job_info_retry_count:"},{"line_number":974,"context_line":"                        LOG.debug(\"Pretend the migration job is still \""},{"line_number":975,"context_line":"                                  \"running and retry %s time(s) after \""},{"line_number":976,"context_line":"                                  \"hitting VIR_ERR_OPERATION_INVALID.\","},{"line_number":977,"context_line":"                                  num_attempts + 1)"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_3ea37ab4","line":974,"range":{"start_line":974,"start_character":47,"end_line":974,"end_character":56},"updated":"2019-03-13 12:13:15.000000000","message":"same as above","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":971,"context_line":"                if retry:"},{"line_number":972,"context_line":"                    num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":973,"context_line":"                    if num_attempts \u003c CONF.libvirt.job_info_retry_count:"},{"line_number":974,"context_line":"                        LOG.debug(\"Pretend the migration job is still \""},{"line_number":975,"context_line":"                                  \"running and retry %s time(s) after \""},{"line_number":976,"context_line":"                                  \"hitting VIR_ERR_OPERATION_INVALID.\","},{"line_number":977,"context_line":"                                  num_attempts + 1)"}],"source_content_type":"text/x-python","patch_set":8,"id":"5fc1f717_e9344395","line":974,"range":{"start_line":974,"start_character":47,"end_line":974,"end_character":56},"in_reply_to":"5fc1f717_3ea37ab4","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"457986a903792b0e69912ea1acc032732fc35f6e","unresolved":false,"context_lines":[{"line_number":680,"context_line":"        Query the libvirt job info for the domain (ie progress"},{"line_number":681,"context_line":"        of migration, or snapshot operation)"},{"line_number":682,"context_line":""},{"line_number":683,"context_line":"        :param retry: a dict {\"num_attempts\": value} containing the number of"},{"line_number":684,"context_line":"                    retry counts with initial value 0."},{"line_number":685,"context_line":"        :returns: a JobInfo of guest"},{"line_number":686,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":12,"id":"3fa7e38b_3820754b","line":683,"updated":"2020-01-06 14:33:59.000000000","message":"I don\u0027t see why we would make this a dict instead of just an int","commit_id":"bf12c9e065458f6919027539236f0f5ad5e267bc"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"7306be0dd8451ce157167b972261057c8eb6177d","unresolved":false,"context_lines":[{"line_number":680,"context_line":"        Query the libvirt job info for the domain (ie progress"},{"line_number":681,"context_line":"        of migration, or snapshot operation)"},{"line_number":682,"context_line":""},{"line_number":683,"context_line":"        :param retry: a dict {\"num_attempts\": value} containing the number of"},{"line_number":684,"context_line":"                    retry counts with initial value 0."},{"line_number":685,"context_line":"        :returns: a JobInfo of guest"},{"line_number":686,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":12,"id":"1f493fa4_1cbe5ad4","line":683,"in_reply_to":"3fa7e38b_3820754b","updated":"2020-04-27 21:01:14.000000000","message":"I also would like to know the answer to this question. Why use a dict? Would prefer to keep it simple.","commit_id":"bf12c9e065458f6919027539236f0f5ad5e267bc"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"7306be0dd8451ce157167b972261057c8eb6177d","unresolved":false,"context_lines":[{"line_number":706,"context_line":"                    # parameter retry is specified, then retry get_job_info()"},{"line_number":707,"context_line":"                    # by returning JobInfo with type"},{"line_number":708,"context_line":"                    # libvirt.VIR_DOMAIN_JOB_UNBOUNDED, which means migration"},{"line_number":709,"context_line":"                    # is still running."},{"line_number":710,"context_line":"                    if retry:"},{"line_number":711,"context_line":"                        num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":712,"context_line":"                        if num_attempts \u003c CONF.libvirt.\\"}],"source_content_type":"text/x-python","patch_set":12,"id":"1f493fa4_bcf80649","line":709,"updated":"2020-04-27 21:01:14.000000000","message":"Here I\u0027d also like to see some explanation about why we are retrying. Same as what I commented about the config option help.\n\nThis is something I think will be pretty mysterious to future devs coming to this code area and I think we should leave a lot of info here for them (and ourselves because we can forget over a long time).","commit_id":"bf12c9e065458f6919027539236f0f5ad5e267bc"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"791f6650b9f002810cdae0e6a01155f4c429f396","unresolved":false,"context_lines":[{"line_number":706,"context_line":"                    # parameter retry is specified, then retry get_job_info()"},{"line_number":707,"context_line":"                    # by returning JobInfo with type"},{"line_number":708,"context_line":"                    # libvirt.VIR_DOMAIN_JOB_UNBOUNDED, which means migration"},{"line_number":709,"context_line":"                    # is still running."},{"line_number":710,"context_line":"                    if retry:"},{"line_number":711,"context_line":"                        num_attempts \u003d retry[\u0027num_attempts\u0027]"},{"line_number":712,"context_line":"                        if num_attempts \u003c CONF.libvirt.\\"}],"source_content_type":"text/x-python","patch_set":12,"id":"1f493fa4_fd2d3ffe","line":709,"in_reply_to":"1f493fa4_bcf80649","updated":"2020-04-29 12:46:56.000000000","message":"ok i agree with melanie, alexandre\u0027s explaination for why the retry is correct seams valid to me but we should also capture that resoning in the code comments here so that we dont have to go to gerrit to understand why we did this in the future.","commit_id":"bf12c9e065458f6919027539236f0f5ad5e267bc"}],"releasenotes/notes/bug-1799152-2d4c3ca24466b3a1.yaml":[{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":1,"context_line":"---"},{"line_number":2,"context_line":"features:"},{"line_number":3,"context_line":"  - |"},{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_1eea3eab","line":2,"range":{"start_line":2,"start_character":0,"end_line":2,"end_character":8},"updated":"2019-03-13 12:13:15.000000000","message":"Is this really a feature? It\u0027s more a bug fix. I would change this to \"fixes\".","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":1,"context_line":"---"},{"line_number":2,"context_line":"features:"},{"line_number":3,"context_line":"  - |"},{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_89b33f0a","line":2,"range":{"start_line":2,"start_character":0,"end_line":2,"end_character":8},"in_reply_to":"5fc1f717_1eea3eab","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":1,"context_line":"---"},{"line_number":2,"context_line":"features:"},{"line_number":3,"context_line":"  - |"},{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"},{"line_number":6,"context_line":"    error ``VIR_ERR_OPERATION_INVALID`` in order to avoid nova\u0027s misjudgment"},{"line_number":7,"context_line":"    that the migration job is completed but actually failed."}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_feaf327a","line":4,"range":{"start_line":4,"start_character":4,"end_line":4,"end_character":7},"updated":"2019-03-13 12:13:15.000000000","message":"Added","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":1,"context_line":"---"},{"line_number":2,"context_line":"features:"},{"line_number":3,"context_line":"  - |"},{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"},{"line_number":6,"context_line":"    error ``VIR_ERR_OPERATION_INVALID`` in order to avoid nova\u0027s misjudgment"},{"line_number":7,"context_line":"    that the migration job is completed but actually failed."}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_1eb55ecd","line":4,"range":{"start_line":4,"start_character":25,"end_line":4,"end_character":32},"updated":"2019-03-13 12:13:15.000000000","message":"It\u0027s in the libvirt group.","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":1,"context_line":"---"},{"line_number":2,"context_line":"features:"},{"line_number":3,"context_line":"  - |"},{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"},{"line_number":6,"context_line":"    error ``VIR_ERR_OPERATION_INVALID`` in order to avoid nova\u0027s misjudgment"},{"line_number":7,"context_line":"    that the migration job is completed but actually failed."}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_e9c20359","line":4,"range":{"start_line":4,"start_character":25,"end_line":4,"end_character":32},"in_reply_to":"5fc1f717_1eb55ecd","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":1,"context_line":"---"},{"line_number":2,"context_line":"features:"},{"line_number":3,"context_line":"  - |"},{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"},{"line_number":6,"context_line":"    error ``VIR_ERR_OPERATION_INVALID`` in order to avoid nova\u0027s misjudgment"},{"line_number":7,"context_line":"    that the migration job is completed but actually failed."}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_c9bdc7d8","line":4,"range":{"start_line":4,"start_character":4,"end_line":4,"end_character":7},"in_reply_to":"5fc1f717_feaf327a","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":2,"context_line":"features:"},{"line_number":3,"context_line":"  - |"},{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"},{"line_number":6,"context_line":"    error ``VIR_ERR_OPERATION_INVALID`` in order to avoid nova\u0027s misjudgment"},{"line_number":7,"context_line":"    that the migration job is completed but actually failed."}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_9ec06e2c","line":5,"range":{"start_line":5,"start_character":58,"end_line":5,"end_character":65},"updated":"2019-03-13 12:13:15.000000000","message":"meeting","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":2,"context_line":"features:"},{"line_number":3,"context_line":"  - |"},{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"},{"line_number":6,"context_line":"    error ``VIR_ERR_OPERATION_INVALID`` in order to avoid nova\u0027s misjudgment"},{"line_number":7,"context_line":"    that the migration job is completed but actually failed."}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_29eb2bdc","line":5,"range":{"start_line":5,"start_character":58,"end_line":5,"end_character":65},"in_reply_to":"5fc1f717_9ec06e2c","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"055804aecceec5c194c6b298d08f9bdc51aea140","unresolved":false,"context_lines":[{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"},{"line_number":6,"context_line":"    error ``VIR_ERR_OPERATION_INVALID`` in order to avoid nova\u0027s misjudgment"},{"line_number":7,"context_line":"    that the migration job is completed but actually failed."}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_decfd653","line":7,"updated":"2019-03-13 12:13:15.000000000","message":"You could add, \"See bug https://bugs.launchpad.net/nova/+bug/1799152 for details.\"","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"},{"author":{"_account_id":25113,"name":"Fan Zhang","email":"zh.f@outlook.com","username":"fanzhang"},"change_message_id":"12a3f6c8bb02e7b00f7303d1c954f69b9074cf4f","unresolved":false,"context_lines":[{"line_number":4,"context_line":"    Add config option ``[DEFAULT]job_info_retry_count`` which specifies the"},{"line_number":5,"context_line":"    maximum number of retries to get guest job info after metting libvirt"},{"line_number":6,"context_line":"    error ``VIR_ERR_OPERATION_INVALID`` in order to avoid nova\u0027s misjudgment"},{"line_number":7,"context_line":"    that the migration job is completed but actually failed."}],"source_content_type":"text/x-yaml","patch_set":8,"id":"5fc1f717_09ee6fec","line":7,"in_reply_to":"5fc1f717_decfd653","updated":"2019-03-14 11:38:21.000000000","message":"Done","commit_id":"3e1f41071b26d77bdabd8b13ff77f3c4e584d4c8"}]}
