)]}'
{"specs/stein/approved/numa-aware-live-migration.rst":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"df72d684a2781f08076542dc572c150e274057e8","unresolved":false,"context_lines":[{"line_number":21,"context_line":"Problem description"},{"line_number":22,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":23,"context_line":""},{"line_number":24,"context_line":"In the following paragraphs the term NUMA is incorrectly used to signify any"},{"line_number":25,"context_line":"guest characteristic that is expressed in the `InstanceNUMATopology` object,"},{"line_number":26,"context_line":"for example CPU pinning and hugepages. CPU pinning can be achieved without a"},{"line_number":27,"context_line":"guest NUMA topology, but because no better term than NUMA is available it will"}],"source_content_type":"text/x-rst","patch_set":1,"id":"3f79a3b5_c0aa76d0","line":24,"range":{"start_line":24,"start_character":45,"end_line":24,"end_character":56},"updated":"2018-09-06 14:29:11.000000000","message":":) as long as it is known i am ok with this.\nill omit any comments about incorrect usage.","commit_id":"5f02617a297c40581c64fb83c2383aa8dc9a42c6"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"df72d684a2781f08076542dc572c150e274057e8","unresolved":false,"context_lines":[{"line_number":24,"context_line":"In the following paragraphs the term NUMA is incorrectly used to signify any"},{"line_number":25,"context_line":"guest characteristic that is expressed in the `InstanceNUMATopology` object,"},{"line_number":26,"context_line":"for example CPU pinning and hugepages. CPU pinning can be achieved without a"},{"line_number":27,"context_line":"guest NUMA topology, but because no better term than NUMA is available it will"},{"line_number":28,"context_line":"continue to be used."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"The problem can best be described with three examples."}],"source_content_type":"text/x-rst","patch_set":1,"id":"3f79a3b5_a0989abf","line":27,"range":{"start_line":27,"start_character":53,"end_line":27,"end_character":57},"updated":"2018-09-06 14:29:11.000000000","message":"ok excpet this comment with is the more general term is EPA\nor enhanced plathform awreness. now no more comments on this :)","commit_id":"5f02617a297c40581c64fb83c2383aa8dc9a42c6"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"df72d684a2781f08076542dc572c150e274057e8","unresolved":false,"context_lines":[{"line_number":79,"context_line":"Specifically, NUMA resources will continue to be claimed by the compute host\u0027s"},{"line_number":80,"context_line":"resource tracker."},{"line_number":81,"context_line":""},{"line_number":82,"context_line":"At the cell conductor (live migration isn\u0027t supported between cells, so the"},{"line_number":83,"context_line":"superconductor is not involved) and compute level, the relevant parts of the"},{"line_number":84,"context_line":"current live migration flow can be summarized by the following oversimplified"},{"line_number":85,"context_line":"pseudo sequence diagram.::"},{"line_number":86,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"3f79a3b5_6156042d","line":83,"range":{"start_line":82,"start_character":23,"end_line":83,"end_character":31},"updated":"2018-09-06 14:29:11.000000000","message":"this is a topic that is going to be discusssed at teh ptg so this might/will change.","commit_id":"5f02617a297c40581c64fb83c2383aa8dc9a42c6"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"fc1d8c88636282233828533410e53a0fd98fc1cb","unresolved":false,"context_lines":[{"line_number":321,"context_line":"     - Description"},{"line_number":322,"context_line":"   * - Rocky"},{"line_number":323,"context_line":"     - Introduced"},{"line_number":324,"context_line":"   * - Rocky"},{"line_number":325,"context_line":"     - Re-proposed"}],"source_content_type":"text/x-rst","patch_set":1,"id":"3f79a3b5_9ed93357","line":324,"range":{"start_line":324,"start_character":7,"end_line":324,"end_character":12},"updated":"2018-09-06 21:53:46.000000000","message":"We\u0027re in Stein now homeboy.","commit_id":"5f02617a297c40581c64fb83c2383aa8dc9a42c6"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"b77b4aa09729b6e9babfd2877f84c6b4abb0d8e4","unresolved":false,"context_lines":[{"line_number":75,"context_line":"There are four aspects to supporting NUMA live migration. First, the instance\u0027s"},{"line_number":76,"context_line":"NUMA characteristics need to be recalculated to fit on the new host. Second,"},{"line_number":77,"context_line":"the resources that the instance will consume on the new host need to be"},{"line_number":78,"context_line":"claimed. Third, information about the instance\u0027 new NUMA characteristics needs"},{"line_number":79,"context_line":"to be generated on the destination (an `InstanceNUMATopolgy` object is not"},{"line_number":80,"context_line":"enough, more on that later). Finally, this information needs to be sent from"},{"line_number":81,"context_line":"the destination to the source, in order for the source to generate the correct"}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_c14ed16c","line":78,"updated":"2018-10-03 22:54:32.000000000","message":"missing an \"s\" after the apostrophe","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"a122e3c08273893b03ad70600c3aee79e716ce5d","unresolved":false,"context_lines":[{"line_number":75,"context_line":"There are four aspects to supporting NUMA live migration. First, the instance\u0027s"},{"line_number":76,"context_line":"NUMA characteristics need to be recalculated to fit on the new host. Second,"},{"line_number":77,"context_line":"the resources that the instance will consume on the new host need to be"},{"line_number":78,"context_line":"claimed. Third, information about the instance\u0027 new NUMA characteristics needs"},{"line_number":79,"context_line":"to be generated on the destination (an `InstanceNUMATopolgy` object is not"},{"line_number":80,"context_line":"enough, more on that later). Finally, this information needs to be sent from"},{"line_number":81,"context_line":"the destination to the source, in order for the source to generate the correct"}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_8cb6ad89","line":78,"in_reply_to":"3f79a3b5_c14ed16c","updated":"2018-10-17 18:54:20.000000000","message":"Done","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"b77b4aa09729b6e9babfd2877f84c6b4abb0d8e4","unresolved":false,"context_lines":[{"line_number":88,"context_line":"resource providers in placement `[3]_`. However, placement can only track"},{"line_number":89,"context_line":"inventories and allocations of quantities of resources. It does not track which"},{"line_number":90,"context_line":"specific resources are used. This specificity is needed for NUMA live"},{"line_number":91,"context_line":"migration. For example, it is not enough to know that an instance uses two NUMA"},{"line_number":92,"context_line":"nodes, we need to know which specific NUMA nodes are used. This is something"},{"line_number":93,"context_line":"placement will never support, even if and when NUMA resource providers are"},{"line_number":94,"context_line":"implemented. Therefore, the compute resource tracker will continue to be used"},{"line_number":95,"context_line":"to claim specific resources on the destination, even in a NUMA-enabled"},{"line_number":96,"context_line":"placement future."},{"line_number":97,"context_line":""}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_c177b19e","line":94,"range":{"start_line":91,"start_character":11,"end_line":94,"end_character":12},"updated":"2018-10-03 22:54:32.000000000","message":"This isn\u0027t a good example, since we\u0027re talking about modeling NUMA nodes in placement as resource providers and so placement *will* know which numa nodes are being used.  A better example would be dedicated CPUs.  Placement will know how many are being used on each NUMA node, but not which ones.","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"bdc4a8552f07cd081447f3b0fffb6314d6b35e0b","unresolved":false,"context_lines":[{"line_number":88,"context_line":"resource providers in placement `[3]_`. However, placement can only track"},{"line_number":89,"context_line":"inventories and allocations of quantities of resources. It does not track which"},{"line_number":90,"context_line":"specific resources are used. This specificity is needed for NUMA live"},{"line_number":91,"context_line":"migration. For example, it is not enough to know that an instance uses two NUMA"},{"line_number":92,"context_line":"nodes, we need to know which specific NUMA nodes are used. This is something"},{"line_number":93,"context_line":"placement will never support, even if and when NUMA resource providers are"},{"line_number":94,"context_line":"implemented. Therefore, the compute resource tracker will continue to be used"},{"line_number":95,"context_line":"to claim specific resources on the destination, even in a NUMA-enabled"},{"line_number":96,"context_line":"placement future."},{"line_number":97,"context_line":""}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_a8b855c6","line":94,"range":{"start_line":91,"start_character":11,"end_line":94,"end_character":12},"in_reply_to":"3f79a3b5_013e0925","updated":"2018-10-16 16:30:25.000000000","message":"To be clear, there are two different things :\n- checking resource capacities for a compute\n- saying which resource should be used for an instance\n\nFor example, with vGPUs, we only check whether the host has enough GPU capabilities by the Placement API. We don\u0027t really say *which pGPU should be used for this instance*. For that, it\u0027s done by the virt driver.\n\nHere, this is the same. We don\u0027t really look at which NUMA node to use by the Placement API.\n\nThat said, I think the above paragraph should be either modified or removed, because it doesn\u0027t really explain why we still need to claim.","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"a122e3c08273893b03ad70600c3aee79e716ce5d","unresolved":false,"context_lines":[{"line_number":88,"context_line":"resource providers in placement `[3]_`. However, placement can only track"},{"line_number":89,"context_line":"inventories and allocations of quantities of resources. It does not track which"},{"line_number":90,"context_line":"specific resources are used. This specificity is needed for NUMA live"},{"line_number":91,"context_line":"migration. For example, it is not enough to know that an instance uses two NUMA"},{"line_number":92,"context_line":"nodes, we need to know which specific NUMA nodes are used. This is something"},{"line_number":93,"context_line":"placement will never support, even if and when NUMA resource providers are"},{"line_number":94,"context_line":"implemented. Therefore, the compute resource tracker will continue to be used"},{"line_number":95,"context_line":"to claim specific resources on the destination, even in a NUMA-enabled"},{"line_number":96,"context_line":"placement future."},{"line_number":97,"context_line":""}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_af24bb1b","line":94,"range":{"start_line":91,"start_character":11,"end_line":94,"end_character":12},"in_reply_to":"3f79a3b5_1665e648","updated":"2018-10-17 18:54:20.000000000","message":"Rewrote the paragrpah to try and make it clearer, using Chris\u0027s dedicated CPUs idea as an example instead of NUMA nodes.","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"d6be8b5115527e26b9608eeb493b9df3fda88b39","unresolved":false,"context_lines":[{"line_number":88,"context_line":"resource providers in placement `[3]_`. However, placement can only track"},{"line_number":89,"context_line":"inventories and allocations of quantities of resources. It does not track which"},{"line_number":90,"context_line":"specific resources are used. This specificity is needed for NUMA live"},{"line_number":91,"context_line":"migration. For example, it is not enough to know that an instance uses two NUMA"},{"line_number":92,"context_line":"nodes, we need to know which specific NUMA nodes are used. This is something"},{"line_number":93,"context_line":"placement will never support, even if and when NUMA resource providers are"},{"line_number":94,"context_line":"implemented. Therefore, the compute resource tracker will continue to be used"},{"line_number":95,"context_line":"to claim specific resources on the destination, even in a NUMA-enabled"},{"line_number":96,"context_line":"placement future."},{"line_number":97,"context_line":""}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_1665e648","line":94,"range":{"start_line":91,"start_character":11,"end_line":94,"end_character":12},"in_reply_to":"3f79a3b5_a8b855c6","updated":"2018-10-16 17:51:47.000000000","message":"Actually, I think we *would* look at which NUMA node to use in placement, but indirectly.\n\nSince memory and CPUs (and PCI devices) are generally associated with NUMA nodes, and those NUMA nodes will be represented in placement as child resource providers, the answer to the question \"which resource providers can satisfy this request\" will result in a list of allocation candidates which will map to specific NUMA nodes.\n\nIn the case of CPUs, placement will not know at all *which* pCPUs are being used, only *how many*.  This is why I think it\u0027s better to use dedicated CPUs as an example.","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"7b2c949e3ee58f39b994d27c93d7eebb115d92bf","unresolved":false,"context_lines":[{"line_number":88,"context_line":"resource providers in placement `[3]_`. However, placement can only track"},{"line_number":89,"context_line":"inventories and allocations of quantities of resources. It does not track which"},{"line_number":90,"context_line":"specific resources are used. This specificity is needed for NUMA live"},{"line_number":91,"context_line":"migration. For example, it is not enough to know that an instance uses two NUMA"},{"line_number":92,"context_line":"nodes, we need to know which specific NUMA nodes are used. This is something"},{"line_number":93,"context_line":"placement will never support, even if and when NUMA resource providers are"},{"line_number":94,"context_line":"implemented. Therefore, the compute resource tracker will continue to be used"},{"line_number":95,"context_line":"to claim specific resources on the destination, even in a NUMA-enabled"},{"line_number":96,"context_line":"placement future."},{"line_number":97,"context_line":""}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_013e0925","line":94,"range":{"start_line":91,"start_character":11,"end_line":94,"end_character":12},"in_reply_to":"3f79a3b5_c177b19e","updated":"2018-10-03 23:35:57.000000000","message":"Coming from a place of mostly ignorance about placement, its API and its internals, how does a resource provider with uuid blah representing a NUMA node know that it\u0027s actually node 0 on the hardware? So if you\u0027ve used the inventories up 3 out of 4 NUMA nodes, you\u0027ll know that the parent compute host RP only has a single node left, but does placement know that it\u0027s node 0 (or 1, or 2, or 3)?","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"b77b4aa09729b6e9babfd2877f84c6b4abb0d8e4","unresolved":false,"context_lines":[{"line_number":369,"context_line":"     - Description"},{"line_number":370,"context_line":"   * - Rocky"},{"line_number":371,"context_line":"     - Introduced"},{"line_number":372,"context_line":"   * - Rocky"},{"line_number":373,"context_line":"     - Re-proposed with modifications pertaining to claims and the exchange of"},{"line_number":374,"context_line":"       information between destination and source."}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_2196a595","line":372,"range":{"start_line":372,"start_character":7,"end_line":372,"end_character":12},"updated":"2018-10-03 22:54:32.000000000","message":"Stein","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"a122e3c08273893b03ad70600c3aee79e716ce5d","unresolved":false,"context_lines":[{"line_number":369,"context_line":"     - Description"},{"line_number":370,"context_line":"   * - Rocky"},{"line_number":371,"context_line":"     - Introduced"},{"line_number":372,"context_line":"   * - Rocky"},{"line_number":373,"context_line":"     - Re-proposed with modifications pertaining to claims and the exchange of"},{"line_number":374,"context_line":"       information between destination and source."}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_2c075921","line":372,"range":{"start_line":372,"start_character":7,"end_line":372,"end_character":12},"in_reply_to":"3f79a3b5_08c94958","updated":"2018-10-17 18:54:20.000000000","message":"Done","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"bdc4a8552f07cd081447f3b0fffb6314d6b35e0b","unresolved":false,"context_lines":[{"line_number":369,"context_line":"     - Description"},{"line_number":370,"context_line":"   * - Rocky"},{"line_number":371,"context_line":"     - Introduced"},{"line_number":372,"context_line":"   * - Rocky"},{"line_number":373,"context_line":"     - Re-proposed with modifications pertaining to claims and the exchange of"},{"line_number":374,"context_line":"       information between destination and source."}],"source_content_type":"text/x-rst","patch_set":4,"id":"3f79a3b5_08c94958","line":372,"range":{"start_line":372,"start_character":7,"end_line":372,"end_character":12},"in_reply_to":"3f79a3b5_2196a595","updated":"2018-10-16 16:30:25.000000000","message":"Yup","commit_id":"60ca8dee3ea4f29bcb0bcc4f16076b2dbda0f78a"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":24,"context_line":"In the following paragraphs the term NUMA is incorrectly used to signify any"},{"line_number":25,"context_line":"guest characteristic that is expressed in the `InstanceNUMATopology` object,"},{"line_number":26,"context_line":"for example CPU pinning and hugepages. CPU pinning can be achieved without a"},{"line_number":27,"context_line":"guest NUMA topology, but because no better term than NUMA is available it will"},{"line_number":28,"context_line":"continue to be used."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"The problem can best be described with three examples."},{"line_number":31,"context_line":""}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_bd2198b7","line":28,"range":{"start_line":27,"start_character":19,"end_line":28,"end_character":20},"updated":"2018-10-26 13:56:33.000000000","message":", but the two concepts are unfortunately tightly coupled in nova and instance pinning is not possible without an instance NUMA topology. For this reason, NUMA is used as a catchall term.\n\n?","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":24,"context_line":"In the following paragraphs the term NUMA is incorrectly used to signify any"},{"line_number":25,"context_line":"guest characteristic that is expressed in the `InstanceNUMATopology` object,"},{"line_number":26,"context_line":"for example CPU pinning and hugepages. CPU pinning can be achieved without a"},{"line_number":27,"context_line":"guest NUMA topology, but because no better term than NUMA is available it will"},{"line_number":28,"context_line":"continue to be used."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"The problem can best be described with three examples."},{"line_number":31,"context_line":""}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_38cf338b","line":28,"range":{"start_line":27,"start_character":19,"end_line":28,"end_character":20},"in_reply_to":"3f79a3b5_5476b30d","updated":"2018-11-20 18:22:19.000000000","message":"I like Stephen\u0027s wording too, done.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb7819759eb16b7471edd722edce5ded908e9daa","unresolved":false,"context_lines":[{"line_number":24,"context_line":"In the following paragraphs the term NUMA is incorrectly used to signify any"},{"line_number":25,"context_line":"guest characteristic that is expressed in the `InstanceNUMATopology` object,"},{"line_number":26,"context_line":"for example CPU pinning and hugepages. CPU pinning can be achieved without a"},{"line_number":27,"context_line":"guest NUMA topology, but because no better term than NUMA is available it will"},{"line_number":28,"context_line":"continue to be used."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"The problem can best be described with three examples."},{"line_number":31,"context_line":""}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_5476b30d","line":28,"range":{"start_line":27,"start_character":19,"end_line":28,"end_character":20},"in_reply_to":"3f79a3b5_bd2198b7","updated":"2018-11-15 23:18:51.000000000","message":"i prefer stephens phrasing but i still dislike the conflation of numa and pinning but not enough to -1","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":30,"context_line":"The problem can best be described with three examples."},{"line_number":31,"context_line":""},{"line_number":32,"context_line":"The first example is live migration with CPU pinning. An instance with a"},{"line_number":33,"context_line":"``dedicated`` CPU policy and pinned CPUs is live-migrated.  Its pin mappings"},{"line_number":34,"context_line":"are naively copied over to the destination host. This creates two problems."},{"line_number":35,"context_line":"First, its pinned pCPUs aren\u0027t properly claimed on the destination. This means"},{"line_number":36,"context_line":"that, should a second instance with pinned CPUs land on the destination, both"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_3d17c856","line":33,"range":{"start_line":33,"start_character":2,"end_line":33,"end_character":11},"updated":"2018-10-26 13:56:33.000000000","message":"the ``hw:cpu_pinning\u003ddedicated`` extra spec","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb7819759eb16b7471edd722edce5ded908e9daa","unresolved":false,"context_lines":[{"line_number":30,"context_line":"The problem can best be described with three examples."},{"line_number":31,"context_line":""},{"line_number":32,"context_line":"The first example is live migration with CPU pinning. An instance with a"},{"line_number":33,"context_line":"``dedicated`` CPU policy and pinned CPUs is live-migrated.  Its pin mappings"},{"line_number":34,"context_line":"are naively copied over to the destination host. This creates two problems."},{"line_number":35,"context_line":"First, its pinned pCPUs aren\u0027t properly claimed on the destination. This means"},{"line_number":36,"context_line":"that, should a second instance with pinned CPUs land on the destination, both"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_14803bbe","line":33,"range":{"start_line":33,"start_character":2,"end_line":33,"end_character":11},"in_reply_to":"3f79a3b5_3d17c856","updated":"2018-11-15 23:18:51.000000000","message":"+1","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":30,"context_line":"The problem can best be described with three examples."},{"line_number":31,"context_line":""},{"line_number":32,"context_line":"The first example is live migration with CPU pinning. An instance with a"},{"line_number":33,"context_line":"``dedicated`` CPU policy and pinned CPUs is live-migrated.  Its pin mappings"},{"line_number":34,"context_line":"are naively copied over to the destination host. This creates two problems."},{"line_number":35,"context_line":"First, its pinned pCPUs aren\u0027t properly claimed on the destination. This means"},{"line_number":36,"context_line":"that, should a second instance with pinned CPUs land on the destination, both"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_f8d8db51","line":33,"range":{"start_line":33,"start_character":2,"end_line":33,"end_character":11},"in_reply_to":"3f79a3b5_3d17c856","updated":"2018-11-20 18:22:19.000000000","message":"Done, but you meant hw:cpu_policy, right?","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":38,"context_line":"mappings on the destination are ignored. If another instance already exists on"},{"line_number":39,"context_line":"the destination, both instances\u0027s vCPUs could be pinned to the same pCPUs. In"},{"line_number":40,"context_line":"both cases, the ``dedicated`` CPU policy is violated, potentially leading to"},{"line_number":41,"context_line":"unpredictable performance degradation."},{"line_number":42,"context_line":""},{"line_number":43,"context_line":"The second example is instances with hugepages. There are two hosts, each with"},{"line_number":44,"context_line":"two NUMA nodes and 8 1GB hugepages per node. Two identical instances are booted"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_dd057416","line":41,"updated":"2018-10-26 13:56:33.000000000","message":"You can also have instances being split across host NUMA nodes if the source and destination use different hardware.\n\n[Later] I see you covered this in example 3. Noice.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":60,"context_line":"---------"},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"As a cloud administrator, I want to live migrate instances with CPU pinning"},{"line_number":63,"context_line":"without the pin mappings overlapping on the destination compute host."},{"line_number":64,"context_line":""},{"line_number":65,"context_line":"As a cloud administrator, I want live migration of hugepage-backed instances to"},{"line_number":66,"context_line":"work and for the instances to successfully run on the destination compute host."}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_f8015e04","line":63,"range":{"start_line":63,"start_character":0,"end_line":63,"end_character":69},"updated":"2018-10-26 13:56:33.000000000","message":"i.e. properly","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":60,"context_line":"---------"},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"As a cloud administrator, I want to live migrate instances with CPU pinning"},{"line_number":63,"context_line":"without the pin mappings overlapping on the destination compute host."},{"line_number":64,"context_line":""},{"line_number":65,"context_line":"As a cloud administrator, I want live migration of hugepage-backed instances to"},{"line_number":66,"context_line":"work and for the instances to successfully run on the destination compute host."}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_58c22f56","line":63,"range":{"start_line":63,"start_character":0,"end_line":63,"end_character":69},"in_reply_to":"3f79a3b5_f8015e04","updated":"2018-11-20 18:22:19.000000000","message":"Heh, right - somehow \"I want stuff to not suck\" doesn\u0027t ring like a thorough use case to me ;)","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":85,"context_line":"---------------"},{"line_number":86,"context_line":""},{"line_number":87,"context_line":"Let\u0027s address the resource claims aspect first. An effort has begun to support"},{"line_number":88,"context_line":"NUMA resource providers in placement `[3]_`. However, placement can only track"},{"line_number":89,"context_line":"inventories and allocations of quantities of resources. It does not track which"},{"line_number":90,"context_line":"specific resources are used. Specificity is needed for NUMA live migration."},{"line_number":91,"context_line":"Consider an instance that uses 4 dedicated CPUs. During live migration, the"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_98294a84","line":88,"range":{"start_line":88,"start_character":37,"end_line":88,"end_character":38},"updated":"2018-10-26 13:56:33.000000000","message":"You don\u0027t need these","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":85,"context_line":"---------------"},{"line_number":86,"context_line":""},{"line_number":87,"context_line":"Let\u0027s address the resource claims aspect first. An effort has begun to support"},{"line_number":88,"context_line":"NUMA resource providers in placement `[3]_`. However, placement can only track"},{"line_number":89,"context_line":"inventories and allocations of quantities of resources. It does not track which"},{"line_number":90,"context_line":"specific resources are used. Specificity is needed for NUMA live migration."},{"line_number":91,"context_line":"Consider an instance that uses 4 dedicated CPUs. During live migration, the"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_d8915f47","line":88,"range":{"start_line":88,"start_character":37,"end_line":88,"end_character":38},"in_reply_to":"3f79a3b5_98294a84","updated":"2018-11-20 18:22:19.000000000","message":"Done","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":94,"context_line":"addition to claiming quantities of CPUs in placement, we need to claim specific"},{"line_number":95,"context_line":"CPUs on the compute host. The compute resource tracker already exists for"},{"line_number":96,"context_line":"exactly this purpose, and it will continue to be used to claim specific"},{"line_number":97,"context_line":"resources on the destination, even in a NUMA-enabled placement future."},{"line_number":98,"context_line":""},{"line_number":99,"context_line":"Fitting to the new host"},{"line_number":100,"context_line":"-----------------------"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_b838c6d3","line":97,"updated":"2018-10-26 13:56:33.000000000","message":"+1","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":100,"context_line":"-----------------------"},{"line_number":101,"context_line":""},{"line_number":102,"context_line":"An advantage of using the resource tracker is that it forces us to use a"},{"line_number":103,"context_line":"`MoveClaim`, thus giving us the instance new NUMA topology for free"},{"line_number":104,"context_line":"(`Claim._test_numa_topology` in `nova/compute/claims.py`)."},{"line_number":105,"context_line":""},{"line_number":106,"context_line":"Generating the new NUMA information on the destination"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_384a765a","line":103,"range":{"start_line":103,"start_character":0,"end_line":103,"end_character":1},"updated":"2018-10-26 13:56:33.000000000","message":"But you need two of these, here and for the rest of the code","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb7819759eb16b7471edd722edce5ded908e9daa","unresolved":false,"context_lines":[{"line_number":100,"context_line":"-----------------------"},{"line_number":101,"context_line":""},{"line_number":102,"context_line":"An advantage of using the resource tracker is that it forces us to use a"},{"line_number":103,"context_line":"`MoveClaim`, thus giving us the instance new NUMA topology for free"},{"line_number":104,"context_line":"(`Claim._test_numa_topology` in `nova/compute/claims.py`)."},{"line_number":105,"context_line":""},{"line_number":106,"context_line":"Generating the new NUMA information on the destination"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_74dc0fe2","line":103,"range":{"start_line":103,"start_character":0,"end_line":103,"end_character":1},"in_reply_to":"3f79a3b5_384a765a","updated":"2018-11-15 23:18:51.000000000","message":"am im not sure this is an advanatage and im not sure we actully want to use a move claims.\n\nwe should  be doubling up the allocation to make sure we can revert the migration correctly on error.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":100,"context_line":"-----------------------"},{"line_number":101,"context_line":""},{"line_number":102,"context_line":"An advantage of using the resource tracker is that it forces us to use a"},{"line_number":103,"context_line":"`MoveClaim`, thus giving us the instance new NUMA topology for free"},{"line_number":104,"context_line":"(`Claim._test_numa_topology` in `nova/compute/claims.py`)."},{"line_number":105,"context_line":""},{"line_number":106,"context_line":"Generating the new NUMA information on the destination"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_18c037f9","line":103,"range":{"start_line":103,"start_character":0,"end_line":103,"end_character":1},"in_reply_to":"3f79a3b5_74dc0fe2","updated":"2018-11-20 18:22:19.000000000","message":"\u003e am im not sure this is an advanatage and im not sure we actully\n \u003e want to use a move claims.\n \u003e \n \u003e we should  be doubling up the allocation to make sure we can revert\n \u003e the migration correctly on error.\n\nAllocations... in placement? It\u0027s not ready yet. And I thought we can\u0027t really use the resource tracker without claims and a migration context, unless we want to refactor it a whole bunch.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb7819759eb16b7471edd722edce5ded908e9daa","unresolved":false,"context_lines":[{"line_number":122,"context_line":"element or attribute, talking to an older libvirt driver, which still supports"},{"line_number":123,"context_line":"it."},{"line_number":124,"context_line":""},{"line_number":125,"context_line":"Because of this, and sticking to the existing OpenStack best practice of"},{"line_number":126,"context_line":"sending oslo versionedobjects over the wire, this spec proposes to replicate"},{"line_number":127,"context_line":"the relevant NUMA-related `LibvirtConfigObject` as Nova objects. Contructed on"},{"line_number":128,"context_line":"the destination from the results of `_get_guest_numa_config`, the source would"},{"line_number":129,"context_line":"use the information they contain to update its instance XML in order for the"},{"line_number":130,"context_line":"instance to run on the destination host."}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_67146343","line":127,"range":{"start_line":125,"start_character":0,"end_line":127,"end_character":63},"updated":"2018-11-15 23:18:51.000000000","message":"directly converting the libvirt config objects to OVOs i really not any better then passing the raw xml snipits over the wire and add lots of complexity.\n\ni agree we should use OVOs but they should not be coupled\nthem with the libvirt objects, we should use create a higher level abstraction and pass back the numa mappings.\n\nsee example later on line 247.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":122,"context_line":"element or attribute, talking to an older libvirt driver, which still supports"},{"line_number":123,"context_line":"it."},{"line_number":124,"context_line":""},{"line_number":125,"context_line":"Because of this, and sticking to the existing OpenStack best practice of"},{"line_number":126,"context_line":"sending oslo versionedobjects over the wire, this spec proposes to replicate"},{"line_number":127,"context_line":"the relevant NUMA-related `LibvirtConfigObject` as Nova objects. Contructed on"},{"line_number":128,"context_line":"the destination from the results of `_get_guest_numa_config`, the source would"},{"line_number":129,"context_line":"use the information they contain to update its instance XML in order for the"},{"line_number":130,"context_line":"instance to run on the destination host."}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_1b2111a2","line":127,"range":{"start_line":125,"start_character":0,"end_line":127,"end_character":63},"in_reply_to":"3f79a3b5_67146343","updated":"2018-11-20 18:22:19.000000000","message":"\u003e directly converting the libvirt config objects to OVOs i really not\n \u003e any better then passing the raw xml snipits over the wire and add\n \u003e lots of complexity.\n\nA tiny bit better, since it\u0027d be versioned and schema\u0027d, so to speak.\n\n\n \u003e i agree we should use OVOs but they should not be coupled\n \u003e them with the libvirt objects, we should use create a higher level\n \u003e abstraction and pass back the numa mappings.\n \u003e \n \u003e see example later on line 247.\n\nWell, this is libvirt talking to libvirt, so it\u0027s not necessarily bad to couple it. I have no idea how hyperv handles this sort of stuff, so I\u0027d rather not try to come up with an overly generic abstraction only for it to bite us in the ass later. Also, I think your example on L247 is missing the hugepages backing nodeset stuff.\n\nCould we maybe hash this out in the implementation? I guess I agree that straight up copying the LibvirtCOnfig objects into OVOs is bad, but some amount of coupling to libvirt\u0027s domain XML is inevitable. I\u0027ve amended the spec to reflect this suggestion.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":135,"context_line":"At the cell conductor (live migration isn\u0027t supported between cells, so the"},{"line_number":136,"context_line":"superconductor is not involved) and compute level, the relevant parts of the"},{"line_number":137,"context_line":"current live migration flow can be summarized by the following oversimplified"},{"line_number":138,"context_line":"pseudo sequence diagram.::"},{"line_number":139,"context_line":""},{"line_number":140,"context_line":"    +-----------+                           +---------+                        +-------------+ +---------+"},{"line_number":141,"context_line":"    | Conductor |                           | Source  |                        | Destination | | Driver  |"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_78430e41","line":138,"updated":"2018-10-26 13:56:33.000000000","message":"Protip: http://blockdiag.com/en/actdiag/index.html","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":135,"context_line":"At the cell conductor (live migration isn\u0027t supported between cells, so the"},{"line_number":136,"context_line":"superconductor is not involved) and compute level, the relevant parts of the"},{"line_number":137,"context_line":"current live migration flow can be summarized by the following oversimplified"},{"line_number":138,"context_line":"pseudo sequence diagram.::"},{"line_number":139,"context_line":""},{"line_number":140,"context_line":"    +-----------+                           +---------+                        +-------------+ +---------+"},{"line_number":141,"context_line":"    | Conductor |                           | Source  |                        | Destination | | Driver  |"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_468a5065","line":138,"in_reply_to":"3f79a3b5_78430e41","updated":"2018-11-20 18:22:19.000000000","message":"Interesting, is there a way to have method calls the way I made them here? Their samples and interactive demo shell only show a sort of flow chart-like thing.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb7819759eb16b7471edd722edce5ded908e9daa","unresolved":false,"context_lines":[{"line_number":166,"context_line":"          |                                      |-------------------------------------------------\u003e|"},{"line_number":167,"context_line":"          |                                      |                                    |             |"},{"line_number":168,"context_line":""},{"line_number":169,"context_line":"In the proposed new flow, the destination compute manager asks the libvirt"},{"line_number":170,"context_line":"driver to calculate the new `LibvirtGuestConfig` objects using the new instance"},{"line_number":171,"context_line":"NUMA topology obtained from the move claim. The compute manager converts those"},{"line_number":172,"context_line":"`LibvirtGuestConfig` objecs to the new NUMA Nova objects, and adds them as"},{"line_number":173,"context_line":"fields to the `LibvirtLiveMigrateData` `migrate_data` object. The latter"},{"line_number":174,"context_line":"eventually reaches the source libvirt driver, which uses it to generate the new"},{"line_number":175,"context_line":"XML. The proposed flow is summarised in the following diagram.::"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"    +-----------+                                             +---------+                       +-------------+                                          +---------+"},{"line_number":178,"context_line":"    | Conductor |                                             | Source  |                       | Destination |                                          | Driver  |"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_5460f30c","line":175,"range":{"start_line":169,"start_character":0,"end_line":175,"end_character":64},"updated":"2018-11-15 23:18:51.000000000","message":"im not sure we need to fully generate a libviftGuestConfig object we just need to generate the list of vCPU to pCPU mappings and the numam cell mappings between the guest virtual numa nodes and the host phyical numa nodes.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":166,"context_line":"          |                                      |-------------------------------------------------\u003e|"},{"line_number":167,"context_line":"          |                                      |                                    |             |"},{"line_number":168,"context_line":""},{"line_number":169,"context_line":"In the proposed new flow, the destination compute manager asks the libvirt"},{"line_number":170,"context_line":"driver to calculate the new `LibvirtGuestConfig` objects using the new instance"},{"line_number":171,"context_line":"NUMA topology obtained from the move claim. The compute manager converts those"},{"line_number":172,"context_line":"`LibvirtGuestConfig` objecs to the new NUMA Nova objects, and adds them as"},{"line_number":173,"context_line":"fields to the `LibvirtLiveMigrateData` `migrate_data` object. The latter"},{"line_number":174,"context_line":"eventually reaches the source libvirt driver, which uses it to generate the new"},{"line_number":175,"context_line":"XML. The proposed flow is summarised in the following diagram.::"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"    +-----------+                                             +---------+                       +-------------+                                          +---------+"},{"line_number":178,"context_line":"    | Conductor |                                             | Source  |                       | Destination |                                          | Driver  |"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_265cf4ed","line":175,"range":{"start_line":169,"start_character":0,"end_line":175,"end_character":64},"in_reply_to":"3f79a3b5_5460f30c","updated":"2018-11-20 18:22:19.000000000","message":"It\u0027s just easier because the code already exists, we can then grab what we need from that config objects.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"41a3083fdddf3ac90959b53309dcdc3290fd6dce","unresolved":false,"context_lines":[{"line_number":226,"context_line":""},{"line_number":227,"context_line":"Using move claims and the new instance NUMA topology calculated within"},{"line_number":228,"context_line":"essentially dictates the rest of the implementation. For example, it would be"},{"line_number":229,"context_line":"possible to reuse the result of `numa_fit_instance_to_host` as called from the"},{"line_number":230,"context_line":"scheduler before the live migration reaches the conductor. However, the claim"},{"line_number":231,"context_line":"would still calculate its own new instance NUMA topology."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"Data model impact"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_a3f9cb82","line":230,"range":{"start_line":229,"start_character":32,"end_line":230,"end_character":57},"updated":"2018-11-05 20:52:39.000000000","message":"it occurs to me that the scheduler doesn\u0027t actually need to know about the individual resources, just whether there are enough of them.  We may be able to reduce the amount of throwaway work being done in the scheduler.\n\nThat\u0027s a future work item though, unrelated to this change.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb7819759eb16b7471edd722edce5ded908e9daa","unresolved":false,"context_lines":[{"line_number":226,"context_line":""},{"line_number":227,"context_line":"Using move claims and the new instance NUMA topology calculated within"},{"line_number":228,"context_line":"essentially dictates the rest of the implementation. For example, it would be"},{"line_number":229,"context_line":"possible to reuse the result of `numa_fit_instance_to_host` as called from the"},{"line_number":230,"context_line":"scheduler before the live migration reaches the conductor. However, the claim"},{"line_number":231,"context_line":"would still calculate its own new instance NUMA topology."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"Data model impact"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_d46ba3eb","line":230,"range":{"start_line":229,"start_character":32,"end_line":230,"end_character":57},"in_reply_to":"3f79a3b5_a3f9cb82","updated":"2018-11-15 23:18:51.000000000","message":"yes and no. \n\nthe cpu thread policies will not be caluated by placement so the numa toplogy filter will still need to validate that which effectivly means we need to calualate the exeact pinning so what we really should be doing eventrully is have the schduler store the values it calulates for each host and then we should pass that to the compute node so that it does not need to recalulate it.\n\nbut as you said that is a future work item.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":226,"context_line":""},{"line_number":227,"context_line":"Using move claims and the new instance NUMA topology calculated within"},{"line_number":228,"context_line":"essentially dictates the rest of the implementation. For example, it would be"},{"line_number":229,"context_line":"possible to reuse the result of `numa_fit_instance_to_host` as called from the"},{"line_number":230,"context_line":"scheduler before the live migration reaches the conductor. However, the claim"},{"line_number":231,"context_line":"would still calculate its own new instance NUMA topology."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"Data model impact"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_86f2a89b","line":230,"range":{"start_line":229,"start_character":32,"end_line":230,"end_character":57},"in_reply_to":"3f79a3b5_d46ba3eb","updated":"2018-11-20 18:22:19.000000000","message":"That\u0027s a lot of work, yeah. As this spec says in \u0027Generating the new NUMA information on the destination\u0027, we\u0027d have to provide the scheduler with a lot more information about the compute hosts - NUMA topology, but also current utilization by other instances.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb7819759eb16b7471edd722edce5ded908e9daa","unresolved":false,"context_lines":[{"line_number":237,"context_line":"(not necessarily directly, nested objects are included here) to"},{"line_number":238,"context_line":"`LibvirtLiveMigrateData`. This is not necessarily an exhaustive list."},{"line_number":239,"context_line":""},{"line_number":240,"context_line":"* LibvirtConfigGuestCPUTune"},{"line_number":241,"context_line":"* LibvirtConfigGuestCPUTuneEmulatorPin"},{"line_number":242,"context_line":"* LibvirtConfigGuestCPUTuneVCPUPin"},{"line_number":243,"context_line":"* LibvirtConfigGuestCPUNUMA"},{"line_number":244,"context_line":"* LibvirtConfigGuestCPUNUMACell"},{"line_number":245,"context_line":"* LibvirtConfigGuestNUMATune"},{"line_number":246,"context_line":"* LibvirtConfigGuestNUMATuneMemory"},{"line_number":247,"context_line":"* LibvirtConfigGuestNUMATuneMemNode"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"REST API impact"},{"line_number":250,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_34121766","line":247,"range":{"start_line":240,"start_character":0,"end_line":247,"end_character":35},"updated":"2018-11-15 23:18:51.000000000","message":"we should not make the new types libvirt specific\n\nwe basicall need 3 new objects total\n\nCpuMapping: { vCPU: X, pCPU: Y}\nCellMapping: { vCell: X, pCell : Y}\n\nNumaTopology: {\ncpu_mappings:[{vCPU:0, pCPU:0},{vCPU:1, pCPU:10}...]\nemulator_mappings:[{vCPU:0, pCPU:0},{vCPU:1, pCPU:10}...]\ncell_mappings: [{ vCell: 0, pCell : 0}, { vCell: 1, pCell : 2} ...]\n}\n\n\nwe do not need any libvirt specific objects as this object should be reusable for other virt drivers.\n\nall filed in the NumaTopology object can be empty lists if the subfiled is not required.\n\ne.g. a vm with out a dedicated emulator tread would not have emulator_mappings set\n\nsimilarly on hyperv it would not have cpu or emulator mappings.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":237,"context_line":"(not necessarily directly, nested objects are included here) to"},{"line_number":238,"context_line":"`LibvirtLiveMigrateData`. This is not necessarily an exhaustive list."},{"line_number":239,"context_line":""},{"line_number":240,"context_line":"* LibvirtConfigGuestCPUTune"},{"line_number":241,"context_line":"* LibvirtConfigGuestCPUTuneEmulatorPin"},{"line_number":242,"context_line":"* LibvirtConfigGuestCPUTuneVCPUPin"},{"line_number":243,"context_line":"* LibvirtConfigGuestCPUNUMA"},{"line_number":244,"context_line":"* LibvirtConfigGuestCPUNUMACell"},{"line_number":245,"context_line":"* LibvirtConfigGuestNUMATune"},{"line_number":246,"context_line":"* LibvirtConfigGuestNUMATuneMemory"},{"line_number":247,"context_line":"* LibvirtConfigGuestNUMATuneMemNode"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"REST API impact"},{"line_number":250,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_464810b3","line":247,"range":{"start_line":240,"start_character":0,"end_line":247,"end_character":35},"in_reply_to":"3f79a3b5_34121766","updated":"2018-11-20 18:22:19.000000000","message":"Done, though I chickened out of fleshing out the details. And I added hugepage nodeset mapping.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":294,"context_line":"old-style live migration."},{"line_number":295,"context_line":""},{"line_number":296,"context_line":"If the source is older than the destination, versionedobject\u0027s compatibility"},{"line_number":297,"context_line":"code strits `migrate_data` of the new NUMA fields and the source performs an"},{"line_number":298,"context_line":"old-style live migration without generating new XML. However, the destination"},{"line_number":299,"context_line":"has already claimed NUMA resources that the source does generate instance XML"},{"line_number":300,"context_line":"for."}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_b86686ed","line":297,"range":{"start_line":297,"start_character":5,"end_line":297,"end_character":11},"updated":"2018-10-26 13:56:33.000000000","message":"strips?","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":294,"context_line":"old-style live migration."},{"line_number":295,"context_line":""},{"line_number":296,"context_line":"If the source is older than the destination, versionedobject\u0027s compatibility"},{"line_number":297,"context_line":"code strits `migrate_data` of the new NUMA fields and the source performs an"},{"line_number":298,"context_line":"old-style live migration without generating new XML. However, the destination"},{"line_number":299,"context_line":"has already claimed NUMA resources that the source does generate instance XML"},{"line_number":300,"context_line":"for."}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_06c0d8f8","line":297,"range":{"start_line":297,"start_character":5,"end_line":297,"end_character":11},"in_reply_to":"3f79a3b5_b86686ed","updated":"2018-11-20 18:22:19.000000000","message":"Done","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"eb7819759eb16b7471edd722edce5ded908e9daa","unresolved":false,"context_lines":[{"line_number":294,"context_line":"old-style live migration."},{"line_number":295,"context_line":""},{"line_number":296,"context_line":"If the source is older than the destination, versionedobject\u0027s compatibility"},{"line_number":297,"context_line":"code strits `migrate_data` of the new NUMA fields and the source performs an"},{"line_number":298,"context_line":"old-style live migration without generating new XML. However, the destination"},{"line_number":299,"context_line":"has already claimed NUMA resources that the source does generate instance XML"},{"line_number":300,"context_line":"for."}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_2720ab9d","line":297,"range":{"start_line":297,"start_character":5,"end_line":297,"end_character":11},"in_reply_to":"3f79a3b5_b86686ed","updated":"2018-11-15 23:18:51.000000000","message":"yes","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":302,"context_line":"While the first case is acceptable, the second case isn\u0027t. For this reason,"},{"line_number":303,"context_line":"this spec proposes to refuse to perform a NUMA live migration unless both"},{"line_number":304,"context_line":"source and destination compute hosts have been upgraded to a version that"},{"line_number":305,"context_line":"supports it. To achieve this, the conductor can check the source and"},{"line_number":306,"context_line":"destination compute\u0027s service version and fail the migration if either one is"},{"line_number":307,"context_line":"too old."},{"line_number":308,"context_line":""},{"line_number":309,"context_line":"Implementation"},{"line_number":310,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_18a1baf6","line":307,"range":{"start_line":305,"start_character":13,"end_line":307,"end_character":8},"updated":"2018-10-26 13:56:33.000000000","message":"We always require the conductor be upgraded before the compute nodes, right? If not, do we want to do this the other way round too?","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"41a3083fdddf3ac90959b53309dcdc3290fd6dce","unresolved":false,"context_lines":[{"line_number":302,"context_line":"While the first case is acceptable, the second case isn\u0027t. For this reason,"},{"line_number":303,"context_line":"this spec proposes to refuse to perform a NUMA live migration unless both"},{"line_number":304,"context_line":"source and destination compute hosts have been upgraded to a version that"},{"line_number":305,"context_line":"supports it. To achieve this, the conductor can check the source and"},{"line_number":306,"context_line":"destination compute\u0027s service version and fail the migration if either one is"},{"line_number":307,"context_line":"too old."},{"line_number":308,"context_line":""},{"line_number":309,"context_line":"Implementation"},{"line_number":310,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_4316b7a3","line":307,"range":{"start_line":305,"start_character":13,"end_line":307,"end_character":8},"in_reply_to":"3f79a3b5_18a1baf6","updated":"2018-11-05 20:52:39.000000000","message":"I believe we do require the conductor to be upgraded first.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":302,"context_line":"While the first case is acceptable, the second case isn\u0027t. For this reason,"},{"line_number":303,"context_line":"this spec proposes to refuse to perform a NUMA live migration unless both"},{"line_number":304,"context_line":"source and destination compute hosts have been upgraded to a version that"},{"line_number":305,"context_line":"supports it. To achieve this, the conductor can check the source and"},{"line_number":306,"context_line":"destination compute\u0027s service version and fail the migration if either one is"},{"line_number":307,"context_line":"too old."},{"line_number":308,"context_line":""},{"line_number":309,"context_line":"Implementation"},{"line_number":310,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_a6f7a45b","line":307,"range":{"start_line":305,"start_character":13,"end_line":307,"end_character":8},"in_reply_to":"3f79a3b5_4316b7a3","updated":"2018-11-20 18:22:19.000000000","message":"Yeah, conductor has to be upgraded first, because that\u0027s where the indirection happens.","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"09be0b1ba6e202f642a71d35bab72e5930e21639","unresolved":false,"context_lines":[{"line_number":334,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":335,"context_line":""},{"line_number":336,"context_line":"The libvirt/qemu driver used in the gate does not currently support NUMA"},{"line_number":337,"context_line":"features (though work is in progress `[4]`_). Therefore, testing NUMA aware"},{"line_number":338,"context_line":"live migration in the upstream gate would require nested virt. In addition, the"},{"line_number":339,"context_line":"only assertable outcome of a NUMA live migration test (if it ever becomes"},{"line_number":340,"context_line":"possible) would be that the live migration succeeded. Examining the instance"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_78b1ce44","line":337,"range":{"start_line":337,"start_character":37,"end_line":337,"end_character":43},"updated":"2018-10-26 13:56:33.000000000","message":"nit: You don\u0027t need this. Do\n\n  [4]_\n\n  .. [4] https://example.com/","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"3af5575b0c956c4149ba53fa563564ec650d4347","unresolved":false,"context_lines":[{"line_number":334,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":335,"context_line":""},{"line_number":336,"context_line":"The libvirt/qemu driver used in the gate does not currently support NUMA"},{"line_number":337,"context_line":"features (though work is in progress `[4]`_). Therefore, testing NUMA aware"},{"line_number":338,"context_line":"live migration in the upstream gate would require nested virt. In addition, the"},{"line_number":339,"context_line":"only assertable outcome of a NUMA live migration test (if it ever becomes"},{"line_number":340,"context_line":"possible) would be that the live migration succeeded. Examining the instance"}],"source_content_type":"text/x-rst","patch_set":5,"id":"3f79a3b5_269154f7","line":337,"range":{"start_line":337,"start_character":37,"end_line":337,"end_character":43},"in_reply_to":"3f79a3b5_78b1ce44","updated":"2018-11-20 18:22:19.000000000","message":"Done","commit_id":"a55df31741bca55777fb1e25b673c1fb7e3f1de2"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":89,"context_line":"NUMA resource providers in placement [3]_. However, placement can only track"},{"line_number":90,"context_line":"inventories and allocations of quantities of resources. It does not track which"},{"line_number":91,"context_line":"specific resources are used. Specificity is needed for NUMA live migration."},{"line_number":92,"context_line":"Consider an instance that uses 4 dedicated CPUs. During live migration, the"},{"line_number":93,"context_line":"scheduler claims those 4 CPUs in placement on the destination. However, we need"},{"line_number":94,"context_line":"to prevent other instances from using those specific CPUs. Therefore, in"},{"line_number":95,"context_line":"addition to claiming quantities of CPUs in placement, we need to claim specific"},{"line_number":96,"context_line":"CPUs on the compute host. The compute resource tracker already exists for"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_9a18a21f","line":93,"range":{"start_line":92,"start_character":49,"end_line":93,"end_character":62},"updated":"2018-11-21 21:38:27.000000000","message":"But we don\u0027t model dedicated CPUs nor claim them in placement today. I believe that\u0027s Jay\u0027s spec:\n\nhttps://review.openstack.org/#/c/555081/\n\nOr is this sentence talking about something that will happen once we model dedicated CPUs in placement and thus have the ability to allocate resources against them in the scheduler?\n\nI guess the tl;dr here is \"we can\u0027t do anything fancy with placement yet in the scheduler, so we\u0027ll hump things the old way with the resource tracker and claims like we do for cold migration\".","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"5706e15af5e1f65264cee6c49a235c441838c656","unresolved":false,"context_lines":[{"line_number":89,"context_line":"NUMA resource providers in placement [3]_. However, placement can only track"},{"line_number":90,"context_line":"inventories and allocations of quantities of resources. It does not track which"},{"line_number":91,"context_line":"specific resources are used. Specificity is needed for NUMA live migration."},{"line_number":92,"context_line":"Consider an instance that uses 4 dedicated CPUs. During live migration, the"},{"line_number":93,"context_line":"scheduler claims those 4 CPUs in placement on the destination. However, we need"},{"line_number":94,"context_line":"to prevent other instances from using those specific CPUs. Therefore, in"},{"line_number":95,"context_line":"addition to claiming quantities of CPUs in placement, we need to claim specific"},{"line_number":96,"context_line":"CPUs on the compute host. The compute resource tracker already exists for"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_b61ecb72","line":93,"range":{"start_line":92,"start_character":49,"end_line":93,"end_character":62},"in_reply_to":"3f79a3b5_9a18a21f","updated":"2018-11-23 00:56:58.000000000","message":"Yeah, this would be in a future where Jay\u0027s spec has been implemented. Clarified in the spec.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":134,"context_line":"Sending the new NUMA Nova objects"},{"line_number":135,"context_line":"---------------------------------"},{"line_number":136,"context_line":""},{"line_number":137,"context_line":"At the cell conductor (live migration isn\u0027t supported between cells, so the"},{"line_number":138,"context_line":"superconductor is not involved) and compute level, the relevant parts of the"},{"line_number":139,"context_line":"current live migration flow can be summarized by the following oversimplified"},{"line_number":140,"context_line":"pseudo sequence diagram.::"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_3abf0ecc","line":137,"range":{"start_line":137,"start_character":7,"end_line":137,"end_character":11},"updated":"2018-11-21 21:38:27.000000000","message":"Not really, see below.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"5706e15af5e1f65264cee6c49a235c441838c656","unresolved":false,"context_lines":[{"line_number":134,"context_line":"Sending the new NUMA Nova objects"},{"line_number":135,"context_line":"---------------------------------"},{"line_number":136,"context_line":""},{"line_number":137,"context_line":"At the cell conductor (live migration isn\u0027t supported between cells, so the"},{"line_number":138,"context_line":"superconductor is not involved) and compute level, the relevant parts of the"},{"line_number":139,"context_line":"current live migration flow can be summarized by the following oversimplified"},{"line_number":140,"context_line":"pseudo sequence diagram.::"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_e236bdb2","line":137,"range":{"start_line":137,"start_character":7,"end_line":137,"end_character":11},"in_reply_to":"3f79a3b5_3abf0ecc","updated":"2018-11-23 00:56:58.000000000","message":"I\u0027m not sure which below this is referring to :(","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"d06004292211eeba512d9a6d9aba90eca18b3b0c","unresolved":false,"context_lines":[{"line_number":134,"context_line":"Sending the new NUMA Nova objects"},{"line_number":135,"context_line":"---------------------------------"},{"line_number":136,"context_line":""},{"line_number":137,"context_line":"At the cell conductor (live migration isn\u0027t supported between cells, so the"},{"line_number":138,"context_line":"superconductor is not involved) and compute level, the relevant parts of the"},{"line_number":139,"context_line":"current live migration flow can be summarized by the following oversimplified"},{"line_number":140,"context_line":"pseudo sequence diagram.::"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_06ede621","line":137,"range":{"start_line":137,"start_character":7,"end_line":137,"end_character":11},"in_reply_to":"3f79a3b5_e236bdb2","updated":"2018-11-26 18:16:38.000000000","message":"I think I meant L173. Point is, superconductor is involved since it\u0027s the thing that calls the scheduler for a destination host and does the pre-live migration check stuff and if those fails on the source/dest computes, reschedules through to the scheduler to find another dest host. The cell conductor is not involved in the orchestration at all because once superconductor finds the hosts that pass the pre-checks, it kicks off the live migration on the source and then everything is just RPC between the computes.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":136,"context_line":""},{"line_number":137,"context_line":"At the cell conductor (live migration isn\u0027t supported between cells, so the"},{"line_number":138,"context_line":"superconductor is not involved) and compute level, the relevant parts of the"},{"line_number":139,"context_line":"current live migration flow can be summarized by the following oversimplified"},{"line_number":140,"context_line":"pseudo sequence diagram.::"},{"line_number":141,"context_line":""},{"line_number":142,"context_line":"    +-----------+                           +---------+                        +-------------+ +---------+"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_5ae3ea03","line":139,"updated":"2018-11-21 21:38:27.000000000","message":"Note that we also have:\n\nhttps://docs.openstack.org/nova/latest/reference/live-migration.html","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":170,"context_line":""},{"line_number":171,"context_line":"In the proposed new flow, the destination compute manager asks the libvirt"},{"line_number":172,"context_line":"driver to calculate the new ``LibvirtGuestConfig`` objects using the new"},{"line_number":173,"context_line":"instance NUMA topology obtained from the move claim. The compute manager"},{"line_number":174,"context_line":"converts those ``LibvirtGuestConfig`` objecs to the new NUMA Nova objects, and"},{"line_number":175,"context_line":"adds them as fields to the ``LibvirtLiveMigrateData`` ``migrate_data`` object."},{"line_number":176,"context_line":"The latter eventually reaches the source libvirt driver, which uses it to"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_9ac66269","line":173,"range":{"start_line":173,"start_character":41,"end_line":173,"end_character":51},"updated":"2018-11-21 21:38:27.000000000","message":"Where is the move claim going to happen? In check_can_live_migrate_destination? I would think it would have to because that\u0027s called from conductor while determining which destination host to use, and if it fails, it will result in going back to the scheduler for another host (note that at this point we\u0027re in superconductor at the top talking to the scheduler, not the cell conductor and we don\u0027t have retries/reschedules within the cell).\n\n(later)\n\nOK I see now from the diagram below, the move claim and such will be done in check_can_live_migrate_destination. I\u0027m not sure if it matters if we do it before or after RPC calling check_can_live_migrate_source on the source. I would think if we can do it at either point though, we should do it before calling check_can_live_migrate_source to save the time if the move claim is going to fail and we reschedule. Does the claim need anything from the migrate_data from the source? Or _get_guest_numa_config(new_instance_numa_topology) for that matter? Anyway, it\u0027s a minor implementation detail at this point.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"5706e15af5e1f65264cee6c49a235c441838c656","unresolved":false,"context_lines":[{"line_number":170,"context_line":""},{"line_number":171,"context_line":"In the proposed new flow, the destination compute manager asks the libvirt"},{"line_number":172,"context_line":"driver to calculate the new ``LibvirtGuestConfig`` objects using the new"},{"line_number":173,"context_line":"instance NUMA topology obtained from the move claim. The compute manager"},{"line_number":174,"context_line":"converts those ``LibvirtGuestConfig`` objecs to the new NUMA Nova objects, and"},{"line_number":175,"context_line":"adds them as fields to the ``LibvirtLiveMigrateData`` ``migrate_data`` object."},{"line_number":176,"context_line":"The latter eventually reaches the source libvirt driver, which uses it to"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_d1f03195","line":173,"range":{"start_line":173,"start_character":41,"end_line":173,"end_character":51},"in_reply_to":"3f79a3b5_9ac66269","updated":"2018-11-23 00:56:58.000000000","message":"If you\u0027re OK with leaving this as an implementation detail, that\u0027s fine with me.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":212,"context_line":"          | live_migration(migrate_data + new NUMA Nova objects)   |                                   |                                                      |"},{"line_number":213,"context_line":"          |-------------------------------------------------------\u003e|                                   |                                                      |"},{"line_number":214,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":215,"context_line":"          |                            +-------------------------+ |                                   |                                                      |"},{"line_number":216,"context_line":"          |                            | pre_live_migration call |-|                                   |                                                      |"},{"line_number":217,"context_line":"          |                            +-------------------------+ |                                   |                                                      |"},{"line_number":218,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":219,"context_line":"          |                                                        | live_migration(migrate_data + new NUMA Nova objects)                                     |"},{"line_number":220,"context_line":"          |                                                        |-----------------------------------------------------------------------------------------\u003e|"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_1a8ed225","line":217,"range":{"start_line":215,"start_character":39,"end_line":217,"end_character":69},"updated":"2018-11-21 21:38:27.000000000","message":"This must just be shorthand for the destination running pre_live_migration? Because that\u0027s where it happens, not the source. Conductor calls source.live_migration which calls dest.pre_live_migration and returns the resulting migrate_data to the source which then calls the driver to build the guest xml and start the transfer.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"5706e15af5e1f65264cee6c49a235c441838c656","unresolved":false,"context_lines":[{"line_number":212,"context_line":"          | live_migration(migrate_data + new NUMA Nova objects)   |                                   |                                                      |"},{"line_number":213,"context_line":"          |-------------------------------------------------------\u003e|                                   |                                                      |"},{"line_number":214,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":215,"context_line":"          |                            +-------------------------+ |                                   |                                                      |"},{"line_number":216,"context_line":"          |                            | pre_live_migration call |-|                                   |                                                      |"},{"line_number":217,"context_line":"          |                            +-------------------------+ |                                   |                                                      |"},{"line_number":218,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":219,"context_line":"          |                                                        | live_migration(migrate_data + new NUMA Nova objects)                                     |"},{"line_number":220,"context_line":"          |                                                        |-----------------------------------------------------------------------------------------\u003e|"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_b1d6d553","line":217,"range":{"start_line":215,"start_character":39,"end_line":217,"end_character":69},"in_reply_to":"3f79a3b5_1a8ed225","updated":"2018-11-23 00:56:58.000000000","message":"Yeah, this is shorthand for the source calling pre_live_migration on the dest. I didn\u0027t want to draw the full back and forth, but now that I look at it again I\u0027m not gaining much, I\u0027ll just spell it out.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":219,"context_line":"          |                                                        | live_migration(migrate_data + new NUMA Nova objects)                                     |"},{"line_number":220,"context_line":"          |                                                        |-----------------------------------------------------------------------------------------\u003e|"},{"line_number":221,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":222,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":223,"context_line":"          |                                                        |                                   |                | generate NUMA XML for destination |-|"},{"line_number":224,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":225,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":226,"context_line":""},{"line_number":227,"context_line":"Alternatives"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_5a9f6a4d","line":224,"range":{"start_line":222,"start_character":120,"end_line":224,"end_character":157},"updated":"2018-11-21 21:38:27.000000000","message":"I\u0027m not sure what this is....oh I guess it\u0027s the driver (on the source) building the xml using the migrate_data with the new NUMA topology from the dest. Got it.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":223,"context_line":"          |                                                        |                                   |                | generate NUMA XML for destination |-|"},{"line_number":224,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":225,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":226,"context_line":""},{"line_number":227,"context_line":"Alternatives"},{"line_number":228,"context_line":"------------"},{"line_number":229,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_7ac5863d","line":226,"updated":"2018-11-21 21:38:27.000000000","message":"Something I don\u0027t see mentioned here is what happens in _post_live_migration and _rollback_live_migration? Something has to happen with the claim, right? As in, I think, the move_claim will create the migration_context for the instance and in post (success) we\u0027ll apply the migration context and in rollback (failure) we\u0027ll revert the migration context.\n\nWhat do we do about freeing up claimed resources on the source in case of a successful live migration and freeing up claimed resources on the dest in case of a failed live migration? I\u0027m not totally sure if that\u0027s the migration_context handling that or what.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"5706e15af5e1f65264cee6c49a235c441838c656","unresolved":false,"context_lines":[{"line_number":223,"context_line":"          |                                                        |                                   |                | generate NUMA XML for destination |-|"},{"line_number":224,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":225,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":226,"context_line":""},{"line_number":227,"context_line":"Alternatives"},{"line_number":228,"context_line":"------------"},{"line_number":229,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_a23a05c2","line":226,"in_reply_to":"3f79a3b5_7ac5863d","updated":"2018-11-23 00:56:58.000000000","message":"Claims are context managers, so if we put it around the right code, any unhandled exception will cause the exit() method to clean up by calling claim.abort() and then resource_tracker.drop_move_claim(). I\u0027ve added this to the spec.\n\nIf the migration completes successfully, I *think* we just have to call drop_move_claim manually. That\u0027s what _confirm_resize in the compute manager does, at any rate. I\u0027ve added this to the spec as well.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"d06004292211eeba512d9a6d9aba90eca18b3b0c","unresolved":false,"context_lines":[{"line_number":223,"context_line":"          |                                                        |                                   |                | generate NUMA XML for destination |-|"},{"line_number":224,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":225,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":226,"context_line":""},{"line_number":227,"context_line":"Alternatives"},{"line_number":228,"context_line":"------------"},{"line_number":229,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_065406c4","line":226,"in_reply_to":"3f79a3b5_a23a05c2","updated":"2018-11-26 18:16:38.000000000","message":"\u003e Claims are context managers, so if we put it around the right code, any unhandled exception will cause the exit() method to clean up by calling claim.abort() and then resource_tracker.drop_move_claim().\n\nOK, assuming we don\u0027t do some RPC cast and lose track of that. I haven\u0027t done an audit, but I think any back and forth between compute and dest during post/rollback is done using RPC calls.\n\nBut having said that, the claim is done before we even get to source.live_migration...so you\u0027ve lost the claim context manager at that point. Are you going to pass the Claim object back to conductor to pass down to source.live_migration()? Or use the instance.migration_context? I mean, this is kind of the reason we have the migration_context I think, because for cold migration the computes are RPC casting between themselves, so you can\u0027t really hold the claim context manager during that process.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":228,"context_line":"------------"},{"line_number":229,"context_line":""},{"line_number":230,"context_line":"Using move claims and the new instance NUMA topology calculated within"},{"line_number":231,"context_line":"essentially dictates the rest of the implementation. For example, it would be"},{"line_number":232,"context_line":"possible to reuse the result of ``numa_fit_instance_to_host`` as called from"},{"line_number":233,"context_line":"the scheduler before the live migration reaches the conductor. However, the"},{"line_number":234,"context_line":"claim would still calculate its own new instance NUMA topology."},{"line_number":235,"context_line":""},{"line_number":236,"context_line":"Data model impact"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_1a301206","line":233,"range":{"start_line":231,"start_character":53,"end_line":233,"end_character":62},"updated":"2018-11-21 21:38:27.000000000","message":"I\u0027m not exactly sure what this is saying. (super)conductor at the top is calling the scheduler to get a host (assuming the live migration dest host isn\u0027t forced....but that\u0027s gibi\u0027s spec to deprecate that thing, but it could still happen for anyone using openstack CLI to live migration one of these types of instances). Then conductor will do the pre-checks on the dest host and if those fails, it will reschedule, that all happens in here:\n\nhttps://github.com/openstack/nova/blob/1d444704a24a7103fb3cb73e451e4bff292f6467/nova/conductor/tasks/live_migrate.py#L359","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"5706e15af5e1f65264cee6c49a235c441838c656","unresolved":false,"context_lines":[{"line_number":228,"context_line":"------------"},{"line_number":229,"context_line":""},{"line_number":230,"context_line":"Using move claims and the new instance NUMA topology calculated within"},{"line_number":231,"context_line":"essentially dictates the rest of the implementation. For example, it would be"},{"line_number":232,"context_line":"possible to reuse the result of ``numa_fit_instance_to_host`` as called from"},{"line_number":233,"context_line":"the scheduler before the live migration reaches the conductor. However, the"},{"line_number":234,"context_line":"claim would still calculate its own new instance NUMA topology."},{"line_number":235,"context_line":""},{"line_number":236,"context_line":"Data model impact"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_4878f37b","line":233,"range":{"start_line":231,"start_character":53,"end_line":233,"end_character":62},"in_reply_to":"3f79a3b5_1a301206","updated":"2018-11-23 00:56:58.000000000","message":"Err, yeah, I messed up the ordering here. Clarified.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":275,"context_line":""},{"line_number":276,"context_line":"None."},{"line_number":277,"context_line":""},{"line_number":278,"context_line":".. _upgrade-impact:"},{"line_number":279,"context_line":""},{"line_number":280,"context_line":"Upgrade impact"},{"line_number":281,"context_line":"--------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_5afa0a12","line":278,"range":{"start_line":278,"start_character":4,"end_line":278,"end_character":18},"updated":"2018-11-21 21:38:27.000000000","message":"You don\u0027t really need anchors for sections, sections themselves are implied anchors and you can refer to them, e.g. \"see the `Upgrade impact`_ section for more details.\"","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"5706e15af5e1f65264cee6c49a235c441838c656","unresolved":false,"context_lines":[{"line_number":275,"context_line":""},{"line_number":276,"context_line":"None."},{"line_number":277,"context_line":""},{"line_number":278,"context_line":".. _upgrade-impact:"},{"line_number":279,"context_line":""},{"line_number":280,"context_line":"Upgrade impact"},{"line_number":281,"context_line":"--------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_27d7e7f6","line":278,"range":{"start_line":278,"start_character":4,"end_line":278,"end_character":18},"in_reply_to":"3f79a3b5_5afa0a12","updated":"2018-11-23 00:56:58.000000000","message":"Oh, I think this is leftover from when I linked to this section internally. Removed.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":296,"context_line":"While the first case is acceptable, the second case is not. For this reason,"},{"line_number":297,"context_line":"this spec proposes to refuse to perform a NUMA live migration unless both"},{"line_number":298,"context_line":"source and destination compute hosts have been upgraded to a version that"},{"line_number":299,"context_line":"supports it. To achieve this, the conductor can check the source and"},{"line_number":300,"context_line":"destination compute\u0027s service version and fail the migration if either one is"},{"line_number":301,"context_line":"too old."},{"line_number":302,"context_line":""},{"line_number":303,"context_line":"Implementation"},{"line_number":304,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_d5034b0a","line":301,"range":{"start_line":299,"start_character":13,"end_line":301,"end_character":8},"updated":"2018-11-21 21:38:27.000000000","message":"Thumbs up.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"f55bb071297c9efc0462e668db25394e87764b96","unresolved":false,"context_lines":[{"line_number":309,"context_line":"Primary assignee:"},{"line_number":310,"context_line":"  notartom"},{"line_number":311,"context_line":""},{"line_number":312,"context_line":"Work Items"},{"line_number":313,"context_line":"----------"},{"line_number":314,"context_line":""},{"line_number":315,"context_line":"* Add NUMA Nova objects"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_b54c8f74","line":312,"updated":"2018-11-21 21:38:27.000000000","message":"Seems that https://review.openstack.org/#/c/611088/ should be worked in here somewhere yeah?","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"5706e15af5e1f65264cee6c49a235c441838c656","unresolved":false,"context_lines":[{"line_number":309,"context_line":"Primary assignee:"},{"line_number":310,"context_line":"  notartom"},{"line_number":311,"context_line":""},{"line_number":312,"context_line":"Work Items"},{"line_number":313,"context_line":"----------"},{"line_number":314,"context_line":""},{"line_number":315,"context_line":"* Add NUMA Nova objects"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3f79a3b5_e751ef66","line":312,"in_reply_to":"3f79a3b5_b54c8f74","updated":"2018-11-23 00:56:58.000000000","message":"Sure.","commit_id":"04c77957b199a5aa34f69654d870c4a7d440756c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"0e726e02df88fa27f84db3dd5dfc467fe7df2321","unresolved":false,"context_lines":[{"line_number":136,"context_line":"Sending the new NUMA Nova objects"},{"line_number":137,"context_line":"---------------------------------"},{"line_number":138,"context_line":""},{"line_number":139,"context_line":"At the cell conductor (live migration isn\u0027t supported between cells, so the"},{"line_number":140,"context_line":"superconductor is not involved) and compute level, the relevant parts of the"},{"line_number":141,"context_line":"current live migration flow can be summarized by the following oversimplified"},{"line_number":142,"context_line":"pseudo sequence diagram.::"},{"line_number":143,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f79a3b5_e6a1caa8","line":140,"range":{"start_line":139,"start_character":0,"end_line":140,"end_character":31},"updated":"2018-11-26 18:31:19.000000000","message":"This is wrong, see reply in PS6. It\u0027s a minor detail at this point though.","commit_id":"47b1bd8745b09740913f5fd68e233825b10179a7"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"e449df7866a73b4b3e5abaeda160159d2fed1af7","unresolved":false,"context_lines":[{"line_number":136,"context_line":"Sending the new NUMA Nova objects"},{"line_number":137,"context_line":"---------------------------------"},{"line_number":138,"context_line":""},{"line_number":139,"context_line":"At the cell conductor (live migration isn\u0027t supported between cells, so the"},{"line_number":140,"context_line":"superconductor is not involved) and compute level, the relevant parts of the"},{"line_number":141,"context_line":"current live migration flow can be summarized by the following oversimplified"},{"line_number":142,"context_line":"pseudo sequence diagram.::"},{"line_number":143,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f79a3b5_f6ae552f","line":140,"range":{"start_line":139,"start_character":0,"end_line":140,"end_character":31},"in_reply_to":"3f79a3b5_e6a1caa8","updated":"2018-11-27 17:49:19.000000000","message":"Done.","commit_id":"47b1bd8745b09740913f5fd68e233825b10179a7"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"0e726e02df88fa27f84db3dd5dfc467fe7df2321","unresolved":false,"context_lines":[{"line_number":226,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":227,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":228,"context_line":""},{"line_number":229,"context_line":"The claim object is a context manager. It therefore has an ``exit`` method that"},{"line_number":230,"context_line":"calls ``abort`` and eventually ``drop_move_claim`` if an unhandled exception"},{"line_number":231,"context_line":"is raised by any code within the context. This provides transparent claim"},{"line_number":232,"context_line":"rollback if the live migration fails. If the live migration succeeds,"},{"line_number":233,"context_line":"``drop_move_claim`` needs to be called manually, similar to how"},{"line_number":234,"context_line":"``_confirm_resize`` does it in the compute manager. Whether to do this in"},{"line_number":235,"context_line":"``post_live_migration`` on the source or ``post_live_migration_at_destination``"}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f79a3b5_e68f8a1e","line":232,"range":{"start_line":229,"start_character":0,"end_line":232,"end_character":37},"updated":"2018-11-26 18:31:19.000000000","message":"See reply in PS6. This is true to a point but the context manager aspect is lost unless you intend to pass the claim back from check_can_live_migrate_destination to conductor which will then pass it down to the source service live_migration method which will use it as a context manager?","commit_id":"47b1bd8745b09740913f5fd68e233825b10179a7"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"fabafb781bebd41a1d90816461f39e539c4d7ed4","unresolved":false,"context_lines":[{"line_number":226,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":227,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":228,"context_line":""},{"line_number":229,"context_line":"The claim object is a context manager. It therefore has an ``exit`` method that"},{"line_number":230,"context_line":"calls ``abort`` and eventually ``drop_move_claim`` if an unhandled exception"},{"line_number":231,"context_line":"is raised by any code within the context. This provides transparent claim"},{"line_number":232,"context_line":"rollback if the live migration fails. If the live migration succeeds,"},{"line_number":233,"context_line":"``drop_move_claim`` needs to be called manually, similar to how"},{"line_number":234,"context_line":"``_confirm_resize`` does it in the compute manager. Whether to do this in"},{"line_number":235,"context_line":"``post_live_migration`` on the source or ``post_live_migration_at_destination``"}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f79a3b5_93ca7a15","line":232,"range":{"start_line":229,"start_character":0,"end_line":232,"end_character":37},"in_reply_to":"3f79a3b5_8aefa2b0","updated":"2018-11-27 19:00:00.000000000","message":"Sure.","commit_id":"47b1bd8745b09740913f5fd68e233825b10179a7"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"e449df7866a73b4b3e5abaeda160159d2fed1af7","unresolved":false,"context_lines":[{"line_number":226,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":227,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":228,"context_line":""},{"line_number":229,"context_line":"The claim object is a context manager. It therefore has an ``exit`` method that"},{"line_number":230,"context_line":"calls ``abort`` and eventually ``drop_move_claim`` if an unhandled exception"},{"line_number":231,"context_line":"is raised by any code within the context. This provides transparent claim"},{"line_number":232,"context_line":"rollback if the live migration fails. If the live migration succeeds,"},{"line_number":233,"context_line":"``drop_move_claim`` needs to be called manually, similar to how"},{"line_number":234,"context_line":"``_confirm_resize`` does it in the compute manager. Whether to do this in"},{"line_number":235,"context_line":"``post_live_migration`` on the source or ``post_live_migration_at_destination``"}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f79a3b5_8aefa2b0","line":232,"range":{"start_line":229,"start_character":0,"end_line":232,"end_character":37},"in_reply_to":"3f79a3b5_e68f8a1e","updated":"2018-11-27 17:49:19.000000000","message":"Ah, yeah, that\u0027s problematic. I think I\u0027ll just say we\u0027ll call drop_move_claim with the correct host manually in all the places that need it, and leave the details for the implementation. Would that fly?","commit_id":"47b1bd8745b09740913f5fd68e233825b10179a7"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"0e726e02df88fa27f84db3dd5dfc467fe7df2321","unresolved":false,"context_lines":[{"line_number":229,"context_line":"The claim object is a context manager. It therefore has an ``exit`` method that"},{"line_number":230,"context_line":"calls ``abort`` and eventually ``drop_move_claim`` if an unhandled exception"},{"line_number":231,"context_line":"is raised by any code within the context. This provides transparent claim"},{"line_number":232,"context_line":"rollback if the live migration fails. If the live migration succeeds,"},{"line_number":233,"context_line":"``drop_move_claim`` needs to be called manually, similar to how"},{"line_number":234,"context_line":"``_confirm_resize`` does it in the compute manager. Whether to do this in"},{"line_number":235,"context_line":"``post_live_migration`` on the source or ``post_live_migration_at_destination``"},{"line_number":236,"context_line":"is left as an implementation detail."},{"line_number":237,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f79a3b5_c61daebf","line":234,"range":{"start_line":232,"start_character":38,"end_line":234,"end_character":51},"updated":"2018-11-26 18:31:19.000000000","message":"Honestly I don\u0027t know why _confirm_resize calls drop_move_claim, since during a resize the move claim is done on the dest host during prep_resize:\n\nhttps://github.com/openstack/nova/blob/594c653dc1a312d0364ad24c703e1a9b228133e1/nova/compute/manager.py#L4182\n\nIt makes sense to me for revert_resize to call drop_move_claim since that runs on the dest machine and we want to drop the claim on resources since we\u0027re removing the resized instance from that host during the revert.\n\nI would think drop_move_claim in the live migration scenario happens on failure during rollback. Since _rollback_live_migration runs on the source service but RPC casts to rollback_live_migration_at_destination on the dest, I\u0027d think we\u0027d drop the move claim in rollback_live_migration_at_destination but note that we only call that conditionally (because it historically has to do with nova-network and backing file storage).","commit_id":"47b1bd8745b09740913f5fd68e233825b10179a7"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"fabafb781bebd41a1d90816461f39e539c4d7ed4","unresolved":false,"context_lines":[{"line_number":229,"context_line":"The claim object is a context manager. It therefore has an ``exit`` method that"},{"line_number":230,"context_line":"calls ``abort`` and eventually ``drop_move_claim`` if an unhandled exception"},{"line_number":231,"context_line":"is raised by any code within the context. This provides transparent claim"},{"line_number":232,"context_line":"rollback if the live migration fails. If the live migration succeeds,"},{"line_number":233,"context_line":"``drop_move_claim`` needs to be called manually, similar to how"},{"line_number":234,"context_line":"``_confirm_resize`` does it in the compute manager. Whether to do this in"},{"line_number":235,"context_line":"``post_live_migration`` on the source or ``post_live_migration_at_destination``"},{"line_number":236,"context_line":"is left as an implementation detail."},{"line_number":237,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f79a3b5_73955ee1","line":234,"range":{"start_line":232,"start_character":38,"end_line":234,"end_character":51},"in_reply_to":"3f79a3b5_2cfa4010","updated":"2018-11-27 19:00:00.000000000","message":"Yeah drop_move_claim always throws me because for cold migration, the move claim is made on the dest host in prep_resize, and then on confirm we call drop_move_claim on the source, but that\u0027s to free up usage on the source and on revert resize we call drop_move_claim() on the dest because we destroy the guest from the dest and restart it on the source. Anyway, it was confusion on my part so I think we\u0027re OK.","commit_id":"47b1bd8745b09740913f5fd68e233825b10179a7"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"e449df7866a73b4b3e5abaeda160159d2fed1af7","unresolved":false,"context_lines":[{"line_number":229,"context_line":"The claim object is a context manager. It therefore has an ``exit`` method that"},{"line_number":230,"context_line":"calls ``abort`` and eventually ``drop_move_claim`` if an unhandled exception"},{"line_number":231,"context_line":"is raised by any code within the context. This provides transparent claim"},{"line_number":232,"context_line":"rollback if the live migration fails. If the live migration succeeds,"},{"line_number":233,"context_line":"``drop_move_claim`` needs to be called manually, similar to how"},{"line_number":234,"context_line":"``_confirm_resize`` does it in the compute manager. Whether to do this in"},{"line_number":235,"context_line":"``post_live_migration`` on the source or ``post_live_migration_at_destination``"},{"line_number":236,"context_line":"is left as an implementation detail."},{"line_number":237,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f79a3b5_2cfa4010","line":234,"range":{"start_line":232,"start_character":38,"end_line":234,"end_character":51},"in_reply_to":"3f79a3b5_c61daebf","updated":"2018-11-27 17:49:19.000000000","message":"\u003e Honestly I don\u0027t know why _confirm_resize calls drop_move_claim,\n \u003e since during a resize the move claim is done on the dest host\n \u003e during prep_resize:\n \u003e \n \u003e https://github.com/openstack/nova/blob/594c653dc1a312d0364ad24c703e1a9b228133e1/nova/compute/manager.py#L4182\n \u003e \n \u003e It makes sense to me for revert_resize to call drop_move_claim\n \u003e since that runs on the dest machine and we want to drop the claim\n \u003e on resources since we\u0027re removing the resized instance from that\n \u003e host during the revert.\n\nI think the top comment in drop_move_claim is misleading. It says \"on the destination node\", buf if you look at the code, it eventually calls self._update() with the nodename given as an argument to drop_move_claim(). So back in _confirm_resize(), it calls drop_move_claim() with the source nodename to leave only the destination\u0027s resources used up. I proposed https://review.openstack.org/620170 to clear that up.\n\n \u003e \n \u003e I would think drop_move_claim in the live migration scenario\n \u003e happens on failure during rollback. Since _rollback_live_migration\n \u003e runs on the source service but RPC casts to rollback_live_migration_at_destination\n \u003e on the dest, I\u0027d think we\u0027d drop the move claim in\n \u003e rollback_live_migration_at_destination but note that we only call\n \u003e that conditionally (because it historically has to do with\n \u003e nova-network and backing file storage).","commit_id":"47b1bd8745b09740913f5fd68e233825b10179a7"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"e449df7866a73b4b3e5abaeda160159d2fed1af7","unresolved":false,"context_lines":[{"line_number":229,"context_line":"The claim object is a context manager. It therefore has an ``exit`` method that"},{"line_number":230,"context_line":"calls ``abort`` and eventually ``drop_move_claim`` if an unhandled exception"},{"line_number":231,"context_line":"is raised by any code within the context. This provides transparent claim"},{"line_number":232,"context_line":"rollback if the live migration fails. If the live migration succeeds,"},{"line_number":233,"context_line":"``drop_move_claim`` needs to be called manually, similar to how"},{"line_number":234,"context_line":"``_confirm_resize`` does it in the compute manager. Whether to do this in"},{"line_number":235,"context_line":"``post_live_migration`` on the source or ``post_live_migration_at_destination``"},{"line_number":236,"context_line":"is left as an implementation detail."},{"line_number":237,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f79a3b5_299c726e","line":234,"range":{"start_line":232,"start_character":38,"end_line":234,"end_character":51},"in_reply_to":"3f79a3b5_c61daebf","updated":"2018-11-27 17:49:19.000000000","message":"\u003e Honestly I don\u0027t know why _confirm_resize calls drop_move_claim,\n \u003e since during a resize the move claim is done on the dest host\n \u003e during prep_resize:\n \u003e \n \u003e https://github.com/openstack/nova/blob/594c653dc1a312d0364ad24c703e1a9b228133e1/nova/compute/manager.py#L4182\n \u003e \n \u003e It makes sense to me for revert_resize to call drop_move_claim\n \u003e since that runs on the dest machine and we want to drop the claim\n \u003e on resources since we\u0027re removing the resized instance from that\n \u003e host during the revert.\n \u003e \n \u003e I would think drop_move_claim in the live migration scenario\n \u003e happens on failure during rollback. Since _rollback_live_migration\n \u003e runs on the source service but RPC casts to rollback_live_migration_at_destination\n \u003e on the dest, I\u0027d think we\u0027d drop the move claim in\n \u003e rollback_live_migration_at_destination but note that we only call\n \u003e that conditionally (because it historically has to do with\n \u003e nova-network and backing file storage).","commit_id":"47b1bd8745b09740913f5fd68e233825b10179a7"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":26,"context_line":"for example CPU pinning and hugepages. CPU pinning can be achieved without a"},{"line_number":27,"context_line":"guest NUMA topology, but the two concepts are unfortunately tightly coupled in"},{"line_number":28,"context_line":"Nova and instance pinning is not possible without an instance NUMA topology."},{"line_number":29,"context_line":"For this reason, NUMA is used as a catchall term."},{"line_number":30,"context_line":""},{"line_number":31,"context_line":"The problem can best be described with three examples."},{"line_number":32,"context_line":""}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_d15e98e0","line":29,"updated":"2018-11-28 16:14:50.000000000","message":"nit: maybe the above paragraph could be just a RST note (given it\u0027s not the problem)","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":26,"context_line":"for example CPU pinning and hugepages. CPU pinning can be achieved without a"},{"line_number":27,"context_line":"guest NUMA topology, but the two concepts are unfortunately tightly coupled in"},{"line_number":28,"context_line":"Nova and instance pinning is not possible without an instance NUMA topology."},{"line_number":29,"context_line":"For this reason, NUMA is used as a catchall term."},{"line_number":30,"context_line":""},{"line_number":31,"context_line":"The problem can best be described with three examples."},{"line_number":32,"context_line":""}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_ffa35afb","line":29,"in_reply_to":"3f79a3b5_d15e98e0","updated":"2018-11-29 00:37:41.000000000","message":"Sure.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":31,"context_line":"The problem can best be described with three examples."},{"line_number":32,"context_line":""},{"line_number":33,"context_line":"The first example is live migration with CPU pinning. An instance with a"},{"line_number":34,"context_line":"``hw:cpu_policy\u003ddedicated`` extra spec and pinned CPUs is live-migrated.  Its"},{"line_number":35,"context_line":"pin mappings are naively copied over to the destination host. This creates two"},{"line_number":36,"context_line":"problems.  First, its pinned pCPUs aren\u0027t properly claimed on the destination."},{"line_number":37,"context_line":"This means that, should a second instance with pinned CPUs land on the"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_71b6a4fe","line":34,"range":{"start_line":34,"start_character":0,"end_line":34,"end_character":39},"updated":"2018-11-28 16:14:50.000000000","message":"nit: you should provide https://docs.openstack.org/nova/latest/user/flavors.html#extra-specs as a reference but meh.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":31,"context_line":"The problem can best be described with three examples."},{"line_number":32,"context_line":""},{"line_number":33,"context_line":"The first example is live migration with CPU pinning. An instance with a"},{"line_number":34,"context_line":"``hw:cpu_policy\u003ddedicated`` extra spec and pinned CPUs is live-migrated.  Its"},{"line_number":35,"context_line":"pin mappings are naively copied over to the destination host. This creates two"},{"line_number":36,"context_line":"problems.  First, its pinned pCPUs aren\u0027t properly claimed on the destination."},{"line_number":37,"context_line":"This means that, should a second instance with pinned CPUs land on the"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_bf82c249","line":34,"range":{"start_line":34,"start_character":0,"end_line":34,"end_character":39},"in_reply_to":"3f79a3b5_71b6a4fe","updated":"2018-11-29 00:37:41.000000000","message":"Done","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":36,"context_line":"problems.  First, its pinned pCPUs aren\u0027t properly claimed on the destination."},{"line_number":37,"context_line":"This means that, should a second instance with pinned CPUs land on the"},{"line_number":38,"context_line":"destination, both instances\u0027 vCPUs could be pinned to the same pCPUs. Second,"},{"line_number":39,"context_line":"any existing pin mappings on the destination are ignored. If another instance"},{"line_number":40,"context_line":"already exists on the destination, both instances\u0027s vCPUs could be pinned to"},{"line_number":41,"context_line":"the same pCPUs. In both cases, the ``dedicated`` CPU policy is violated,"},{"line_number":42,"context_line":"potentially leading to unpredictable performance degradation."}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_911100b1","line":39,"range":{"start_line":39,"start_character":44,"end_line":39,"end_character":57},"updated":"2018-11-28 16:14:50.000000000","message":"for libvirt at least (because libvirtd doesn\u0027t provide an exception if so)","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":36,"context_line":"problems.  First, its pinned pCPUs aren\u0027t properly claimed on the destination."},{"line_number":37,"context_line":"This means that, should a second instance with pinned CPUs land on the"},{"line_number":38,"context_line":"destination, both instances\u0027 vCPUs could be pinned to the same pCPUs. Second,"},{"line_number":39,"context_line":"any existing pin mappings on the destination are ignored. If another instance"},{"line_number":40,"context_line":"already exists on the destination, both instances\u0027s vCPUs could be pinned to"},{"line_number":41,"context_line":"the same pCPUs. In both cases, the ``dedicated`` CPU policy is violated,"},{"line_number":42,"context_line":"potentially leading to unpredictable performance degradation."}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_7f02eaba","line":39,"range":{"start_line":39,"start_character":44,"end_line":39,"end_character":57},"in_reply_to":"3f79a3b5_911100b1","updated":"2018-11-29 00:37:41.000000000","message":"Yeah, this spec is mostly for libvirt. I\u0027ll add a note to that effect on top.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":48,"context_line":"all 8 of its pages. One instance is live-migrated to the other host. The"},{"line_number":49,"context_line":"libvirt driver enforces strict NUMA affinity and does not regenerate the"},{"line_number":50,"context_line":"instance XML. Both instances end up on the hosts NUMA node 0, and the"},{"line_number":51,"context_line":"live-migrated instance fails to run."},{"line_number":52,"context_line":""},{"line_number":53,"context_line":"The third example is an instance with a virtual NUMA topology (but without"},{"line_number":54,"context_line":"hugepages). If an instance affined to its host\u0027s NUMA node 2 is live migrated"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_9c1117b1","line":51,"range":{"start_line":51,"start_character":0,"end_line":51,"end_character":36},"updated":"2018-11-28 16:14:50.000000000","message":"that\u0027s not really the same problem than above, because libvirt provides this time an exception (instead of silently accepting the wrong instance), but yeah, that\u0027s still an issue.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":48,"context_line":"all 8 of its pages. One instance is live-migrated to the other host. The"},{"line_number":49,"context_line":"libvirt driver enforces strict NUMA affinity and does not regenerate the"},{"line_number":50,"context_line":"instance XML. Both instances end up on the hosts NUMA node 0, and the"},{"line_number":51,"context_line":"live-migrated instance fails to run."},{"line_number":52,"context_line":""},{"line_number":53,"context_line":"The third example is an instance with a virtual NUMA topology (but without"},{"line_number":54,"context_line":"hugepages). If an instance affined to its host\u0027s NUMA node 2 is live migrated"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_df1bfe19","line":51,"range":{"start_line":51,"start_character":0,"end_line":51,"end_character":36},"in_reply_to":"3f79a3b5_9c1117b1","updated":"2018-11-29 00:37:41.000000000","message":"That\u0027s why it\u0027s a different example :)","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":77,"context_line":"NUMA characteristics need to be recalculated to fit on the new host. Second,"},{"line_number":78,"context_line":"the resources that the instance will consume on the new host need to be"},{"line_number":79,"context_line":"claimed. Third, information about the instance\u0027s new NUMA characteristics needs"},{"line_number":80,"context_line":"to be generated on the destination (an ``InstanceNUMATopolgy`` object is not"},{"line_number":81,"context_line":"enough, more on that later). Fourth, this information needs to be sent from"},{"line_number":82,"context_line":"the destination to the source, in order for the source to generate the correct"},{"line_number":83,"context_line":"XML for the instance to be able to run on the destination. Finally, the"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_9c9277fa","line":80,"range":{"start_line":80,"start_character":6,"end_line":80,"end_character":15},"updated":"2018-11-28 16:14:50.000000000","message":"nit: regenerated\n\nThe corollar is that the possibly existing InstanceNUMATopology should be \u0027unpersisted\u0027 from the instance.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":77,"context_line":"NUMA characteristics need to be recalculated to fit on the new host. Second,"},{"line_number":78,"context_line":"the resources that the instance will consume on the new host need to be"},{"line_number":79,"context_line":"claimed. Third, information about the instance\u0027s new NUMA characteristics needs"},{"line_number":80,"context_line":"to be generated on the destination (an ``InstanceNUMATopolgy`` object is not"},{"line_number":81,"context_line":"enough, more on that later). Fourth, this information needs to be sent from"},{"line_number":82,"context_line":"the destination to the source, in order for the source to generate the correct"},{"line_number":83,"context_line":"XML for the instance to be able to run on the destination. Finally, the"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_7fd0aa2a","line":80,"range":{"start_line":80,"start_character":6,"end_line":80,"end_character":15},"in_reply_to":"3f79a3b5_9c9277fa","updated":"2018-11-29 00:37:41.000000000","message":"No, it\u0027s \"generated\", because it\u0027s \"information about the *new* NUMA topology\" :)","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":82,"context_line":"the destination to the source, in order for the source to generate the correct"},{"line_number":83,"context_line":"XML for the instance to be able to run on the destination. Finally, the"},{"line_number":84,"context_line":"instance\u0027s resource claims need to \"converge\" to reflect the success or failure"},{"line_number":85,"context_line":"of the live migration. If the live migration succeeded, the claim on the source"},{"line_number":86,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":87,"context_line":"rolled back."},{"line_number":88,"context_line":""}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_9c2ef7a7","line":85,"range":{"start_line":85,"start_character":45,"end_line":85,"end_character":54},"updated":"2018-11-28 16:14:50.000000000","message":"You could detail the failure cases :\n- libvirtd raises an exception\n- the claim doesn\u0027t work\n\nI don\u0027t see other failures that would mean the live migration didn\u0027t succeed.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"67eaa7df4966e93e19b5b7f548602d0b9898901b","unresolved":false,"context_lines":[{"line_number":82,"context_line":"the destination to the source, in order for the source to generate the correct"},{"line_number":83,"context_line":"XML for the instance to be able to run on the destination. Finally, the"},{"line_number":84,"context_line":"instance\u0027s resource claims need to \"converge\" to reflect the success or failure"},{"line_number":85,"context_line":"of the live migration. If the live migration succeeded, the claim on the source"},{"line_number":86,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":87,"context_line":"rolled back."},{"line_number":88,"context_line":""}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_f3888ec4","line":85,"range":{"start_line":85,"start_character":60,"end_line":85,"end_character":65},"updated":"2018-11-27 19:06:17.000000000","message":"nit: \"usage\" might be more appropriate here since we\u0027re claiming on the dest.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":82,"context_line":"the destination to the source, in order for the source to generate the correct"},{"line_number":83,"context_line":"XML for the instance to be able to run on the destination. Finally, the"},{"line_number":84,"context_line":"instance\u0027s resource claims need to \"converge\" to reflect the success or failure"},{"line_number":85,"context_line":"of the live migration. If the live migration succeeded, the claim on the source"},{"line_number":86,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":87,"context_line":"rolled back."},{"line_number":88,"context_line":""}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_3faf929c","line":85,"range":{"start_line":85,"start_character":45,"end_line":85,"end_character":54},"in_reply_to":"3f79a3b5_9c2ef7a7","updated":"2018-11-29 00:37:41.000000000","message":"If the claim doesn\u0027t work it\u0027s a MigrationPreCheck error and we try a different host - that\u0027s part of the \"Second, the resource balh blah\". I don\u0027t want to add it here since it\u0027s just an intro, but I\u0027ll put in the Resource Claims paragraph below.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":82,"context_line":"the destination to the source, in order for the source to generate the correct"},{"line_number":83,"context_line":"XML for the instance to be able to run on the destination. Finally, the"},{"line_number":84,"context_line":"instance\u0027s resource claims need to \"converge\" to reflect the success or failure"},{"line_number":85,"context_line":"of the live migration. If the live migration succeeded, the claim on the source"},{"line_number":86,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":87,"context_line":"rolled back."},{"line_number":88,"context_line":""}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_9f59062b","line":85,"range":{"start_line":85,"start_character":60,"end_line":85,"end_character":65},"in_reply_to":"3f79a3b5_f3888ec4","updated":"2018-11-29 00:37:41.000000000","message":"Done","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":84,"context_line":"instance\u0027s resource claims need to \"converge\" to reflect the success or failure"},{"line_number":85,"context_line":"of the live migration. If the live migration succeeded, the claim on the source"},{"line_number":86,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":87,"context_line":"rolled back."},{"line_number":88,"context_line":""},{"line_number":89,"context_line":"Resource claims"},{"line_number":90,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_9c49178e","line":87,"updated":"2018-11-28 16:14:50.000000000","message":"somewhere in this process, we need to make sure we correctly persist the right nested topology in the record (either the original if the live migration fails, or the new one if it succeeds)","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"515256ed908c189299cad9824c0619d0aaff828c","unresolved":false,"context_lines":[{"line_number":84,"context_line":"instance\u0027s resource claims need to \"converge\" to reflect the success or failure"},{"line_number":85,"context_line":"of the live migration. If the live migration succeeded, the claim on the source"},{"line_number":86,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":87,"context_line":"rolled back."},{"line_number":88,"context_line":""},{"line_number":89,"context_line":"Resource claims"},{"line_number":90,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_72951be2","line":87,"in_reply_to":"3f79a3b5_782be030","updated":"2018-11-30 09:45:09.000000000","message":"Fair enough, let\u0027s say it\u0027s an implementation detail.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":84,"context_line":"instance\u0027s resource claims need to \"converge\" to reflect the success or failure"},{"line_number":85,"context_line":"of the live migration. If the live migration succeeded, the claim on the source"},{"line_number":86,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":87,"context_line":"rolled back."},{"line_number":88,"context_line":""},{"line_number":89,"context_line":"Resource claims"},{"line_number":90,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_782be030","line":87,"in_reply_to":"3f79a3b5_9c49178e","updated":"2018-11-29 00:37:41.000000000","message":"When you create a MoveClaim in _move_claim() in the resource tracker, it\u0027ll eventually call self._update() which will call save() on the compute node and the pci tracker. I imagine this is what persists the new usage, including the NUMA stuff via ComputeNode.NUMATopology. That\u0027s existing code, I don\u0027t think I need to spell this out here, right?","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"67eaa7df4966e93e19b5b7f548602d0b9898901b","unresolved":false,"context_lines":[{"line_number":86,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":87,"context_line":"rolled back."},{"line_number":88,"context_line":""},{"line_number":89,"context_line":"Resource claims"},{"line_number":90,"context_line":"---------------"},{"line_number":91,"context_line":""},{"line_number":92,"context_line":"Let\u0027s address the resource claims aspect first. An effort has begun to support"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_536ce203","line":89,"updated":"2018-11-27 19:06:17.000000000","message":"Just so we\u0027re aware, and that I\u0027ve pointed it out, making the claims code work with live migration is going to be a lot more than simply calling rt._move_claim because the RT migration-based code is very much not dealing with live migration today, e.g.:\n\nhttps://github.com/openstack/nova/blob/8545ba2af7476e0884b5e7fb90965bef92d605bc/nova/compute/resource_tracker.py#L75\n\nSo we\u0027ll need to be aware of all of that while enabling this support, which means the changes should probably all be staggered in a series, e.g. make the RT/claims code work for live migrations, and then use it during live migration but as separate changes.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":86,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":87,"context_line":"rolled back."},{"line_number":88,"context_line":""},{"line_number":89,"context_line":"Resource claims"},{"line_number":90,"context_line":"---------------"},{"line_number":91,"context_line":""},{"line_number":92,"context_line":"Let\u0027s address the resource claims aspect first. An effort has begun to support"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_fc07eb1c","line":89,"in_reply_to":"3f79a3b5_536ce203","updated":"2018-11-28 16:14:50.000000000","message":"Yeah, I see some implementation concerns, but that\u0027s something we should discuss in the changes directly.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":92,"context_line":"Let\u0027s address the resource claims aspect first. An effort has begun to support"},{"line_number":93,"context_line":"NUMA resource providers in placement [3]_ and to standardize CPU resource"},{"line_number":94,"context_line":"tracking [8]_. However, placement can only track inventories and allocations of"},{"line_number":95,"context_line":"quantities of resources. It does not track which specific resources are used."},{"line_number":96,"context_line":"Specificity is needed for NUMA live migration. Consider an instance that uses"},{"line_number":97,"context_line":"4 dedicated CPUs in a future where the standard CPU resource tracking spec [8]_"},{"line_number":98,"context_line":"has been implemented. During live migration, the scheduler claims those 4 CPUs"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_f7a864da","line":95,"range":{"start_line":95,"start_character":25,"end_line":95,"end_character":77},"updated":"2018-11-28 16:14:50.000000000","message":"Well, we say that Placement doesn\u0027t \u0027place\u0027 instances to resource providers, it just \u0027filters\u0027 the resource providers (nit, again)","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":92,"context_line":"Let\u0027s address the resource claims aspect first. An effort has begun to support"},{"line_number":93,"context_line":"NUMA resource providers in placement [3]_ and to standardize CPU resource"},{"line_number":94,"context_line":"tracking [8]_. However, placement can only track inventories and allocations of"},{"line_number":95,"context_line":"quantities of resources. It does not track which specific resources are used."},{"line_number":96,"context_line":"Specificity is needed for NUMA live migration. Consider an instance that uses"},{"line_number":97,"context_line":"4 dedicated CPUs in a future where the standard CPU resource tracking spec [8]_"},{"line_number":98,"context_line":"has been implemented. During live migration, the scheduler claims those 4 CPUs"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_f81610f3","line":95,"range":{"start_line":95,"start_character":25,"end_line":95,"end_character":77},"in_reply_to":"3f79a3b5_f7a864da","updated":"2018-11-29 00:37:41.000000000","message":"Should have called it Filterment then ;) Since this is a nit I\u0027ll leave as it, I\u0027m talking about tracking resources, which placement definitely does in the form of allocations, it just doesn\u0027t care about specific individuals, so to speak.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":101,"context_line":"of CPUs in placement, we need to claim specific CPUs on the compute host. The"},{"line_number":102,"context_line":"compute resource tracker already exists for exactly this purpose, and it will"},{"line_number":103,"context_line":"continue to be used to claim specific resources on the destination, even in a"},{"line_number":104,"context_line":"NUMA-enabled placement future."},{"line_number":105,"context_line":""},{"line_number":106,"context_line":"Fitting to the new host"},{"line_number":107,"context_line":"-----------------------"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_5777d88d","line":104,"updated":"2018-11-28 16:14:50.000000000","message":"++","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":135,"context_line":"necessary NUMA-related information as Nova versioned objects. These new objects"},{"line_number":136,"context_line":"should be as virt driver independent as reasonnably possible, but as the use"},{"line_number":137,"context_line":"case is still libvirt talking to libvirt, abstraction for the sake of"},{"line_number":138,"context_line":"abstraction is not appropriate either."},{"line_number":139,"context_line":""},{"line_number":140,"context_line":"Sending the new NUMA Nova objects"},{"line_number":141,"context_line":"---------------------------------"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_b76d0c69","line":138,"updated":"2018-11-28 16:14:50.000000000","message":"somewhere you need to change the RPC signature of some compute method calling the destination (hence getting some upgrade concerns) but you detail it later.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":135,"context_line":"necessary NUMA-related information as Nova versioned objects. These new objects"},{"line_number":136,"context_line":"should be as virt driver independent as reasonnably possible, but as the use"},{"line_number":137,"context_line":"case is still libvirt talking to libvirt, abstraction for the sake of"},{"line_number":138,"context_line":"abstraction is not appropriate either."},{"line_number":139,"context_line":""},{"line_number":140,"context_line":"Sending the new NUMA Nova objects"},{"line_number":141,"context_line":"---------------------------------"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_18d3cc37","line":138,"in_reply_to":"3f79a3b5_b76d0c69","updated":"2018-11-29 00:37:41.000000000","message":"That\u0027s sort of what I describe in the next section, even if I don\u0027t spell out the exact signature change.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":156,"context_line":"          |                                      |                                    |             |"},{"line_number":157,"context_line":"          |                                      | migrate_data                       |             |"},{"line_number":158,"context_line":"          |                                      |-----------------------------------\u003e|             |"},{"line_number":159,"context_line":"          |                                      |                                    |             |"},{"line_number":160,"context_line":"          |                                      |                       migrate_data |             |"},{"line_number":161,"context_line":"          |\u003c--------------------------------------------------------------------------|             |"},{"line_number":162,"context_line":"          |                                      |                                    |             |"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_17c16030","line":159,"range":{"start_line":159,"start_character":85,"end_line":159,"end_character":88},"updated":"2018-11-28 16:14:50.000000000","message":"this is where the call to move_claim() should be done on the target host.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":156,"context_line":"          |                                      |                                    |             |"},{"line_number":157,"context_line":"          |                                      | migrate_data                       |             |"},{"line_number":158,"context_line":"          |                                      |-----------------------------------\u003e|             |"},{"line_number":159,"context_line":"          |                                      |                                    |             |"},{"line_number":160,"context_line":"          |                                      |                       migrate_data |             |"},{"line_number":161,"context_line":"          |\u003c--------------------------------------------------------------------------|             |"},{"line_number":162,"context_line":"          |                                      |                                    |             |"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_98e6fc13","line":159,"range":{"start_line":159,"start_character":85,"end_line":159,"end_character":88},"in_reply_to":"3f79a3b5_17c16030","updated":"2018-11-29 00:37:41.000000000","message":"I kinda thought it\u0027d be in check_can_live_migrate_destination(), so that if the claim fails we can raise a MigrationPreCheckError and get the scheduler to pick a new host?","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":227,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":228,"context_line":"          |                                                        |                                   |                | generate NUMA XML for destination |-|"},{"line_number":229,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":230,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":231,"context_line":""},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"Claim convergence"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_57e038ca","line":230,"range":{"start_line":230,"start_character":66,"end_line":230,"end_character":69},"updated":"2018-11-28 16:14:50.000000000","message":"we should somehow make sure the persisted InstanceNUMATopology from the instance is correct there.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":227,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":228,"context_line":"          |                                                        |                                   |                | generate NUMA XML for destination |-|"},{"line_number":229,"context_line":"          |                                                        |                                   |                +-----------------------------------+ |"},{"line_number":230,"context_line":"          |                                                        |                                   |                                                      |"},{"line_number":231,"context_line":""},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"Claim convergence"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_78b9a0e8","line":230,"range":{"start_line":230,"start_character":66,"end_line":230,"end_character":69},"in_reply_to":"3f79a3b5_57e038ca","updated":"2018-11-29 00:37:41.000000000","message":"Yeah, I think claims do that for us, see L87.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"67eaa7df4966e93e19b5b7f548602d0b9898901b","unresolved":false,"context_lines":[{"line_number":239,"context_line":"to use the claim as a context manager. For that reason, if the live migration"},{"line_number":240,"context_line":"fails, ``drop_move_claim`` needs to be called manually during the rollback to"},{"line_number":241,"context_line":"drop the claim from the destination.  Whether to do this on the source in"},{"line_number":242,"context_line":"``rollback_live_migration`` or in ``rollback_live_migration_at_destination`` is"},{"line_number":243,"context_line":"left as an implementation detail."},{"line_number":244,"context_line":""},{"line_number":245,"context_line":"Similarly, if the live migration succeeds, ``drop_move_claim`` needs to be"},{"line_number":246,"context_line":"called to drop the claim from the source, similar to how ``_confirm_resize``"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_b346967d","line":243,"range":{"start_line":242,"start_character":77,"end_line":243,"end_character":33},"updated":"2018-11-27 19:06:17.000000000","message":"I could be wrong, but on rollback I think it has to run on the dest host, because otherwise the checks in the RT.drop_move_claim could fail (depends on if the instance on the dest RT is in the tracked_migrations dict I guess). But yeah, definitely not something we need to sort out in the spec, it will have to be tested.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":239,"context_line":"to use the claim as a context manager. For that reason, if the live migration"},{"line_number":240,"context_line":"fails, ``drop_move_claim`` needs to be called manually during the rollback to"},{"line_number":241,"context_line":"drop the claim from the destination.  Whether to do this on the source in"},{"line_number":242,"context_line":"``rollback_live_migration`` or in ``rollback_live_migration_at_destination`` is"},{"line_number":243,"context_line":"left as an implementation detail."},{"line_number":244,"context_line":""},{"line_number":245,"context_line":"Similarly, if the live migration succeeds, ``drop_move_claim`` needs to be"},{"line_number":246,"context_line":"called to drop the claim from the source, similar to how ``_confirm_resize``"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_d7cbe844","line":243,"range":{"start_line":242,"start_character":77,"end_line":243,"end_character":33},"in_reply_to":"3f79a3b5_b346967d","updated":"2018-11-28 16:14:50.000000000","message":"Yup, I agree, that\u0027s an implementation question.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"67eaa7df4966e93e19b5b7f548602d0b9898901b","unresolved":false,"context_lines":[{"line_number":244,"context_line":""},{"line_number":245,"context_line":"Similarly, if the live migration succeeds, ``drop_move_claim`` needs to be"},{"line_number":246,"context_line":"called to drop the claim from the source, similar to how ``_confirm_resize``"},{"line_number":247,"context_line":"does it in the compute manager. Whether to do this in ``post_live_migration``"},{"line_number":248,"context_line":"on the source or in ``post_live_migration_at_destination`` is left as an"},{"line_number":249,"context_line":"implementation detail."},{"line_number":250,"context_line":""},{"line_number":251,"context_line":"Alternatives"},{"line_number":252,"context_line":"------------"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_134bca7f","line":249,"range":{"start_line":247,"start_character":32,"end_line":249,"end_character":22},"updated":"2018-11-27 19:06:17.000000000","message":"I think it\u0027s obvious that we\u0027d do it on the source (post_live_migration) since we\u0027re freeing up usage on the source after the guest has been transferred.\n\nI was sort of wondering how this is handled today for live migration wrt the RT, and I think it\u0027s just that once the instance.host changes, the RT on the source will no longer track that instance as usage, so it heals itself.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"70e874fa8e3acd2d6f6887c48fb0e4b938fed8ef","unresolved":false,"context_lines":[{"line_number":324,"context_line":"source and destination compute hosts have been upgraded to a version that"},{"line_number":325,"context_line":"supports it. To achieve this, the conductor can check the source and"},{"line_number":326,"context_line":"destination compute\u0027s service version and fail the migration if either one is"},{"line_number":327,"context_line":"too old."},{"line_number":328,"context_line":""},{"line_number":329,"context_line":"Implementation"},{"line_number":330,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_d739a83c","line":327,"updated":"2018-11-28 16:14:50.000000000","message":"Some operators could like to get the existing behaviour where, even if NUMA topology is wrong, they could still live-migrate from A to B.\n\nIf we\u0027re going to change the behaviour and enforce NUMA topologies on migrations, I do wonder whethere we need to microversion it, so operators could opt-out from this behaviour they don\u0027t want.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"c91fb82b4dbdc08e5aa6e5bbab56c6e8104fe784","unresolved":false,"context_lines":[{"line_number":324,"context_line":"source and destination compute hosts have been upgraded to a version that"},{"line_number":325,"context_line":"supports it. To achieve this, the conductor can check the source and"},{"line_number":326,"context_line":"destination compute\u0027s service version and fail the migration if either one is"},{"line_number":327,"context_line":"too old."},{"line_number":328,"context_line":""},{"line_number":329,"context_line":"Implementation"},{"line_number":330,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_54e6ecb4","line":327,"in_reply_to":"3f79a3b5_570d983c","updated":"2018-11-29 00:37:41.000000000","message":"Did a truth table thingee.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"fc71718af1adccefe854975961103f0801bbccca","unresolved":false,"context_lines":[{"line_number":324,"context_line":"source and destination compute hosts have been upgraded to a version that"},{"line_number":325,"context_line":"supports it. To achieve this, the conductor can check the source and"},{"line_number":326,"context_line":"destination compute\u0027s service version and fail the migration if either one is"},{"line_number":327,"context_line":"too old."},{"line_number":328,"context_line":""},{"line_number":329,"context_line":"Implementation"},{"line_number":330,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_570d983c","line":327,"in_reply_to":"3f79a3b5_d739a83c","updated":"2018-11-28 16:25:34.000000000","message":"After discussing it more, I think we need to signal to the new compute node that it\u0027s a live migration coming from an old system, and then not doing the claim accordingly.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"e68597cbdd528be153c829a295704b4aee67cfc0","unresolved":false,"context_lines":[{"line_number":324,"context_line":"source and destination compute hosts have been upgraded to a version that"},{"line_number":325,"context_line":"supports it. To achieve this, the conductor can check the source and"},{"line_number":326,"context_line":"destination compute\u0027s service version and fail the migration if either one is"},{"line_number":327,"context_line":"too old."},{"line_number":328,"context_line":""},{"line_number":329,"context_line":"Implementation"},{"line_number":330,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":8,"id":"3f79a3b5_17a72053","line":327,"in_reply_to":"3f79a3b5_d739a83c","updated":"2018-11-28 16:19:10.000000000","message":"Didn\u0027t this already come up in https://review.openstack.org/#/c/611088/ ?\n\nI\u0027m not sure we need a microversion for this, especially since it\u0027s a very low-level, virt driver specific change to an admin-only API. Anyone that is doing NUMA live migrations today, and has it working, is getting lucky.","commit_id":"74593631526a54dc7f2b1db2d90cd71d3e897eac"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"515256ed908c189299cad9824c0619d0aaff828c","unresolved":false,"context_lines":[{"line_number":92,"context_line":"needs to be released. If it failed, the claim on the destination needs to be"},{"line_number":93,"context_line":"rolled back."},{"line_number":94,"context_line":""},{"line_number":95,"context_line":"Resource claims"},{"line_number":96,"context_line":"---------------"},{"line_number":97,"context_line":""},{"line_number":98,"context_line":"Let\u0027s address the resource claims aspect first. An effort has begun to support"}],"source_content_type":"text/x-rst","patch_set":9,"id":"3f79a3b5_b2597369","line":95,"updated":"2018-11-30 09:45:09.000000000","message":"Just adding two points that were discussed in PS8 to verify on the implementation:\n* make sure the correct usage is persisted (either the new one if the migration succeeded, or the older if the migration didn\u0027t work)\n* make sure rt._move_claim works correctly with live-migration","commit_id":"42054c174749d2b253cc2bcb49898e4f8c6358d8"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"515256ed908c189299cad9824c0619d0aaff828c","unresolved":false,"context_lines":[{"line_number":115,"context_line":"window, using up NUMA resources that the scheduler thought were free. This race"},{"line_number":116,"context_line":"leads to the resource claim failing on the destination. This spec proposes to"},{"line_number":117,"context_line":"handle this claim failure using the existing ``MigrationPreCheckError``"},{"line_number":118,"context_line":"exception mechanism, causing the scheduler to pick a new host."},{"line_number":119,"context_line":""},{"line_number":120,"context_line":"Fitting to the new host"},{"line_number":121,"context_line":"-----------------------"}],"source_content_type":"text/x-rst","patch_set":9,"id":"3f79a3b5_526a5fea","line":118,"updated":"2018-11-30 09:45:09.000000000","message":"thanks","commit_id":"42054c174749d2b253cc2bcb49898e4f8c6358d8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"ad009407a37b284ecf028d5688acb1e09809d72f","unresolved":false,"context_lines":[{"line_number":322,"context_line":""},{"line_number":323,"context_line":"In the case of a mixed N/N+1 cloud, the possibilities for the exchange of"},{"line_number":324,"context_line":"information between the destination and the source are summarized in the"},{"line_number":325,"context_line":"following table. In it, **no** indicates that the new code is not present,"},{"line_number":326,"context_line":"**old path** indicates that the new code is present but choses to execute the"},{"line_number":327,"context_line":"old code for backwards compatibility, and **yes** indicates that the new"},{"line_number":328,"context_line":"functionality is used."}],"source_content_type":"text/x-rst","patch_set":9,"id":"3f79a3b5_2fc66b6c","line":325,"range":{"start_line":325,"start_character":26,"end_line":325,"end_character":28},"updated":"2018-11-29 19:12:40.000000000","message":"\"n/a\" makes a bit more sense for me here, since the new operation is not applicable in this case as the code doesn\u0027t exist.","commit_id":"42054c174749d2b253cc2bcb49898e4f8c6358d8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"ad009407a37b284ecf028d5688acb1e09809d72f","unresolved":false,"context_lines":[{"line_number":323,"context_line":"In the case of a mixed N/N+1 cloud, the possibilities for the exchange of"},{"line_number":324,"context_line":"information between the destination and the source are summarized in the"},{"line_number":325,"context_line":"following table. In it, **no** indicates that the new code is not present,"},{"line_number":326,"context_line":"**old path** indicates that the new code is present but choses to execute the"},{"line_number":327,"context_line":"old code for backwards compatibility, and **yes** indicates that the new"},{"line_number":328,"context_line":"functionality is used."},{"line_number":329,"context_line":""}],"source_content_type":"text/x-rst","patch_set":9,"id":"3f79a3b5_2fdd0b40","line":326,"range":{"start_line":326,"start_character":2,"end_line":326,"end_character":10},"updated":"2018-11-29 19:12:40.000000000","message":"nit: \"legacy\" makes more sense to me","commit_id":"42054c174749d2b253cc2bcb49898e4f8c6358d8"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"ad009407a37b284ecf028d5688acb1e09809d72f","unresolved":false,"context_lines":[{"line_number":324,"context_line":"information between the destination and the source are summarized in the"},{"line_number":325,"context_line":"following table. In it, **no** indicates that the new code is not present,"},{"line_number":326,"context_line":"**old path** indicates that the new code is present but choses to execute the"},{"line_number":327,"context_line":"old code for backwards compatibility, and **yes** indicates that the new"},{"line_number":328,"context_line":"functionality is used."},{"line_number":329,"context_line":""},{"line_number":330,"context_line":".. list-table:: Mixed N/N+1 cloud"}],"source_content_type":"text/x-rst","patch_set":9,"id":"3f79a3b5_4fdac748","line":327,"range":{"start_line":327,"start_character":44,"end_line":327,"end_character":47},"updated":"2018-11-29 19:12:40.000000000","message":"nit: \"new\" makes more sense to me here","commit_id":"42054c174749d2b253cc2bcb49898e4f8c6358d8"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"515256ed908c189299cad9824c0619d0aaff828c","unresolved":false,"context_lines":[{"line_number":380,"context_line":"       | Claim drop for source on success | yes      |"},{"line_number":381,"context_line":"       +----------------------------------+----------+"},{"line_number":382,"context_line":"       | Claim drop for dest on failure   | yes      |"},{"line_number":383,"context_line":"       +----------------------------------+----------+"},{"line_number":384,"context_line":""},{"line_number":385,"context_line":"Implementation"},{"line_number":386,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":9,"id":"3f79a3b5_d8045116","line":383,"updated":"2018-11-30 09:45:09.000000000","message":"Honestly, even by looking http://logs.openstack.org/87/599587/9/check/openstack-tox-docs/e26905b/html/specs/stein/approved/numa-aware-live-migration.html I don\u0027t know how to read the table, but I trust you about the fact you understand the concern and you\u0027ll create the implementation changes about this correctly.","commit_id":"42054c174749d2b253cc2bcb49898e4f8c6358d8"}]}
