)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"c51b1fe5f69b359506b52ed51b21c7d8e2215e07","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"b4dc3100_39483ab5","updated":"2022-07-18 09:36:10.000000000","message":"Dropping -2 before abandoning","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"e8bd3845_2930ae31","updated":"2021-12-14 10:00:40.000000000","message":"I\u0027m OK with the use case. But I have clarification questions inline. -1 as I would like to see why this is libvirt specific, if at all.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"b7b64eb9a05e0934bc95d5a4f98e4724b260c304","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"12d9589f_0983b8f1","updated":"2022-01-14 09:10:56.000000000","message":"Procedural -2: We hit spec freeze [1]. If you are still working on this the please re-propose it to Z release once we have the directory created (we miss the Z release naming).\nDetails of the process of accepting feature requests can be found on [2].\nIf any questions left about the process, feel free to ping bauzas on #openstack-nova or please attend any Nova meeting [3].\n\nThanks.\n\n[1] http://lists.openstack.org/pipermail/openstack-discuss/2022-January/026530.html\n[2] https://docs.openstack.org/nova/latest/contributor/process.html#spec-and-blueprint-approval-freeze\n[3] https://wiki.openstack.org/wiki/Meetings/Nova\n\n","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"ebaede4fdf9553d9e55e6875e7533d34896a13f8","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"7cbf6272_4469945e","updated":"2022-10-17 09:20:18.000000000","message":"Superseded by https://review.opendev.org/c/openstack/nova-specs/+/861591","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"dd206bff_824b14c6","updated":"2021-12-14 15:09:06.000000000","message":"This spec looks like Matryoshka dolls. There are three levels of support for this and the last one is probably the most important one to discuss. Could you at least try to make this spec a bit cleaner by splitting those three things clearly ?\n","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"2a557a17056d9cb69abb1a18f31b4060e95ffbf7","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"4f0def70_6df7a2f5","updated":"2021-12-14 16:04:36.000000000","message":"thanks gibi/sylvain i will make this virt driver indepentent by moving the events to the compte section and try and adress the other comments in the next revision.\n\nit is all valuable feedback and you raised some good point of clarity that i should capture in the spec.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"}],"specs/yoga/approved/external-cpu-management.rst":[{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":28,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"For our large telco operators, often find they have 2-4 CPUs that are"},{"line_number":31,"context_line":"not usable due to cpu pining/packing requirements per host. Each cpu"},{"line_number":32,"context_line":"consumes 3-5 watts per core or 12-20 watts per host. assuming a nominal"},{"line_number":33,"context_line":"cost per KWH of 20c and 1000 hosts that are $35,040 in wasted electricity"},{"line_number":34,"context_line":"a year alone from just the idle cpu usage plus the additional cost of"}],"source_content_type":"text/x-rst","patch_set":2,"id":"9a3180e2_92f0e1a5","line":31,"range":{"start_line":31,"start_character":22,"end_line":31,"end_character":28},"updated":"2021-12-14 15:09:06.000000000","message":"nit: pinning","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":29,"context_line":""},{"line_number":30,"context_line":"For our large telco operators, often find they have 2-4 CPUs that are"},{"line_number":31,"context_line":"not usable due to cpu pining/packing requirements per host. Each cpu"},{"line_number":32,"context_line":"consumes 3-5 watts per core or 12-20 watts per host. assuming a nominal"},{"line_number":33,"context_line":"cost per KWH of 20c and 1000 hosts that are $35,040 in wasted electricity"},{"line_number":34,"context_line":"a year alone from just the idle cpu usage plus the additional cost of"},{"line_number":35,"context_line":"dissipating all of the heat generated."}],"source_content_type":"text/x-rst","patch_set":2,"id":"ca414632_f1d7cc58","line":32,"range":{"start_line":32,"start_character":53,"end_line":32,"end_character":54},"updated":"2021-12-14 15:09:06.000000000","message":"nit: uppercase.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":30,"context_line":"For our large telco operators, often find they have 2-4 CPUs that are"},{"line_number":31,"context_line":"not usable due to cpu pining/packing requirements per host. Each cpu"},{"line_number":32,"context_line":"consumes 3-5 watts per core or 12-20 watts per host. assuming a nominal"},{"line_number":33,"context_line":"cost per KWH of 20c and 1000 hosts that are $35,040 in wasted electricity"},{"line_number":34,"context_line":"a year alone from just the idle cpu usage plus the additional cost of"},{"line_number":35,"context_line":"dissipating all of the heat generated."},{"line_number":36,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"f975ef29_7297a706","line":33,"range":{"start_line":33,"start_character":9,"end_line":33,"end_character":12},"updated":"2021-12-14 15:09:06.000000000","message":"nit: kWh.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":35,"context_line":"dissipating all of the heat generated."},{"line_number":36,"context_line":""},{"line_number":37,"context_line":"Furthermore, while many telco use-cases require low latency and high"},{"line_number":38,"context_line":"throughput not all require the cpu to run at the max frequency. "},{"line_number":39,"context_line":""},{"line_number":40,"context_line":"Use Cases"},{"line_number":41,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"53a1e192_13fa8614","line":38,"range":{"start_line":38,"start_character":62,"end_line":38,"end_character":64},"updated":"2021-12-14 10:00:40.000000000","message":"nit: trailing whitespace","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":50,"context_line":"Proposed change"},{"line_number":51,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":52,"context_line":""},{"line_number":53,"context_line":"There are two parts to this proposal the first is simple."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":"Add a config option to the libvirt section to declare that the host CPUs"},{"line_number":56,"context_line":"are externally managed `[1]`_."}],"source_content_type":"text/x-rst","patch_set":2,"id":"3ee14f69_19365876","line":53,"range":{"start_line":53,"start_character":36,"end_line":53,"end_character":37},"updated":"2021-12-14 15:09:06.000000000","message":"either a comma, or bullets to clarify both parts ?","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":50,"context_line":"Proposed change"},{"line_number":51,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":52,"context_line":""},{"line_number":53,"context_line":"There are two parts to this proposal the first is simple."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":"Add a config option to the libvirt section to declare that the host CPUs"},{"line_number":56,"context_line":"are externally managed `[1]`_."}],"source_content_type":"text/x-rst","patch_set":2,"id":"27592391_0cf1a2a3","line":53,"range":{"start_line":53,"start_character":10,"end_line":53,"end_character":13},"updated":"2021-12-14 15:09:06.000000000","message":"nit: you mention three points :\n* the config option\n* the placement RP update\n* the external event add","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"2a557a17056d9cb69abb1a18f31b4060e95ffbf7","unresolved":true,"context_lines":[{"line_number":50,"context_line":"Proposed change"},{"line_number":51,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":52,"context_line":""},{"line_number":53,"context_line":"There are two parts to this proposal the first is simple."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":"Add a config option to the libvirt section to declare that the host CPUs"},{"line_number":56,"context_line":"are externally managed `[1]`_."}],"source_content_type":"text/x-rst","patch_set":2,"id":"2f41b317_26989f24","line":53,"range":{"start_line":53,"start_character":10,"end_line":53,"end_character":13},"in_reply_to":"27592391_0cf1a2a3","updated":"2021-12-14 16:04:36.000000000","message":"yep i forgot to update this between ps 1 and 2\ni orginally tought i could do the rp update without a nova change but then dicovered reserved is currently harded to 0 for pcpus and overriten every time so i added the RP update step but forgot to update this.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":57,"context_line":"[libvirt]"},{"line_number":58,"context_line":"cpu_external_management\u003dTrue|False(default)"},{"line_number":59,"context_line":""},{"line_number":60,"context_line":"second, the update provider tree will be modified to use the cpu reserved"},{"line_number":61,"context_line":"value-form placement if cpu_external_management is true. This will allow"},{"line_number":62,"context_line":"the external tool to notify placement of the offlined CPUs for correct"},{"line_number":63,"context_line":"scheduling."}],"source_content_type":"text/x-rst","patch_set":2,"id":"57686725_1bea7223","line":60,"range":{"start_line":60,"start_character":12,"end_line":60,"end_character":32},"updated":"2021-12-14 15:09:06.000000000","message":"\"secondly, the nova-compute service will update its Placement resources \"","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":57,"context_line":"[libvirt]"},{"line_number":58,"context_line":"cpu_external_management\u003dTrue|False(default)"},{"line_number":59,"context_line":""},{"line_number":60,"context_line":"second, the update provider tree will be modified to use the cpu reserved"},{"line_number":61,"context_line":"value-form placement if cpu_external_management is true. This will allow"},{"line_number":62,"context_line":"the external tool to notify placement of the offlined CPUs for correct"},{"line_number":63,"context_line":"scheduling."},{"line_number":64,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"494c2acd_d5eed125","line":61,"range":{"start_line":60,"start_character":54,"end_line":61,"end_character":20},"updated":"2021-12-14 10:00:40.000000000","message":"Does it mean that nova will not overwrite the reserved value on VCPU and PCPU resources in placement?","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":57,"context_line":"[libvirt]"},{"line_number":58,"context_line":"cpu_external_management\u003dTrue|False(default)"},{"line_number":59,"context_line":""},{"line_number":60,"context_line":"second, the update provider tree will be modified to use the cpu reserved"},{"line_number":61,"context_line":"value-form placement if cpu_external_management is true. This will allow"},{"line_number":62,"context_line":"the external tool to notify placement of the offlined CPUs for correct"},{"line_number":63,"context_line":"scheduling."},{"line_number":64,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"e26115e3_4ecaf4ad","line":61,"range":{"start_line":60,"start_character":54,"end_line":61,"end_character":20},"in_reply_to":"494c2acd_d5eed125","updated":"2021-12-14 15:09:06.000000000","message":"same question here, this is a bit unclear. I guess Sean wanted to tell that Nova will update Placement to correctly mark the offloaded CPUs to be reserved.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"124a7f1ec8b00b9a31c7e798f0a45acb1ac8c9a9","unresolved":true,"context_lines":[{"line_number":57,"context_line":"[libvirt]"},{"line_number":58,"context_line":"cpu_external_management\u003dTrue|False(default)"},{"line_number":59,"context_line":""},{"line_number":60,"context_line":"second, the update provider tree will be modified to use the cpu reserved"},{"line_number":61,"context_line":"value-form placement if cpu_external_management is true. This will allow"},{"line_number":62,"context_line":"the external tool to notify placement of the offlined CPUs for correct"},{"line_number":63,"context_line":"scheduling."},{"line_number":64,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"0a02cb22_5ac38543","line":61,"range":{"start_line":60,"start_character":54,"end_line":61,"end_character":20},"in_reply_to":"494c2acd_d5eed125","updated":"2021-12-14 15:59:13.000000000","message":"that is the intent yes for pCPUs.\n\nso how I would like this to work is the external tool would set the reserved value for pCPUs equal to the number of cores that is offlined. today we hard code reserved to 0 on every update\n\nhttps://github.com/openstack/nova/blob/master/nova/virt/libvirt/driver.py#L8373-L8381\n\nwe allready have the inventory info which we use to read the allocation ratios form placement\n\nhttps://github.com/openstack/nova/blob/master/nova/virt/libvirt/driver.py#L8344-L8345\n\n\nso this would really be soemthing like\n\nif pcpus:\n            result[orc.PCPU] \u003d {\n                \u0027total\u0027: pcpus,\n                \u0027min_unit\u0027: 1,\n                \u0027max_unit\u0027: pcpus,\n                \u0027step_size\u0027: 1,\n                \u0027allocation_ratio\u0027: 1,\n                \u0027reserved\u0027: 0,\n            }\n\n-\u003e\nif pcpus:\n            result[orc.PCPU] \u003d {\n                \u0027total\u0027: pcpus,\n                \u0027min_unit\u0027: 1,\n                \u0027max_unit\u0027: pcpus,\n                \u0027step_size\u0027: 1,\n                \u0027allocation_ratio\u0027: 1,\n                \u0027reserved\u0027: (inv or {}).get(orc.PCPU,{}).get(\u0027reserved\u0027,0)\n            }\n\n\nnot sure if ^ actully works specificly the (inv or {}) part but you get the idea.\nwe would prefer the value currently in placment if set and fallback to 0 if not.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":65,"context_line":"third, optionally add a new external event and a second config option to"},{"line_number":66,"context_line":"wait for the event."},{"line_number":67,"context_line":""},{"line_number":68,"context_line":"[libvirt]"},{"line_number":69,"context_line":"external_managment_timeout\u003d-1"},{"line_number":70,"context_line":""},{"line_number":71,"context_line":"-1 is disabled (default)"}],"source_content_type":"text/x-rst","patch_set":2,"id":"defc7031_ae085614","line":68,"updated":"2021-12-14 10:00:40.000000000","message":"Is this feature really libvirt driver specific? What part of the feature needs libvirt\u0027s cooperation?","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"2a557a17056d9cb69abb1a18f31b4060e95ffbf7","unresolved":true,"context_lines":[{"line_number":65,"context_line":"third, optionally add a new external event and a second config option to"},{"line_number":66,"context_line":"wait for the event."},{"line_number":67,"context_line":""},{"line_number":68,"context_line":"[libvirt]"},{"line_number":69,"context_line":"external_managment_timeout\u003d-1"},{"line_number":70,"context_line":""},{"line_number":71,"context_line":"-1 is disabled (default)"}],"source_content_type":"text/x-rst","patch_set":2,"id":"031ca444_6f1417a6","line":68,"in_reply_to":"07728257_c615346c","updated":"2021-12-14 16:04:36.000000000","message":"technically I guess this could be compute specific rather then virt driver.\n\nmy poc host daemon reads the libvirt xml currntly to retrive teh per isntance cpu pinning but\ntechnically that info is in the notifications too.\n\nso in principal there are no libvirt depndency but the libvirt driver is the primary one that support cpu pinnng so it is my main objective to enable.\n\ni will move this to the compute section","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":65,"context_line":"third, optionally add a new external event and a second config option to"},{"line_number":66,"context_line":"wait for the event."},{"line_number":67,"context_line":""},{"line_number":68,"context_line":"[libvirt]"},{"line_number":69,"context_line":"external_managment_timeout\u003d-1"},{"line_number":70,"context_line":""},{"line_number":71,"context_line":"-1 is disabled (default)"}],"source_content_type":"text/x-rst","patch_set":2,"id":"07728257_c615346c","line":68,"in_reply_to":"defc7031_ae085614","updated":"2021-12-14 15:09:06.000000000","message":"agreed, we should let this be generic in case other drivers would want to do it as well.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":66,"context_line":"wait for the event."},{"line_number":67,"context_line":""},{"line_number":68,"context_line":"[libvirt]"},{"line_number":69,"context_line":"external_managment_timeout\u003d-1"},{"line_number":70,"context_line":""},{"line_number":71,"context_line":"-1 is disabled (default)"},{"line_number":72,"context_line":"0 is waiting indefinitely"}],"source_content_type":"text/x-rst","patch_set":2,"id":"947ad0fc_2e5d2c39","line":69,"range":{"start_line":69,"start_character":0,"end_line":69,"end_character":8},"updated":"2021-12-14 10:00:40.000000000","message":"would prefix this as well with cpu, ie. cpu_external_management_timeout. Or is it a more generic use case?","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":66,"context_line":"wait for the event."},{"line_number":67,"context_line":""},{"line_number":68,"context_line":"[libvirt]"},{"line_number":69,"context_line":"external_managment_timeout\u003d-1"},{"line_number":70,"context_line":""},{"line_number":71,"context_line":"-1 is disabled (default)"},{"line_number":72,"context_line":"0 is waiting indefinitely"}],"source_content_type":"text/x-rst","patch_set":2,"id":"8b19854d_6762eca8","line":69,"range":{"start_line":69,"start_character":0,"end_line":69,"end_character":8},"in_reply_to":"947ad0fc_2e5d2c39","updated":"2021-12-14 15:09:06.000000000","message":"good question, how much of this stuff can be generic enough vs. CPU-specific ?","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"124a7f1ec8b00b9a31c7e798f0a45acb1ac8c9a9","unresolved":true,"context_lines":[{"line_number":66,"context_line":"wait for the event."},{"line_number":67,"context_line":""},{"line_number":68,"context_line":"[libvirt]"},{"line_number":69,"context_line":"external_managment_timeout\u003d-1"},{"line_number":70,"context_line":""},{"line_number":71,"context_line":"-1 is disabled (default)"},{"line_number":72,"context_line":"0 is waiting indefinitely"}],"source_content_type":"text/x-rst","patch_set":2,"id":"4dfc9f10_cdbf18dd","line":69,"range":{"start_line":69,"start_character":0,"end_line":69,"end_character":8},"in_reply_to":"947ad0fc_2e5d2c39","updated":"2021-12-14 15:59:13.000000000","message":"that is a good question\ni had conceved as it as potentially more generic but i was also assuming only one external agent.\nif there were many or could be more then one making this more specific with the cpu_ prefix likely makes more sense.\n\ni\u0027m not sure which is best.\n\ni could see arbiterd eventually doing other turning say memory bandwith or cache allocation like RMD was intended to do and im not sure if its better to prefix and add a flag for each capablity or keep it generic. so that we dont need to update nova in the future.\n\nim leaning towrds generic but im open to other suggestions for this.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":78,"context_line":""},{"line_number":79,"context_line":"as synchronization during a live migrate is nontrivial and nova cannot"},{"line_number":80,"context_line":"control when the VM un-pauses no additional waiting will be added for"},{"line_number":81,"context_line":"live-migrate"},{"line_number":82,"context_line":""},{"line_number":83,"context_line":"The external service would listen to the versioned nova notification"},{"line_number":84,"context_line":"stream to know that an instance needs to be managed so no direct interfaction"}],"source_content_type":"text/x-rst","patch_set":2,"id":"88183651_defe3684","line":81,"updated":"2021-12-14 15:09:06.000000000","message":"thanks, appreciated...","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":82,"context_line":""},{"line_number":83,"context_line":"The external service would listen to the versioned nova notification"},{"line_number":84,"context_line":"stream to know that an instance needs to be managed so no direct interfaction"},{"line_number":85,"context_line":"between nova and the external service is required."},{"line_number":86,"context_line":""},{"line_number":87,"context_line":""},{"line_number":88,"context_line":"Alternatives"}],"source_content_type":"text/x-rst","patch_set":2,"id":"21ddd86a_f477340e","line":85,"updated":"2021-12-14 10:00:40.000000000","message":"Could you add a sentence describing what the external system can achieve with this synchronization? Is it so that the external system can stop the nova actions before they start consuming hypervisor resources so that the external system can bring up CPUs then notify nova to proceed consuming them?\n\nHow this will work together with the reserved value of the VCPU and PCPU resource in placement? If the external system keeps the offline resources reserved then nova scheduling will not select those resources so no waiting will ever triggered for those resources to bring them up. Or are these two use case (reserved value - offline CPU, and waiting for external sync) are separate?","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":82,"context_line":""},{"line_number":83,"context_line":"The external service would listen to the versioned nova notification"},{"line_number":84,"context_line":"stream to know that an instance needs to be managed so no direct interfaction"},{"line_number":85,"context_line":"between nova and the external service is required."},{"line_number":86,"context_line":""},{"line_number":87,"context_line":""},{"line_number":88,"context_line":"Alternatives"}],"source_content_type":"text/x-rst","patch_set":2,"id":"0815e9d2_5ffd8060","line":85,"in_reply_to":"21ddd86a_f477340e","updated":"2021-12-14 15:09:06.000000000","message":"+1","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"124a7f1ec8b00b9a31c7e798f0a45acb1ac8c9a9","unresolved":true,"context_lines":[{"line_number":82,"context_line":""},{"line_number":83,"context_line":"The external service would listen to the versioned nova notification"},{"line_number":84,"context_line":"stream to know that an instance needs to be managed so no direct interfaction"},{"line_number":85,"context_line":"between nova and the external service is required."},{"line_number":86,"context_line":""},{"line_number":87,"context_line":""},{"line_number":88,"context_line":"Alternatives"}],"source_content_type":"text/x-rst","patch_set":2,"id":"c706c4a0_ebe95bea","line":85,"in_reply_to":"21ddd86a_f477340e","updated":"2021-12-14 15:59:13.000000000","message":"yes, the intent would be to allow the external system to online the CPUs that were selected by nova before it invokes libvirt.\n\nthe other case that is more immediate is allowing you to set the cpu governor and or frequency before the VM starts.\n\nregarding scheduling, I was not sure how open upstream would be to the external event and synchronization.\n\nif we have the event we do not need to update reserved as we can online the cores before the VM starts\nif we don\u0027t have it then we need to do the placement management.\n\nI guess I would have the arbiterd disable the placement reservation management when the events when the notification was enabled and only allow it to dynamically offline CPUs when it\u0027s available.\n\nif we do not have the notification the intended use case was the admin would do \"systemctl enable arbiterd\" to offline CPUs statically and to allow new workloads they would do \"systemctl disabled arbiterd\"\n\nat least in the initial version that has no interaction with nova notifications system.\n\nthat breaks down if we want to add the ability to manage cpu frequency via userspace governor as arbiterd or any external management application like RMD would have to run to introspect change to guest metadata.\nso the workflow would likely be more like updating the external agent\u0027s config file or touching a file \n/run/arbiterd/arbitrate-cpu-state and deleting it to deactivate it.\n\n\nI have presented this in 3 steps because each step enables more functionality.\n\nif we have step one only then we can offline the CPUs and update the cpu governor asynchronously.\nwithout the ability to update the reserved state in placement we can either disable the service or\ncreate an allocation to get the correct scheduling behavior.\n\nif we also do step 2 we can manage the reserved state which is a little cleaner.\nif we do step 3 technically we don\u0027t need step 2 it just is more invasive of a change to nova but it is what I think would be most useful long term.\n\nwhat are your thoughts on step 2 (reserved value) and step 3 external event and notification?","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":104,"context_line":"---------------"},{"line_number":105,"context_line":""},{"line_number":106,"context_line":"A new external event type will be added."},{"line_number":107,"context_line":"``external-management-complete``"},{"line_number":108,"context_line":""},{"line_number":109,"context_line":"Security impact"},{"line_number":110,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"55db5aaf_12beaea2","line":107,"updated":"2021-12-14 10:00:40.000000000","message":"I probably call this cpu-external-management-complete if it is cpu specific, or hypervisor-external-management-complete if it is not.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"124a7f1ec8b00b9a31c7e798f0a45acb1ac8c9a9","unresolved":false,"context_lines":[{"line_number":104,"context_line":"---------------"},{"line_number":105,"context_line":""},{"line_number":106,"context_line":"A new external event type will be added."},{"line_number":107,"context_line":"``external-management-complete``"},{"line_number":108,"context_line":""},{"line_number":109,"context_line":"Security impact"},{"line_number":110,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"4956ac2a_1a9180fc","line":107,"in_reply_to":"55db5aaf_12beaea2","updated":"2021-12-14 15:59:13.000000000","message":"Ack","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":119,"context_line":""},{"line_number":120,"context_line":"if we implement step 3, adding the external event we may also want to"},{"line_number":121,"context_line":"send a new notification to indicate we have started waiting for"},{"line_number":122,"context_line":"external management of the instance."},{"line_number":123,"context_line":""},{"line_number":124,"context_line":""},{"line_number":125,"context_line":"Other end user impact"}],"source_content_type":"text/x-rst","patch_set":2,"id":"91e6d2d5_58dddfa8","line":122,"updated":"2021-12-14 10:00:40.000000000","message":"That make sense.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":130,"context_line":"Performance Impact"},{"line_number":131,"context_line":"------------------"},{"line_number":132,"context_line":""},{"line_number":133,"context_line":"This should not impact the performance of nova but would provide a"},{"line_number":134,"context_line":"side-channel to allow guests to influence their performance if the"},{"line_number":135,"context_line":"external management application support it."},{"line_number":136,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"2b45f370_70578a2e","line":133,"range":{"start_line":133,"start_character":0,"end_line":133,"end_character":46},"updated":"2021-12-14 10:00:40.000000000","message":"If this feature is configured certain nova operations takes an extra REST API call (the external-event-arrival) to complete. This could increase load on the REST API as well as the message bus in a busy system. \n\nStill this feature is opt-in so I\u0027m not worried. The operators enabling this should measure the performance hit and dimension accordingly.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"124a7f1ec8b00b9a31c7e798f0a45acb1ac8c9a9","unresolved":true,"context_lines":[{"line_number":130,"context_line":"Performance Impact"},{"line_number":131,"context_line":"------------------"},{"line_number":132,"context_line":""},{"line_number":133,"context_line":"This should not impact the performance of nova but would provide a"},{"line_number":134,"context_line":"side-channel to allow guests to influence their performance if the"},{"line_number":135,"context_line":"external management application support it."},{"line_number":136,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"a2ec10e6_fac65c40","line":133,"range":{"start_line":133,"start_character":0,"end_line":133,"end_character":46},"in_reply_to":"2b45f370_70578a2e","updated":"2021-12-14 15:59:13.000000000","message":"ack that is a valid point i can call that out here.\nbut yes its intended to be opt in and presumable the operator could extend any timeouts or plan for the extra load.\n\none additional even in principal should not be a large load but it still worth capturing.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":144,"context_line":"Developer impact"},{"line_number":145,"context_line":"----------------"},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"None"},{"line_number":148,"context_line":""},{"line_number":149,"context_line":"Upgrade impact"},{"line_number":150,"context_line":"--------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"e5b07b99_8c6e3d1d","line":147,"updated":"2021-12-14 10:00:40.000000000","message":"The external event handling is virt driver agnostic but this event is virt driver specific. We need a logic between compute manage and virt driver so that the compute manager knows when to wait for the external-events. \n\nAlso other in tree virt driver developers can optionally implement the support for the same logic in the respective virt driver","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"43a5b990aab1c9138ee41dd1f284e22ac5db0b9f","unresolved":true,"context_lines":[{"line_number":144,"context_line":"Developer impact"},{"line_number":145,"context_line":"----------------"},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"None"},{"line_number":148,"context_line":""},{"line_number":149,"context_line":"Upgrade impact"},{"line_number":150,"context_line":"--------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"fee31115_9d56f779","line":147,"in_reply_to":"e5b07b99_8c6e3d1d","updated":"2021-12-14 15:09:06.000000000","message":"+1, I think the external event should be virt-driver agnostic, meaning ideally this should be the compute service and not the driver which should be responsible for emitting it.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"124a7f1ec8b00b9a31c7e798f0a45acb1ac8c9a9","unresolved":true,"context_lines":[{"line_number":144,"context_line":"Developer impact"},{"line_number":145,"context_line":"----------------"},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"None"},{"line_number":148,"context_line":""},{"line_number":149,"context_line":"Upgrade impact"},{"line_number":150,"context_line":"--------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"11eb122f_2ced11a9","line":147,"in_reply_to":"e5b07b99_8c6e3d1d","updated":"2021-12-14 15:59:13.000000000","message":"ack yes so perhaps the config option for the event should really be in the compute section.\nthen we can keep this outside of the virt driver.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"52cf3edf7732ca9080e51b7fc15a5103d031cf08","unresolved":true,"context_lines":[{"line_number":193,"context_line":"new notification."},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"the API sample tests will be updated to account for the new notification."},{"line_number":196,"context_line":""},{"line_number":197,"context_line":""},{"line_number":198,"context_line":"Documentation Impact"},{"line_number":199,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"ac601c05_bdf4e1d7","line":196,"updated":"2021-12-14 10:00:40.000000000","message":"I\u0027m wondering if tempest test can be done with a small noop daemon that listens to nova notifications and triggers the external event without doing anything on the hypervisor. This would create a nice template for daemon developers how to create their own logic.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"124a7f1ec8b00b9a31c7e798f0a45acb1ac8c9a9","unresolved":true,"context_lines":[{"line_number":193,"context_line":"new notification."},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"the API sample tests will be updated to account for the new notification."},{"line_number":196,"context_line":""},{"line_number":197,"context_line":""},{"line_number":198,"context_line":"Documentation Impact"},{"line_number":199,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"9c544ddc_196f1258","line":196,"in_reply_to":"ac601c05_bdf4e1d7","updated":"2021-12-14 15:59:13.000000000","message":"yep, it likely could. I could also see just adding a dry run mode to arbiterd or similar.\nI could explore doing this also in the nova tree via a nova devstack plugin\nit would be simple to add a nova console script entry point that would just listen on the notification bus and respond with the external event. ill add that to the spec.\n\nI do plan to provide an arbiterd devstack plugin and eventually upstream that to opendev.\nI currently want to support py3.8 + which is part of the reason I have not purposed that in this cycle.\nadding 3.6 support would not be hard I just don\u0027t want to use the old APIs right now.","commit_id":"b241b8673b22441890beccd3fa3c75016e10d655"}]}
