)]}' {"/PATCHSET_LEVEL":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"be8ee3453674a6abcba336d82dbe74dc91945a15","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"a408da1e_e094d405","updated":"2025-03-06 09:43:08.000000000","message":"Thank @dms@danplanet.com for proposing this. A couple of comments inline. The only gap I currently see is supporting VFs.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"c938b8c369c4bb892208c5a72339c5bcd0b21e4c","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"3d005b29_2a7af1b1","updated":"2025-03-05 23:26:48.000000000","message":"i didceed to finish doing a first pass.\n\n-1 is mainly because of the lifecycle operation that you intend to work.\nor rather becasue that is not covered clearly in the spec.\n\ni have left some other comments mainly on how i think the design should be implemented but that does not change the intent of the design, im just trying to avoid buring the device if we fail to set up the bdms for some reason.\ni.e. we fail before we even call the virt driver to spawn the vm.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"67ec5097_62695750","updated":"2025-03-07 09:30:34.000000000","message":"Strong and more than happy +1 on that spec proposal that would make operators eager to do post-deletion actions on PCI devices.\n\nA few nits to solve but this sounds to me already in good shape.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ef77bc6fa7f49a922339cecfa293d76ce513ff10","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"92850f6a_3f2f2429","updated":"2025-03-07 14:36:31.000000000","message":"Thanks for the review, everyone!","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"22c00398d4db0d464eafddfa0a1e34b6184cf1fa","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"b7238631_1bbffa50","updated":"2025-03-11 15:26:22.000000000","message":"Nothing else to say, thanks Dan !","commit_id":"99beb3c1de9ac76753cf26ad19abf927b689d80a"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"b267dabfa818da33ff7c0bb79911269d7bcba0a3","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":5,"id":"f894317a_0bfc5927","updated":"2025-03-12 21:34:44.000000000","message":"I like the idea of this and I expect it to see a lot of use (I imagine it mostly in the context of security/data exposure between tenants). I also much appreciate the simplicity of the proposed change. +1 as I am not a PCI expert and can\u0027t usefully review the particulars of that like the other reviewers on here can.","commit_id":"a33243fe620ddc255a2a89fc7a485fa88ffbf830"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ebb2169cfab465e0b4ccd9549d15fe2f1af8d0be","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":5,"id":"68d7d8ea_22b28846","updated":"2025-03-13 13:35:20.000000000","message":"Thanks melwitt, even a high-level \"this makes sense and is understandable\" is helpful.\n\nI\u0027ll push up a typo fix and include my new use-case for good measure.","commit_id":"a33243fe620ddc255a2a89fc7a485fa88ffbf830"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47ec13b03075b46b0a0a9dd43e08f16f4eddbada","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"5631c168_66b882fa","updated":"2025-03-17 16:34:42.000000000","message":"I don\u0027t reasonably think this spec is wrong, but I see gibi\u0027s comments.\nAdding back my +2 but for sure, awaiting gibi to reply.\n\nFWIW, I\u0027d rather prefer to accept that spec as it is and discuss about the right explanations in another follow-up, but I\u0027d leave others to tell what they prefer.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9534616a86770c959955aa55a4596114970f0452","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"cad16acd_63bc4801","updated":"2025-03-17 16:16:12.000000000","message":"I see some terminology issue around burning the use during move operations","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"e273a3fbe2c613988a5e6de21fce96192f4d4888","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"9aa796bf_b5771dbb","updated":"2025-03-13 21:44:23.000000000","message":"The added NVMe use case makes sense, still LGTM","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fadf1286b254524dfb227b4cb9e61c60d39f9eb2","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"9965fac4_9a53ba1a","updated":"2025-03-14 18:07:49.000000000","message":"This is obviously something i have strong feeling about but i think this is in a state where i can live with it as described in the spec.\n\nso +2 over all and ill try and take a look at the implementaion if i get time","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"e174b2d78d422fe4b65ee5c46c2f9df6f8ba3809","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":8,"id":"0431f21c_8413277f","updated":"2025-03-18 00:04:07.000000000","message":"Additional info LGTM","commit_id":"248cd1e23230a72600805eb53195fd5060a585cc"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8e56721f791d308fe4c370be16dce3f603c6016a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":8,"id":"49895a08_a046cfb3","updated":"2025-03-17 18:58:27.000000000","message":"Thanks Dan. Looks good to me.","commit_id":"248cd1e23230a72600805eb53195fd5060a585cc"}],"specs/2025.2/approved/one-time-use-devices.rst":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"c938b8c369c4bb892208c5a72339c5bcd0b21e4c","unresolved":true,"context_lines":[{"line_number":15,"context_line":"is passed directly through to a guest may need to have known-good firmware"},{"line_number":16,"context_line":"re-written to it to make sure the previous user hadn\u0027t violated it in some"},{"line_number":17,"context_line":"way. A GPU might have sensitive residue in memory that needs to be zeroed."},{"line_number":18,"context_line":"An NVMe device is a storage medium that needs to be wiped or discarded."},{"line_number":19,"context_line":""},{"line_number":20,"context_line":"Problem description"},{"line_number":21,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"96c5c8f0_ad98da31","line":18,"range":{"start_line":18,"start_character":1,"end_line":18,"end_character":71},"updated":"2025-03-05 23:26:48.000000000","message":"nit: this reads slightly diffent form the previous statement \n\nif you want to keep the same tone then perhasp somethign like this\n\n\n```suggestion\nAn intrinsicly stateful device like an nvme ssd is a storage medium that needs to be wiped or discarded.\n```","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d0eb941a2405f41601d694b033939c23a8c51159","unresolved":false,"context_lines":[{"line_number":15,"context_line":"is passed directly through to a guest may need to have known-good firmware"},{"line_number":16,"context_line":"re-written to it to make sure the previous user hadn\u0027t violated it in some"},{"line_number":17,"context_line":"way. A GPU might have sensitive residue in memory that needs to be zeroed."},{"line_number":18,"context_line":"An NVMe device is a storage medium that needs to be wiped or discarded."},{"line_number":19,"context_line":""},{"line_number":20,"context_line":"Problem description"},{"line_number":21,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"93c2d5b2_c731a387","line":18,"range":{"start_line":18,"start_character":1,"end_line":18,"end_character":71},"in_reply_to":"96c5c8f0_ad98da31","updated":"2025-03-06 18:20:16.000000000","message":"I didn\u0027t make a change here because I don\u0027t understand why you think this makes it more clear (or matching tone). You said it\u0027s a nit, so I\u0027ll leave it, but feel free to clarify if you think it really needs changing.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"65f805065753a1d31f76fd61976d96b2a1494333","unresolved":true,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Currently there is no good way for operators to define and execute a"},{"line_number":24,"context_line":"device-cleaning workflow outside of Nova. Further, Nova does not intend to"},{"line_number":25,"context_line":"take on such tasks itself, in support of the long-term \"no more orchestration\""},{"line_number":26,"context_line":"goal."},{"line_number":27,"context_line":""},{"line_number":28,"context_line":"Use Cases"},{"line_number":29,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"a8eae34e_f31e2880","line":26,"range":{"start_line":25,"start_character":29,"end_line":26,"end_character":5},"updated":"2025-03-06 12:56:12.000000000","message":"I disagree with this statement by the way.\n\nI do not consider resource management or assignment to be orchestration.\n\nmulti create is an example of orchestration.\n\nhttps://docs.openstack.org/nova/latest/contributor/project-scope.html#no-more-orchestration\n\ncyborg is not an orchestrator in my view nor is nova pci manager.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fadf1286b254524dfb227b4cb9e61c60d39f9eb2","unresolved":false,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Currently there is no good way for operators to define and execute a"},{"line_number":24,"context_line":"device-cleaning workflow outside of Nova. Further, Nova does not intend to"},{"line_number":25,"context_line":"take on such tasks itself, in support of the long-term \"no more orchestration\""},{"line_number":26,"context_line":"goal."},{"line_number":27,"context_line":""},{"line_number":28,"context_line":"Use Cases"},{"line_number":29,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"5467882e_f1f01852","line":26,"range":{"start_line":25,"start_character":29,"end_line":26,"end_character":5},"in_reply_to":"05547d8e_613e318f","updated":"2025-03-14 18:07:49.000000000","message":"I tend to disagree that it\u0027s appropriate to have vgpu or pmem support in Nova if we turn around and say it\u0027s not appropriate to have cleaning of NVMe devices, but this is not the topic of this spec.\n\nim just going to resove this as this converation is not going to result in a matiral change to the content of this spec.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"2580a7e616e05a1e05ae2b8c219259a99d8ab96e","unresolved":true,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Currently there is no good way for operators to define and execute a"},{"line_number":24,"context_line":"device-cleaning workflow outside of Nova. Further, Nova does not intend to"},{"line_number":25,"context_line":"take on such tasks itself, in support of the long-term \"no more orchestration\""},{"line_number":26,"context_line":"goal."},{"line_number":27,"context_line":""},{"line_number":28,"context_line":"Use Cases"},{"line_number":29,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"b880d4a0_9cb6cd53","line":26,"range":{"start_line":25,"start_character":29,"end_line":26,"end_character":5},"in_reply_to":"3ae57bab_50633f4c","updated":"2025-03-11 15:24:17.000000000","message":"Sean, I was just saying that it was just a wording issue : dan haven\u0027t said this was an orchestration usage (eg. flashing a firmware) but this was related : Nova is not here for having an hardware fleet inventory with some APIs for help them, it could maybe be Cyborg but definitely not Nova.\nI guess we don\u0027t disagree on that, right?","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"bb57474c25900ec90d94ae55946b53443ef34cbb","unresolved":true,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Currently there is no good way for operators to define and execute a"},{"line_number":24,"context_line":"device-cleaning workflow outside of Nova. Further, Nova does not intend to"},{"line_number":25,"context_line":"take on such tasks itself, in support of the long-term \"no more orchestration\""},{"line_number":26,"context_line":"goal."},{"line_number":27,"context_line":""},{"line_number":28,"context_line":"Use Cases"},{"line_number":29,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"3ae57bab_50633f4c","line":26,"range":{"start_line":25,"start_character":29,"end_line":26,"end_character":5},"in_reply_to":"8c4d5473_e4af88ec","updated":"2025-03-07 15:09:09.000000000","message":"by the way i make a distiction between basic provisioning (allocating an ephemeral disk file with qemu-img) and orchestrating complex interaction over an extended period of time.\n\nflashing firmware is not what I would consider basic provisioning.\n\nZeroing a device or calling a tool to \"reset it,\" like we call qemu image, shread, or daxio today, I think is part of basic provisioning because it\u0027s all local and uses simple tools.\n\nthis is what I was mainly commenting on. Orchestration to me is a much more complex thing, the provisioning resources on the hypervisor.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"abc5ad5038c21f048b03eb2711182aafa7842584","unresolved":true,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Currently there is no good way for operators to define and execute a"},{"line_number":24,"context_line":"device-cleaning workflow outside of Nova. Further, Nova does not intend to"},{"line_number":25,"context_line":"take on such tasks itself, in support of the long-term \"no more orchestration\""},{"line_number":26,"context_line":"goal."},{"line_number":27,"context_line":""},{"line_number":28,"context_line":"Use Cases"},{"line_number":29,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"8c4d5473_e4af88ec","line":26,"range":{"start_line":25,"start_character":29,"end_line":26,"end_character":5},"in_reply_to":"8f20509e_e71618dd","updated":"2025-03-07 14:56:38.000000000","message":"i dont know what your disagreeing with.\n\ni never mentioned firmware or drivers and that is not something i would add to nova.\n\nwe belived it was perfectly in scope for nova to secure erase intep persistent memory by invoking a utility to do that\n\nhttps://github.com/openstack/nova/blob/276685b3db6e8f2ad59c33bc254461c255700ff8/nova/privsep/libvirt.py#L246-L249\n\ni dont see why havign the option to have nova do \n\n\"cat /dev/zero \u003e /dev/nvmeXYZ\"\nor\n\"shred /dev/nvmeXYZ\"\n\nis really any differnt.\n\nwe alreadt gave cide fir tgus\n\nhttps://github.com/openstack/nova/blob/276685b3db6e8f2ad59c33bc254461c255700ff8/nova/privsep/fs.py#L99-L107\n\nwe use it as part of the lvm image backend to make sure the date is erased when the volume is delete.\n\ndeleting volumes in lvm does nto delete any data by default so we clean up the device as part of deallcoting it.\n\nto me if a device vendor provides a toll to reset a device for use or there is a generic tool that can be used to od it ist not an extream suggestion to say nova could provide a very minimal interface ot do that.\n\nif libvirt provided an api for this (secure eraseing a nvme device) i dont think we would we would be having this conversation.\n\n\nthis si a slightly different conversation however.\n\ndan is askign to support one time devices\n\nthis side converstaion si supprot for stateful devices.\n\nfrom an upstream point of view if we approve dans fature nvm device would technill stil not be supproted by nova as we are not adding offical supprot for stateful devices.\n\nwe are, however, creating a hook point that would allow an operator to build that support themselves.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":true,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Currently there is no good way for operators to define and execute a"},{"line_number":24,"context_line":"device-cleaning workflow outside of Nova. Further, Nova does not intend to"},{"line_number":25,"context_line":"take on such tasks itself, in support of the long-term \"no more orchestration\""},{"line_number":26,"context_line":"goal."},{"line_number":27,"context_line":""},{"line_number":28,"context_line":"Use Cases"},{"line_number":29,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"dc260d59_564b7ec6","line":26,"range":{"start_line":25,"start_character":29,"end_line":26,"end_character":5},"in_reply_to":"a8eae34e_f31e2880","updated":"2025-03-06 15:03:51.000000000","message":"Multi-create came from (long) before we put that project tenet in place. Multi-create is also very much the business of nova itself, and thus I wouldn\u0027t really call it a violation of that rule.\n\nPreparing, attaching, cleaning, and unreserving devices with all kinds of required workflows, tools, operational windows, etc is definitely squarely inside of \"orchestration\" to me, and that\u0027s what Cyborg does.\n\nThe PCI manager is nothing more than accounting to me, so I don\u0027t see the parallel there.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c63bd3c99022e4b9f1205e6aafe31f557d07a5a0","unresolved":true,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Currently there is no good way for operators to define and execute a"},{"line_number":24,"context_line":"device-cleaning workflow outside of Nova. Further, Nova does not intend to"},{"line_number":25,"context_line":"take on such tasks itself, in support of the long-term \"no more orchestration\""},{"line_number":26,"context_line":"goal."},{"line_number":27,"context_line":""},{"line_number":28,"context_line":"Use Cases"},{"line_number":29,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"05547d8e_613e318f","line":26,"range":{"start_line":25,"start_character":29,"end_line":26,"end_character":5},"in_reply_to":"b880d4a0_9cb6cd53","updated":"2025-03-11 15:28:09.000000000","message":"To me, this is enabling a consistent behavior that allows something outside of nova to do the actual orchestration. Thus, it is _not_ orchestration inside of nova, just a synchronous behavior that enables something else to do it safely.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"Currently there is no good way for operators to define and execute a"},{"line_number":24,"context_line":"device-cleaning workflow outside of Nova. Further, Nova does not intend to"},{"line_number":25,"context_line":"take on such tasks itself, in support of the long-term \"no more orchestration\""},{"line_number":26,"context_line":"goal."},{"line_number":27,"context_line":""},{"line_number":28,"context_line":"Use Cases"},{"line_number":29,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"8f20509e_e71618dd","line":26,"range":{"start_line":25,"start_character":29,"end_line":26,"end_character":5},"in_reply_to":"dc260d59_564b7ec6","updated":"2025-03-07 09:30:34.000000000","message":"I strongly disagree with the disagreement, maybe because this is just a wording issue. \nWe know that Nova by design tries to do a lot of things but not something related to some hardware fleet inventory usage (eg. updating a firmware, etc.). Maybe this could be Cyborg, but the consensus here at least is that this is not done by Nova.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"c938b8c369c4bb892208c5a72339c5bcd0b21e4c","unresolved":true,"context_lines":[{"line_number":36,"context_line":"initialization, etc)."},{"line_number":37,"context_line":""},{"line_number":38,"context_line":"As a cloud operator, I want to provide fast direct-passthrough storage support,"},{"line_number":39,"context_line":"but without risking information leakage between tenants."},{"line_number":40,"context_line":""},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":2,"id":"92d4bf03_1e1e7a7b","line":39,"updated":"2025-03-05 23:26:48.000000000","message":"on this point i have not read the fulll spec yet, jsut skimmed parts of it but when i review this properly one thing im hoping to see discusseesd si the implication for lifecycle operatons.\n\ni.e. what is the implciation, if any, for move operations.\n\nwe recently added live_migratable but i wonder if this feature implies we need a \"migratable\" or \"stateful\" tag in the future?\n\nwould we for example say that pci device with \"one_time_use\" allow all move ops excpet live migrate but explcitly lose any state on the device.\n\nwoudl we decied to block the operations like shelve or evacueate on the ground that data loss might break the workload.\n\ni have not reflected on this idea enogh to have formed an opion yet but i would like this to be discussed and specifed in this spec.\n\ni am leaing towards the \"all life cycle operations work but any date stored on the device is lost\" camp i think.\n\nthat just because if this was an ssd and the operator had to do an evacuation becaus the mother board died, since we evacuate to stopped not they could potiallly grap the nvme ssd and in stall it in the new host or copy the data.\n\n\nif the device was somethign like an fpga the vm might just need ot reinitallise the device with the bistream or wahtever.\n\nso im not sure if we should that the oeprros side and not force them into a corner\nor if we shoudl that the user side and not allow the admin to break them.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":true,"context_lines":[{"line_number":36,"context_line":"initialization, etc)."},{"line_number":37,"context_line":""},{"line_number":38,"context_line":"As a cloud operator, I want to provide fast direct-passthrough storage support,"},{"line_number":39,"context_line":"but without risking information leakage between tenants."},{"line_number":40,"context_line":""},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":2,"id":"e2ac538b_b73c20b0","line":39,"in_reply_to":"92d4bf03_1e1e7a7b","updated":"2025-03-06 15:03:51.000000000","message":"\u003e i am leaing towards the \"all life cycle operations work but any date stored on the device is lost\" camp i think.\n\nYep, I think this is the right way. If it\u0027s not live-migratable, then it\u0027s not, but otherwise I think we should expect cold migration to work, but data is lost and the device on the source is burned.\n\nIn order for that to work, we need the ability to exchange one allocation for another in placement. Since we also need it to fix the accounting bug of migrating away from a full host (or a flavor that is more than half of the host) I think we can call that a separate thing, but you\u0027re right I should mention it in this spec.\n\n\u003e if the device was somethign like an fpga the vm might just need ot reinitallise the device with the bistream or wahtever.\n\nWell, we\u0027re not going to do that because we don\u0027t prepare the devices, and I\u0027d say any device that needs to be prepped (for create *or* move) would be a cyborg thing.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d0eb941a2405f41601d694b033939c23a8c51159","unresolved":false,"context_lines":[{"line_number":36,"context_line":"initialization, etc)."},{"line_number":37,"context_line":""},{"line_number":38,"context_line":"As a cloud operator, I want to provide fast direct-passthrough storage support,"},{"line_number":39,"context_line":"but without risking information leakage between tenants."},{"line_number":40,"context_line":""},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":2,"id":"3f8f7560_cc001e1f","line":39,"in_reply_to":"e2ac538b_b73c20b0","updated":"2025-03-06 18:20:16.000000000","message":"Done","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"be8ee3453674a6abcba336d82dbe74dc91945a15","unresolved":true,"context_lines":[{"line_number":52,"context_line":"to do this itself, even though it will not take on the actual task of doing"},{"line_number":53,"context_line":"any device cleaning."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":"The annotation mechanism here will utilize the `reserved` inventory count,"},{"line_number":56,"context_line":"on top of PCI-in-placement. Basically, when Nova goes to allocate the device"},{"line_number":57,"context_line":"for the instance, it will follow up with a bump of the `reserved`` count. When"},{"line_number":58,"context_line":"we go to de-allocate the device, we will not touch the `reserved` count, thus"},{"line_number":59,"context_line":"leaving the resource provider for the device fully-reserved (and thus not"},{"line_number":60,"context_line":"allocatable)."},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":63,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"}],"source_content_type":"text/x-rst","patch_set":2,"id":"0d632a35_d2a49074","line":60,"range":{"start_line":55,"start_character":0,"end_line":60,"end_character":13},"updated":"2025-03-06 09:43:08.000000000","message":"We need to differentiate between device types doe to existing design constraints.\n\n* When a device is PF or full PCI device then it is represented as a RP pointing to the PCI address of the PF with an RC, total\u003d1 inventory in placement. So the RP-device mapping is unambiguous. And the reserved bump proposed here works without any complication\n\n* When a device is a VF and the same PF provides N equivalent VFs (either via the normal NIC SRIOV, or by having the same GPU device type used when the VFs are created from the PF) then these N equivalent VFs are represented as an RP pointing to the parent PF PCI address, with an RC, total\u003dN. So here the reserved bump does not work alone as total\u003dN,reserved\u003d1 does not tell the user which VF from the N VFs needs the cleanup.\n\nRight now I don\u0027t know if the one time use feature is only useful for the former category (PF, PCI) or also useful for the latter (N equvi VFs) category as well. If we want to support it for the latter as well then we need to do the reservation not (only) in Placement but also in the PCI device tracker where the unambiguous, VF PCI address based, allocation is stored.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"65f805065753a1d31f76fd61976d96b2a1494333","unresolved":true,"context_lines":[{"line_number":52,"context_line":"to do this itself, even though it will not take on the actual task of doing"},{"line_number":53,"context_line":"any device cleaning."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":"The annotation mechanism here will utilize the `reserved` inventory count,"},{"line_number":56,"context_line":"on top of PCI-in-placement. Basically, when Nova goes to allocate the device"},{"line_number":57,"context_line":"for the instance, it will follow up with a bump of the `reserved`` count. When"},{"line_number":58,"context_line":"we go to de-allocate the device, we will not touch the `reserved` count, thus"},{"line_number":59,"context_line":"leaving the resource provider for the device fully-reserved (and thus not"},{"line_number":60,"context_line":"allocatable)."},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":63,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"}],"source_content_type":"text/x-rst","patch_set":2,"id":"f4ddf7bc_e9d73721","line":60,"range":{"start_line":55,"start_character":0,"end_line":60,"end_character":13},"in_reply_to":"0d632a35_d2a49074","updated":"2025-03-06 12:56:12.000000000","message":"i guess if one time use was applied to a VF we could have 1 RP per VF\n\ni know that on some buggy hardware a workaorund to driver issues with PF passthough is to sometime use a single VF to provide access to the entire device.\nso im unsure if deciding this is only supproted for type-pf and type-pci is the right approch or not either.\n\nwe could start that way i guess and later expand to supporting VFs.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d0eb941a2405f41601d694b033939c23a8c51159","unresolved":false,"context_lines":[{"line_number":52,"context_line":"to do this itself, even though it will not take on the actual task of doing"},{"line_number":53,"context_line":"any device cleaning."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":"The annotation mechanism here will utilize the `reserved` inventory count,"},{"line_number":56,"context_line":"on top of PCI-in-placement. Basically, when Nova goes to allocate the device"},{"line_number":57,"context_line":"for the instance, it will follow up with a bump of the `reserved`` count. When"},{"line_number":58,"context_line":"we go to de-allocate the device, we will not touch the `reserved` count, thus"},{"line_number":59,"context_line":"leaving the resource provider for the device fully-reserved (and thus not"},{"line_number":60,"context_line":"allocatable)."},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":63,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"}],"source_content_type":"text/x-rst","patch_set":2,"id":"ec794766_6c63740d","line":60,"range":{"start_line":55,"start_character":0,"end_line":60,"end_character":13},"in_reply_to":"a0143e0a_f554356f","updated":"2025-03-06 18:20:16.000000000","message":"Done","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"4d5443a77a8be9db0766d8898e4abd4bd8f3b604","unresolved":false,"context_lines":[{"line_number":52,"context_line":"to do this itself, even though it will not take on the actual task of doing"},{"line_number":53,"context_line":"any device cleaning."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":"The annotation mechanism here will utilize the `reserved` inventory count,"},{"line_number":56,"context_line":"on top of PCI-in-placement. Basically, when Nova goes to allocate the device"},{"line_number":57,"context_line":"for the instance, it will follow up with a bump of the `reserved`` count. When"},{"line_number":58,"context_line":"we go to de-allocate the device, we will not touch the `reserved` count, thus"},{"line_number":59,"context_line":"leaving the resource provider for the device fully-reserved (and thus not"},{"line_number":60,"context_line":"allocatable)."},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":63,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"}],"source_content_type":"text/x-rst","patch_set":2,"id":"26e1eb4d_896fa71b","line":60,"range":{"start_line":55,"start_character":0,"end_line":60,"end_character":13},"in_reply_to":"ec794766_6c63740d","updated":"2025-03-10 15:22:37.000000000","message":"After further discussion I\u0027m OK to have this only supported for PFs and not for VFs. A sane VF implementor should take care of the sharing / cleanup issues.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":true,"context_lines":[{"line_number":52,"context_line":"to do this itself, even though it will not take on the actual task of doing"},{"line_number":53,"context_line":"any device cleaning."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":"The annotation mechanism here will utilize the `reserved` inventory count,"},{"line_number":56,"context_line":"on top of PCI-in-placement. Basically, when Nova goes to allocate the device"},{"line_number":57,"context_line":"for the instance, it will follow up with a bump of the `reserved`` count. When"},{"line_number":58,"context_line":"we go to de-allocate the device, we will not touch the `reserved` count, thus"},{"line_number":59,"context_line":"leaving the resource provider for the device fully-reserved (and thus not"},{"line_number":60,"context_line":"allocatable)."},{"line_number":61,"context_line":""},{"line_number":62,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":63,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"}],"source_content_type":"text/x-rst","patch_set":2,"id":"a0143e0a_f554356f","line":60,"range":{"start_line":55,"start_character":0,"end_line":60,"end_character":13},"in_reply_to":"f4ddf7bc_e9d73721","updated":"2025-03-06 15:03:51.000000000","message":"I was intending/assuming this would only be useful for full PFs. I would expect any device with a VF to not need cleaning (in the NIC or GPU case) although an NVMe could violate that I suppose. Either way, I say we focus on PF for the moment.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"be8ee3453674a6abcba336d82dbe74dc91945a15","unresolved":true,"context_lines":[{"line_number":68,"context_line":"------------"},{"line_number":69,"context_line":""},{"line_number":70,"context_line":"One alternative is to do nothing and continue to operate as we do today. Nova"},{"line_number":71,"context_line":"does not provide any device cleaning ability, nor any real hooks or integration"},{"line_number":72,"context_line":"for operators desiring it."},{"line_number":73,"context_line":""},{"line_number":74,"context_line":"Another alternative is to say that this is in the scope of Cyborg, which is"},{"line_number":75,"context_line":"definitely a very valid take. The one-time-use-devices idea sits somewhere"}],"source_content_type":"text/x-rst","patch_set":2,"id":"c09aade6_5cba8dff","line":72,"range":{"start_line":71,"start_character":46,"end_line":72,"end_character":26},"updated":"2025-03-06 09:43:08.000000000","message":"This situation is basically intentional. \n* I.e. nova intentionally removed most of the plug-in point due to the maintenance cost of them. \n* the nova notification interface was always designed as an async, fire and forget interface\n* nova spent effort to build up an interface to cyborg as way to support the specific device handling needs\n\nI got that using cyborg for the above use case in an env where cyborg hasn\u0027t been deployed yet, is a huge cost. I just want to make sure that we are aware that by not recommending using cyborg for this use case actually further decrease the chance that cyborg will eventually adopted on the field and therefore nova can easily recommend a cyborg based solution.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d0eb941a2405f41601d694b033939c23a8c51159","unresolved":false,"context_lines":[{"line_number":68,"context_line":"------------"},{"line_number":69,"context_line":""},{"line_number":70,"context_line":"One alternative is to do nothing and continue to operate as we do today. Nova"},{"line_number":71,"context_line":"does not provide any device cleaning ability, nor any real hooks or integration"},{"line_number":72,"context_line":"for operators desiring it."},{"line_number":73,"context_line":""},{"line_number":74,"context_line":"Another alternative is to say that this is in the scope of Cyborg, which is"},{"line_number":75,"context_line":"definitely a very valid take. The one-time-use-devices idea sits somewhere"}],"source_content_type":"text/x-rst","patch_set":2,"id":"de700da6_bbe76c2c","line":72,"range":{"start_line":71,"start_character":46,"end_line":72,"end_character":26},"in_reply_to":"51cf4540_8c0f71a3","updated":"2025-03-06 18:20:16.000000000","message":"I think I resolved this with more words.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":true,"context_lines":[{"line_number":68,"context_line":"------------"},{"line_number":69,"context_line":""},{"line_number":70,"context_line":"One alternative is to do nothing and continue to operate as we do today. Nova"},{"line_number":71,"context_line":"does not provide any device cleaning ability, nor any real hooks or integration"},{"line_number":72,"context_line":"for operators desiring it."},{"line_number":73,"context_line":""},{"line_number":74,"context_line":"Another alternative is to say that this is in the scope of Cyborg, which is"},{"line_number":75,"context_line":"definitely a very valid take. The one-time-use-devices idea sits somewhere"}],"source_content_type":"text/x-rst","patch_set":2,"id":"51cf4540_8c0f71a3","line":72,"range":{"start_line":71,"start_character":46,"end_line":72,"end_character":26},"in_reply_to":"a2911b3f_82c383f0","updated":"2025-03-06 15:03:51.000000000","message":"Gibi, I don\u0027t want to discourage use of Cyborg and I don\u0027t want this feature to be seen as treading on their turf. To me this is a very small change in Nova\u0027s behavior that _allows_ a better post-delete integration story but is a long (long) way from making Cyborg less useful. My view is that anyone that thinks this feature is enough to not need Cyborg probably did not need it enough to justify deploying it and would have just implemented some other hacky solution to their problem. I\u0027m happy to state that officially here if you think it would help.\n\nSean, I\u0027ve said it before but I\u0027ll codify it here for posterity: I definitely do _not_ think general device prep and cleaning is a good thing for nova to take in scope. We spent a bunch of time trying to get cyborg-\u003enova interaction nailed down and they are the right place to do it, IMHO. We can\u0027t even gracefully shut down our own RPC connections, so I definitely don\u0027t think that data-exposure-risk long-running cleaning orchestration is what we need :)","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"65f805065753a1d31f76fd61976d96b2a1494333","unresolved":true,"context_lines":[{"line_number":68,"context_line":"------------"},{"line_number":69,"context_line":""},{"line_number":70,"context_line":"One alternative is to do nothing and continue to operate as we do today. Nova"},{"line_number":71,"context_line":"does not provide any device cleaning ability, nor any real hooks or integration"},{"line_number":72,"context_line":"for operators desiring it."},{"line_number":73,"context_line":""},{"line_number":74,"context_line":"Another alternative is to say that this is in the scope of Cyborg, which is"},{"line_number":75,"context_line":"definitely a very valid take. The one-time-use-devices idea sits somewhere"}],"source_content_type":"text/x-rst","patch_set":2,"id":"a2911b3f_82c383f0","line":72,"range":{"start_line":71,"start_character":46,"end_line":72,"end_character":26},"in_reply_to":"c09aade6_5cba8dff","updated":"2025-03-06 12:56:12.000000000","message":"i think its within scope of nova to have a generic device attachment api and to be in the busness of managing device cleaning\n\nprecedent for this is we implemented it for intel persistent memory.\nhttps://specs.openstack.org/openstack/nova-specs/specs/train/implemented/virtual-persistent-memory.html#virtual-persistent-memory-disposal\nand that was done after cyborg exsited.\nthe code still exist to do secure erasere using the daxio tool\nbecause it managed in nova we can also supprot movign the data on cold migrate and qemu can do it for live migrate.\n\nif we wanted to have nova supprot device cleanup in a generic way, we could do it via a os-vif/os-brick style interface i.e. only two functions prepare_device() clean_device() and we do not need to make that plugable but we would have to make it configurable via the pci device spec. if we want to supprot cold migration we would need some form of copy capablity too if require for the device.\n\nthats assuming we did not want to have it in tree liek we have for intel persitnet memory.\n\ni think this is perfectly in the scope fo nova to continue to evolve in that direction. with that said i get why dan and other dont want nova to have to do this if another project exists that could do it.\n\nAt some point we will need to supprot CXL in openstack and we shoudl be supporting USB and direct block device already IMO\n\nThat does not mean it has to be in Nova.\nI think it could be in Nova and it would be good for the community to have it in nova but I can accept doing it in Cyborg or another project.\nHowever, if we go down that path we need to embrace it fully.\n\nmeaning this should be the last or one of the last new new extentions ot the pci pasthoguh furncitonliy in nova.\n\ncompleting pci in neutron placement is fine; thats just closing a party gap is not net new.\n\ni could see us also adding pci grouping\nhttps://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/pci-passthrough-groups.html\nor addign supprot for multi funtion pci device assignment.\n\nboth of those are really just makign our configuration more expressive to express constraits.\n\nbut if nvidia turns arond and says we are done with PCIE all new GPUS and VGPUs will be build on CXL going forward then that gets implemtned in cyborg.\n\nto date outside fo the fpga programming use case, there is nothing that you can do with cycborg that can\u0027t be done with nova pci passthrough and generic mdev support, at least in the context of a stateless device like accelerators.\n\nwhen we added the virtual persistent memory feature we diseign the database model exiplcity to supprot tracking new host assignable resouce so that we could add usb or cxl or simlary host device mangemetn like nvme device in the future as needed.\n\nI have always believed that Cyborg should have been a library like os-vif or os-brick and not a standalone project with device management done in nova.\n\ni dont disagree that extending nova to have more robust device manage ment compets with cyborg but that actully a healty thing. there is ment to be competion wihtin the openstack echosystrem btween projects liek horizon vs skyline or all of the insallers.\n\nas long as we allow cyborg to integrate when tehy want too an enable them to sucsseed its oke fo have basic supprot similar things in nova and let the market choose what to deploy.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"c938b8c369c4bb892208c5a72339c5bcd0b21e4c","unresolved":true,"context_lines":[{"line_number":88,"context_line":"There should be no data model impact if we use the existing PCI `dev_spec` to"},{"line_number":89,"context_line":"flag a device as `one_time_use\u003d(yes|no)`. This is a similar approach to the"},{"line_number":90,"context_line":"recent migrate-vfio-devices-using-kernel-variant-drivers spec which allows"},{"line_number":91,"context_line":"operators to flag them as `live_migratable\u003d(yes|no)`."},{"line_number":92,"context_line":""},{"line_number":93,"context_line":"REST API impact"},{"line_number":94,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"bc4cb9e0_403fa0e5","line":91,"updated":"2025-03-05 23:26:48.000000000","message":"ack we proably will want to add the tag to the list of ignored tags for the pci pools so that we dont end up storin ghtis in the pci_stats which has the pci pools in the compute node table.\n\nthat a trivial thing to do, the tag can be sored in the pci_dic extra info column in the cell db just as it is for live_migratable\n\n\ngiven that a json blob there is no schdme change although we probaly will want to add a new propery to the pci_decice class to add a layer of indirection over the extra_info dict.\n\n\nwe do not have the pci devspec out side of the compute node and the deivce is not assined to the instance claim on the compute manager so we will need to defer the reservation of the deice in placment until at least that point, the placement allcoation can happen as it normally does in the conductor. that means if the instance claim fails for any reason or if we fail early enough in the compute manager we can avoid \"burning\" the pci dvice if it never got as far as being used by the vm.\n\nthis is not an object change as it does not change any of the filed in the obejct so ya there is effectily no versioned data model change.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d0eb941a2405f41601d694b033939c23a8c51159","unresolved":false,"context_lines":[{"line_number":88,"context_line":"There should be no data model impact if we use the existing PCI `dev_spec` to"},{"line_number":89,"context_line":"flag a device as `one_time_use\u003d(yes|no)`. This is a similar approach to the"},{"line_number":90,"context_line":"recent migrate-vfio-devices-using-kernel-variant-drivers spec which allows"},{"line_number":91,"context_line":"operators to flag them as `live_migratable\u003d(yes|no)`."},{"line_number":92,"context_line":""},{"line_number":93,"context_line":"REST API impact"},{"line_number":94,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"68d96d87_adaf0634","line":91,"in_reply_to":"0783e27d_990948cc","updated":"2025-03-06 18:20:16.000000000","message":"I\u0027m going to resolve this in favor of the discussion below.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":true,"context_lines":[{"line_number":88,"context_line":"There should be no data model impact if we use the existing PCI `dev_spec` to"},{"line_number":89,"context_line":"flag a device as `one_time_use\u003d(yes|no)`. This is a similar approach to the"},{"line_number":90,"context_line":"recent migrate-vfio-devices-using-kernel-variant-drivers spec which allows"},{"line_number":91,"context_line":"operators to flag them as `live_migratable\u003d(yes|no)`."},{"line_number":92,"context_line":""},{"line_number":93,"context_line":"REST API impact"},{"line_number":94,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"0783e27d_990948cc","line":91,"in_reply_to":"bc4cb9e0_403fa0e5","updated":"2025-03-06 15:03:51.000000000","message":"I was originally planning to do this in the compute manager around where we do the PCI tracker accounting, mostly for the reason you state (don\u0027t burn it until the last minute). Gibi had me convinced that the best place to do it was in the post-scheduler code in the conductor because of the other live-migration series. But, see below.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"c938b8c369c4bb892208c5a72339c5bcd0b21e4c","unresolved":true,"context_lines":[{"line_number":111,"context_line":"Other end user impact"},{"line_number":112,"context_line":"---------------------"},{"line_number":113,"context_line":""},{"line_number":114,"context_line":"None (invisible to users)."},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"Performance Impact"},{"line_number":117,"context_line":"------------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"ef45eda5_e23a899d","line":114,"updated":"2025-03-05 23:26:48.000000000","message":"seem my questions about life cycle impacts above ^ you might be right but\ni think it could have end user or operator impact depending on where we want to draw the line on what lifecycle operations are allowed in this case.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"b267dabfa818da33ff7c0bb79911269d7bcba0a3","unresolved":true,"context_lines":[{"line_number":111,"context_line":"Other end user impact"},{"line_number":112,"context_line":"---------------------"},{"line_number":113,"context_line":""},{"line_number":114,"context_line":"None (invisible to users)."},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"Performance Impact"},{"line_number":117,"context_line":"------------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"f34fb039_2384ba43","line":114,"in_reply_to":"5b01ef69_a4ee7ab4","updated":"2025-03-12 21:34:44.000000000","message":"In my view, \"end user\" !\u003d \"operator\" and I would say this has no visible impact to end users of the cloud but it does have impact to operators of the cloud, and operators are covered below in the \"Other deployer impact\" section (I consider \"operator\" \u003d\u003d \"deployer\").","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":111,"context_line":"Other end user impact"},{"line_number":112,"context_line":"---------------------"},{"line_number":113,"context_line":""},{"line_number":114,"context_line":"None (invisible to users)."},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"Performance Impact"},{"line_number":117,"context_line":"------------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"5b01ef69_a4ee7ab4","line":114,"in_reply_to":"e4f10cac_2c404af8","updated":"2025-03-07 09:30:34.000000000","message":"Yup, I don\u0027t see any user impact here, that will be fully transparent (rather like the `managed` flag too, since live_migratable can be a little confusing tag as we also have something related in the flavor)\n\nDan\u0027s proposal on lifecycle operations seems to me the behaviour that an user should expect, ie. a device being \u0027burned\u0027 if the instance moving to another host.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":true,"context_lines":[{"line_number":111,"context_line":"Other end user impact"},{"line_number":112,"context_line":"---------------------"},{"line_number":113,"context_line":""},{"line_number":114,"context_line":"None (invisible to users)."},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"Performance Impact"},{"line_number":117,"context_line":"------------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"e4f10cac_2c404af8","line":114,"in_reply_to":"ef45eda5_e23a899d","updated":"2025-03-06 15:03:51.000000000","message":"I think the impact to users is still none, modulo bugs (i.e. our double accounting allocation swap) and the existing `live_migratable\u003dno` restriction that is unrelated to this.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ebb2169cfab465e0b4ccd9549d15fe2f1af8d0be","unresolved":false,"context_lines":[{"line_number":111,"context_line":"Other end user impact"},{"line_number":112,"context_line":"---------------------"},{"line_number":113,"context_line":""},{"line_number":114,"context_line":"None (invisible to users)."},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"Performance Impact"},{"line_number":117,"context_line":"------------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"c739f074_e4fbf5a7","line":114,"in_reply_to":"f34fb039_2384ba43","updated":"2025-03-13 13:35:20.000000000","message":"Acknowledged","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"c938b8c369c4bb892208c5a72339c5bcd0b21e4c","unresolved":true,"context_lines":[{"line_number":119,"context_line":"This will involve a single additional call to placement to update the"},{"line_number":120,"context_line":"inventory after we allocate the device. This should be negligible in terms of"},{"line_number":121,"context_line":"performance impact, and the error handling will be identical to that of the"},{"line_number":122,"context_line":"case where we fail to do the allocation itself."},{"line_number":123,"context_line":""},{"line_number":124,"context_line":"Other deployer impact"},{"line_number":125,"context_line":"---------------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"85c641ed_3402d160","line":122,"updated":"2025-03-05 23:26:48.000000000","message":"since your not proposing modifying the ailias this wont have any impact on the mechanics of the pci and numa filters as a one time use device is not directly schdulable\n\nif we add the trait then you can get that for free if someone really wanted as the pci aials supprot tratis.\n\n\nso i agree there should be almost no performace impact to this.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":false,"context_lines":[{"line_number":119,"context_line":"This will involve a single additional call to placement to update the"},{"line_number":120,"context_line":"inventory after we allocate the device. This should be negligible in terms of"},{"line_number":121,"context_line":"performance impact, and the error handling will be identical to that of the"},{"line_number":122,"context_line":"case where we fail to do the allocation itself."},{"line_number":123,"context_line":""},{"line_number":124,"context_line":"Other deployer impact"},{"line_number":125,"context_line":"---------------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"4a2d0b37_94ff4e0f","line":122,"in_reply_to":"85c641ed_3402d160","updated":"2025-03-06 15:03:51.000000000","message":"Acknowledged","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"c938b8c369c4bb892208c5a72339c5bcd0b21e4c","unresolved":true,"context_lines":[{"line_number":158,"context_line":"----------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"* Parse `one_time_use` from `[pci]dev_spec` config"},{"line_number":161,"context_line":"* Add code to bump reserved count when we allocate the PCI device in placement"},{"line_number":162,"context_line":"* Add documentation and a sample cleanup listener script"},{"line_number":163,"context_line":""},{"line_number":164,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":2,"id":"42686642_988e2f17","line":161,"updated":"2025-03-05 23:26:48.000000000","message":"this cant happen until the instance claim in the compute manager.\n\nunless we want to finally move that to the conductor instead of doing it on the compute :) that a much bigger change tehn your propsoing so i suggest lets not do that in this spec.\n\nwe should defintly do that eventually... but that defintly out of scope.\n\n\nso up until this point in the instance claim we can still reject the host based on numa affintiy issues\n\nhttps://github.com/openstack/nova/blob/master/nova/compute/claims.py#L124-L170\n\nfor cold migration/resize the equivalent is \n\nhttps://github.com/openstack/nova/blob/master/nova/compute/claims.py#L209-L233\n\n\nfro sriov live migration it happens a littel difernlty but we can ignore that for now.\n\nthis feature is propsoed to work only with pci in placement\nwe do not suprpot that feautre for neutron ports today\nso as a result we are only talkign about passhtough devices request via the alisa.\n\n\nthe actual allcoation/claiming of the pci dvices actuly does not happen until a littel after this .\n\n\nwe create the claim object here and then do the actully claim in the db (allocating the speific pci device) here\n\nhttps://github.com/openstack/nova/blob/master/nova/compute/resource_tracker.py#L190-L205\n\nthen we mapp teh claimed resouce back to the allcoations form placmenet if approrate\n\nhttps://github.com/openstack/nova/blob/master/nova/compute/resource_tracker.py#L207-L208\n\n\nthat function is invoke as a context manager here\n\nhttps://github.com/openstack/nova/blob/master/nova/compute/manager.py#L2622-L2623\n\nright before we do the late (anti)affintiy check \n\nhttps://github.com/openstack/nova/blob/master/nova/compute/manager.py#L2627-L2628\n\ni would suggest doing the reservation in placment in the _build_resources\n\nwhich is where we are interacting with cyborg to get the bound arqs, and interact wiht neturon and cidner.\n\nso i woudl set reserved\u003d1 here\n\nhttps://github.com/openstack/nova/blob/master/nova/compute/manager.py#L2900-L2902\n\nright before we yeild and boot the vm.\n\nif we got that far basiclly the only way to \"burn hte device\" is if prot biding failed, or we got some sort of libvirt error.\n\nin which case the instance will go to error and we can treate it as if it has at least attpemted to boot.\n\nby doing it in _build_resources i think it woudl cover all lifecycle oeprations like unshlve or resize but you would have to verify that.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"65f805065753a1d31f76fd61976d96b2a1494333","unresolved":true,"context_lines":[{"line_number":158,"context_line":"----------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"* Parse `one_time_use` from `[pci]dev_spec` config"},{"line_number":161,"context_line":"* Add code to bump reserved count when we allocate the PCI device in placement"},{"line_number":162,"context_line":"* Add documentation and a sample cleanup listener script"},{"line_number":163,"context_line":""},{"line_number":164,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":2,"id":"ffa0f5dd_3242c396","line":161,"in_reply_to":"3e5df84c_f1c4dc40","updated":"2025-03-06 12:56:12.000000000","message":"i was debating if we shoudl add a 4 state in the pci trackker\n\ncurrently we have aviable, claimed, and allocated. we could add \"used\" or some other sate for \"need operator intervation to use again\"\n\nthe two problems with that are visablity and how to signal that its cleaned.\n\nwe can really use nova manage for this (we technially could) becaues teh compute nodes are not expect ot have db access and if you are going ot do cleaning it a little odd ot have the thing that is doing the cleaning be remote form the compute.\n\nas a result i think pci in placement is a hard requirement for this as currently formulated\n\non the vf aspect, i commented on that above, but i wonder if we just end up not supproting VF in v1 or move to one RP per vf if it has the single use tag applied.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"be8ee3453674a6abcba336d82dbe74dc91945a15","unresolved":true,"context_lines":[{"line_number":158,"context_line":"----------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"* Parse `one_time_use` from `[pci]dev_spec` config"},{"line_number":161,"context_line":"* Add code to bump reserved count when we allocate the PCI device in placement"},{"line_number":162,"context_line":"* Add documentation and a sample cleanup listener script"},{"line_number":163,"context_line":""},{"line_number":164,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":2,"id":"3e5df84c_f1c4dc40","line":161,"in_reply_to":"42686642_988e2f17","updated":"2025-03-06 09:43:08.000000000","message":"If we use PCI in Placement for the reservation (see my above comment about the issue with VFs), then it might make sense to mark the device as one time use in Placement as well with a trait. So then the allocation in Placement and the reservation in Placement does not need to happens separately, the reservation does not need to wait for the PCI claim in the compute manager but can happen right after the a_c is allocated, based on the trait on the RP being allocated.\n\nDefinitely it is a trade off about where we put the complexity:\n* a) do the reservation after the pci claim and therefore do it far from the place where the allocation happens, but limit the case when reservation needs to be rollbacked without cleanup as the device was never in use.\n* b) make the code more cohesive and do the reservation right after the allocation but implement some rollback logic\n\nIf we need to support on time use VFs then the whole point is moot as for that we need to do the reservation in the PCI tracker instead of only in placement so the PCI claim code will be affected anyhow.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"4d5443a77a8be9db0766d8898e4abd4bd8f3b604","unresolved":true,"context_lines":[{"line_number":158,"context_line":"----------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"* Parse `one_time_use` from `[pci]dev_spec` config"},{"line_number":161,"context_line":"* Add code to bump reserved count when we allocate the PCI device in placement"},{"line_number":162,"context_line":"* Add documentation and a sample cleanup listener script"},{"line_number":163,"context_line":""},{"line_number":164,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":2,"id":"5c676565_6ac6f688","line":161,"in_reply_to":"4ffc2466_746395ac","updated":"2025-03-10 15:22:37.000000000","message":"We discussed over IRC and I looked at the proposed WIP implementation. Based on these I\u0027m OK to do the reservation after the pci claim.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":158,"context_line":"----------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"* Parse `one_time_use` from `[pci]dev_spec` config"},{"line_number":161,"context_line":"* Add code to bump reserved count when we allocate the PCI device in placement"},{"line_number":162,"context_line":"* Add documentation and a sample cleanup listener script"},{"line_number":163,"context_line":""},{"line_number":164,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":2,"id":"4ffc2466_746395ac","line":161,"in_reply_to":"585617f5_30553e7d","updated":"2025-03-07 09:30:34.000000000","message":"After thinking hard, I\u0027d definitely prefer the latter solution (late reserve the device only when the manager claim is done) since it provides us more simplicity and less error handling in case of instance actions failures and the requested rollback mechanisms that we\u0027d need to implement.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"ea2dac9cb85d9aa71326bbc2f7464c24c7c0ea7e","unresolved":true,"context_lines":[{"line_number":158,"context_line":"----------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"* Parse `one_time_use` from `[pci]dev_spec` config"},{"line_number":161,"context_line":"* Add code to bump reserved count when we allocate the PCI device in placement"},{"line_number":162,"context_line":"* Add documentation and a sample cleanup listener script"},{"line_number":163,"context_line":""},{"line_number":164,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":2,"id":"6302753c_ec63ec97","line":161,"in_reply_to":"5c676565_6ac6f688","updated":"2025-03-11 15:24:51.000000000","message":"Noted, thanks gibi for reviewing the WIP !","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c63bd3c99022e4b9f1205e6aafe31f557d07a5a0","unresolved":true,"context_lines":[{"line_number":158,"context_line":"----------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"* Parse `one_time_use` from `[pci]dev_spec` config"},{"line_number":161,"context_line":"* Add code to bump reserved count when we allocate the PCI device in placement"},{"line_number":162,"context_line":"* Add documentation and a sample cleanup listener script"},{"line_number":163,"context_line":""},{"line_number":164,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":2,"id":"7763577f_9dd417c6","line":161,"in_reply_to":"6302753c_ec63ec97","updated":"2025-03-11 15:28:09.000000000","message":"Oops, I just realized I should update this work item to refer to the fact that we\u0027ll do it in the compute manager.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"6968c9f1d5f535bc1882afc00f570c31069b3bbc","unresolved":false,"context_lines":[{"line_number":158,"context_line":"----------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"* Parse `one_time_use` from `[pci]dev_spec` config"},{"line_number":161,"context_line":"* Add code to bump reserved count when we allocate the PCI device in placement"},{"line_number":162,"context_line":"* Add documentation and a sample cleanup listener script"},{"line_number":163,"context_line":""},{"line_number":164,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":2,"id":"55d5b39d_e9053b7d","line":161,"in_reply_to":"7763577f_9dd417c6","updated":"2025-03-17 16:26:37.000000000","message":"Done","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":true,"context_lines":[{"line_number":158,"context_line":"----------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"* Parse `one_time_use` from `[pci]dev_spec` config"},{"line_number":161,"context_line":"* Add code to bump reserved count when we allocate the PCI device in placement"},{"line_number":162,"context_line":"* Add documentation and a sample cleanup listener script"},{"line_number":163,"context_line":""},{"line_number":164,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":2,"id":"585617f5_30553e7d","line":161,"in_reply_to":"ffa0f5dd_3242c396","updated":"2025-03-06 15:03:51.000000000","message":"I agree that PCI-in-placement is a hard requirement for this because it\u0027s visible and the operator has a well-defined interface to un-reserve the device.\n\nPersonally, I think closer to the PCI tracker is the right place to do this because I don\u0027t want to have to roll back the reservation *anywhere*. Doing so incorrectly (like in some code shared by create and rebuild or something) could inadvertently expose a device that was supposed to be held for cleaning. That should never happen.\n\nI think we either need to be prepared to burn devices if we fail between schedule and boot, or we need to do the reservation as late as possible.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"be8ee3453674a6abcba336d82dbe74dc91945a15","unresolved":true,"context_lines":[{"line_number":170,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":171,"context_line":""},{"line_number":172,"context_line":"This will be tested fully in unit/functional tests since it requires a real"},{"line_number":173,"context_line":"device to test with tempest."},{"line_number":174,"context_line":""},{"line_number":175,"context_line":"Documentation Impact"},{"line_number":176,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"d177bc71_3d03a071","line":173,"updated":"2025-03-06 09:43:08.000000000","message":"I happy to use a local devstack with emulated IGB SRIOV NICs to do the manual verification of this feature.\n\nEventually we will have IGB NIC available on the gate as well so we will be able to cover this in tempest. But is it definitely a separate story.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"d0eb941a2405f41601d694b033939c23a8c51159","unresolved":false,"context_lines":[{"line_number":170,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":171,"context_line":""},{"line_number":172,"context_line":"This will be tested fully in unit/functional tests since it requires a real"},{"line_number":173,"context_line":"device to test with tempest."},{"line_number":174,"context_line":""},{"line_number":175,"context_line":"Documentation Impact"},{"line_number":176,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"99102ddb_18135d54","line":173,"in_reply_to":"75574923_30818686","updated":"2025-03-06 18:20:16.000000000","message":"Done","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":true,"context_lines":[{"line_number":170,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":171,"context_line":""},{"line_number":172,"context_line":"This will be tested fully in unit/functional tests since it requires a real"},{"line_number":173,"context_line":"device to test with tempest."},{"line_number":174,"context_line":""},{"line_number":175,"context_line":"Documentation Impact"},{"line_number":176,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"75574923_30818686","line":173,"in_reply_to":"adf6d250_90bb43e0","updated":"2025-03-06 15:03:51.000000000","message":"I\u0027m planning to test this locally too, so I definitely didn\u0027t mean to imply otherwise :)\n\nI\u0027ve got an igb device locally, but as Sean says, I think we actually only care about PF, and thus anything will work.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"65f805065753a1d31f76fd61976d96b2a1494333","unresolved":true,"context_lines":[{"line_number":170,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":171,"context_line":""},{"line_number":172,"context_line":"This will be tested fully in unit/functional tests since it requires a real"},{"line_number":173,"context_line":"device to test with tempest."},{"line_number":174,"context_line":""},{"line_number":175,"context_line":"Documentation Impact"},{"line_number":176,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"adf6d250_90bb43e0","line":173,"in_reply_to":"d177bc71_3d03a071","updated":"2025-03-06 12:56:12.000000000","message":"+1 to eventually using igb\n\nwe could also talk to infra/the cloud providers\n\ntehre are other way sto add \"pci device\" to a nova instnace\n\ni.e. cinder volumes attached as virtio-blk\nor just addtional neutorn ports.\n\nit may be possible to just add an addtional volume or port to a zuul vm in a pre playbook or via a nodeset that we could use as a stand in device for testing today..\n\nfor exampl;e the virtio-random number generator\n\nubuntu@sean-devstack-watcher-1:~$ lspci\n00:00.0 Host bridge: Intel Corporation 440FX - 82441FX PMC [Natoma] (rev 02)\n00:01.0 ISA bridge: Intel Corporation 82371SB PIIX3 ISA [Natoma/Triton II]\n00:01.1 IDE interface: Intel Corporation 82371SB PIIX3 IDE [Natoma/Triton II]\n00:01.2 USB controller: Intel Corporation 82371SB PIIX3 USB [Natoma/Triton II] (rev 01)\n00:01.3 Bridge: Intel Corporation 82371AB/EB/MB PIIX4 ACPI (rev 03)\n00:02.0 VGA compatible controller: Red Hat, Inc. Virtio 1.0 GPU (rev 01)\n00:03.0 Ethernet controller: Red Hat, Inc. Virtio network device\n00:04.0 SCSI storage controller: Red Hat, Inc. Virtio block device\n00:05.0 Unclassified device [00ff]: Red Hat, Inc. Virtio memory balloon\n00:06.0 Unclassified device [00ff]: Red Hat, Inc. Virtio RNG\n\nas long as we enable the viommu in the image used for the nodeset\ntesting type-pci or type-PF is probably doable today.\n\nigb would only be needed for type-vf\n\ni think we can investigate what options we have but we do not need to make having this work a gating requirement to merge.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"74b4698a4b52630332bde2bb2ae0c28bfd88694e","unresolved":true,"context_lines":[{"line_number":183,"context_line":"References"},{"line_number":184,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":185,"context_line":""},{"line_number":186,"context_line":"The mechanism for tagging devices is nearly identical to this recent effort:"},{"line_number":187,"context_line":""},{"line_number":188,"context_line":"https://specs.openstack.org/openstack/nova-specs/specs/2025.1/approved/migrate-vfio-devices-using-kernel-variant-drivers.html"},{"line_number":189,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"51ad0311_62e95a36","line":186,"range":{"start_line":186,"start_character":0,"end_line":186,"end_character":2},"updated":"2025-03-05 20:19:05.000000000","message":"so on that i wonder if we want to diverge very slightly\n\nwe could advertise teh inventory with a new trait HW_ONE_TIME_USE (i dont really care about the exact name) that just annotates the RP that it has this behavior.\n\n\nthat way if we look a the rps with this behvioer trivally and review the inventory to see if it has total\u003dreserved and no allcoation. then we know this device is nolonger used and need to be cleaned before it can be resused.\n\nwith that we could eaislly find these after the fact.\n\nthe state of if it has been used would be tracked by reserved as you proposed\nthe trait would just advertised that this is not a reusable device.\n\nwe could provide an example in script to discober this or provide a nova-manage command to list those but dont think this woudl be particalary hard to wrtie on your own.\n\nyou could list all resocue provider with the ONE_TIME_USE trait by passing it as a requried trait which i think you can do with osc.\nits defintly supported by the api\n\nhttps://docs.openstack.org/api-ref/placement/#list-resource-providers\nand then you would just have to loop over those and get each of the inventories which you could do with osc and a couple of lines of bash","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"c938b8c369c4bb892208c5a72339c5bcd0b21e4c","unresolved":true,"context_lines":[{"line_number":183,"context_line":"References"},{"line_number":184,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":185,"context_line":""},{"line_number":186,"context_line":"The mechanism for tagging devices is nearly identical to this recent effort:"},{"line_number":187,"context_line":""},{"line_number":188,"context_line":"https://specs.openstack.org/openstack/nova-specs/specs/2025.1/approved/migrate-vfio-devices-using-kernel-variant-drivers.html"},{"line_number":189,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"7ac95798_dd82c4f2","line":186,"range":{"start_line":186,"start_character":0,"end_line":186,"end_character":2},"in_reply_to":"28a58202_875586fb","updated":"2025-03-05 23:26:48.000000000","message":"yes, nova would set it on the RP related to the pci device spec entry if the tag is defiend in the device spec. the presence of the triat just meen one_time_use\u003dtrue is in the device spec and that nova will not automaticlly make it available to use again once its the allocation is removed.\n\nit has no bearing on if it has been used or is in need of cleaning its just saying this RP has this behavior.\n\nif its currently used is tracked by if there is an allocation\nif reserved\u003dtotal\u003d1 and there is no allcoation you know that it was once used\nand an operator or cron jobs need to recommision the device by doing whatever cleaning is required before they manulaly set reserved\u003d0 to signal that its done.\n\ni didnt actully check if passing a required trait to rp list is supproted in osc-placement but even if it was not since its in the api its not hard to add or do with just curl.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"55772565a03c82916c048de72d74063068a7fa9b","unresolved":true,"context_lines":[{"line_number":183,"context_line":"References"},{"line_number":184,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":185,"context_line":""},{"line_number":186,"context_line":"The mechanism for tagging devices is nearly identical to this recent effort:"},{"line_number":187,"context_line":""},{"line_number":188,"context_line":"https://specs.openstack.org/openstack/nova-specs/specs/2025.1/approved/migrate-vfio-devices-using-kernel-variant-drivers.html"},{"line_number":189,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"28a58202_875586fb","line":186,"range":{"start_line":186,"start_character":0,"end_line":186,"end_character":2},"in_reply_to":"51ad0311_62e95a36","updated":"2025-03-05 20:58:16.000000000","message":"with that we could eaislly find these after the fact.\n\u003e \n\u003e the state of if it has been used would be tracked by reserved as you proposed\n\u003e the trait would just advertised that this is not a reusable device.\n\nJust to be clear, you mean nova would set that trait on any RP that it is considering to be one-time-use, but *not* set and remove it according to the cleaning status, right? It would just be a way to find all the ones that could potentially be needing to be cleaned if reserved but not allocated? If so, that\u0027s fine, but I do not want to try to use both reserved\u003d1 and the trait to indicate that it needs to be cleaned, because of the potential for those getting out of sync like I mentioned.\n\n\u003e we could provide an example in script to discober this or provide a nova-manage command to list those but dont think this woudl be particalary hard to wrtie on your own.\n\u003e \n\u003e you could list all resocue provider with the ONE_TIME_USE trait by passing it as a requried trait which i think you can do with osc.\n\u003e its defintly supported by the api\n\u003e \n\u003e https://docs.openstack.org/api-ref/placement/#list-resource-providers\n\u003e and then you would just have to loop over those and get each of the inventories which you could do with osc and a couple of lines of bash\n\nYes, this would also make the sample script we provide even simpler, because it would be a polling-based solution which works but is not very efficient and leave the notifications-based approach as an \"exercise for the reader\" which would be nice.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"8b8919a01222a7e43151d2f1c75f354964794158","unresolved":false,"context_lines":[{"line_number":183,"context_line":"References"},{"line_number":184,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":185,"context_line":""},{"line_number":186,"context_line":"The mechanism for tagging devices is nearly identical to this recent effort:"},{"line_number":187,"context_line":""},{"line_number":188,"context_line":"https://specs.openstack.org/openstack/nova-specs/specs/2025.1/approved/migrate-vfio-devices-using-kernel-variant-drivers.html"},{"line_number":189,"context_line":""}],"source_content_type":"text/x-rst","patch_set":2,"id":"443c82c1_3c3fb01b","line":186,"range":{"start_line":186,"start_character":0,"end_line":186,"end_character":2},"in_reply_to":"7ac95798_dd82c4f2","updated":"2025-03-06 15:03:51.000000000","message":"Ack, I\u0027ll add this.","commit_id":"563380758ced38e8b989b8bce49b7e18deda299e"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":42,"context_line":"Proposed change"},{"line_number":43,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":44,"context_line":""},{"line_number":45,"context_line":"Nova gains a notion of a \"one time use\" device. That is a device where we will"},{"line_number":46,"context_line":"allocate it for a new instance only once. When that instance is deleted, the"},{"line_number":47,"context_line":"device will not be returned to an allocatable state automatically (as would"},{"line_number":48,"context_line":"normally happen) and instead remain in a reserved state until some action is"}],"source_content_type":"text/x-rst","patch_set":3,"id":"799c94a5_d1dd45f7","line":45,"range":{"start_line":45,"start_character":5,"end_line":45,"end_character":10},"updated":"2025-03-07 09:30:34.000000000","message":"femtonit: instead of present, could be future continuous)","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ef77bc6fa7f49a922339cecfa293d76ce513ff10","unresolved":false,"context_lines":[{"line_number":42,"context_line":"Proposed change"},{"line_number":43,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":44,"context_line":""},{"line_number":45,"context_line":"Nova gains a notion of a \"one time use\" device. That is a device where we will"},{"line_number":46,"context_line":"allocate it for a new instance only once. When that instance is deleted, the"},{"line_number":47,"context_line":"device will not be returned to an allocatable state automatically (as would"},{"line_number":48,"context_line":"normally happen) and instead remain in a reserved state until some action is"}],"source_content_type":"text/x-rst","patch_set":3,"id":"8a7e1e31_ad263344","line":45,"range":{"start_line":45,"start_character":5,"end_line":45,"end_character":10},"in_reply_to":"799c94a5_d1dd45f7","updated":"2025-03-07 14:36:31.000000000","message":"Acknowledged","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":false,"context_lines":[{"line_number":57,"context_line":"for the instance, it will follow up with a bump of the `reserved`` count. When"},{"line_number":58,"context_line":"we go to de-allocate the device, we will not touch the `reserved` count, thus"},{"line_number":59,"context_line":"leaving the resource provider for the device fully-reserved (and thus not"},{"line_number":60,"context_line":"allocatable)."},{"line_number":61,"context_line":""},{"line_number":62,"context_line":".. note::"},{"line_number":63,"context_line":""}],"source_content_type":"text/x-rst","patch_set":3,"id":"9acb26c3_ce3f1360","line":60,"updated":"2025-03-07 09:30:34.000000000","message":"definitely +1 to that idea.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":67,"context_line":""},{"line_number":68,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":69,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"},{"line_number":70,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":71,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":72,"context_line":""},{"line_number":73,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":74,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf51635b_cc745e38","line":71,"range":{"start_line":70,"start_character":39,"end_line":71,"end_character":65},"updated":"2025-03-07 09:30:34.000000000","message":"or this could be something else, like manually cleaning a disk or updating a firmware and then moving it back.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"22c00398d4db0d464eafddfa0a1e34b6184cf1fa","unresolved":false,"context_lines":[{"line_number":67,"context_line":""},{"line_number":68,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":69,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"},{"line_number":70,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":71,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":72,"context_line":""},{"line_number":73,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":74,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"}],"source_content_type":"text/x-rst","patch_set":3,"id":"7aa900b1_c9dd2f17","line":71,"range":{"start_line":70,"start_character":39,"end_line":71,"end_character":65},"in_reply_to":"0ecf09e9_b0115d94","updated":"2025-03-11 15:26:22.000000000","message":"Done","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ef77bc6fa7f49a922339cecfa293d76ce513ff10","unresolved":true,"context_lines":[{"line_number":67,"context_line":""},{"line_number":68,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":69,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"},{"line_number":70,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":71,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":72,"context_line":""},{"line_number":73,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":74,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"}],"source_content_type":"text/x-rst","patch_set":3,"id":"0ecf09e9_b0115d94","line":71,"range":{"start_line":70,"start_character":39,"end_line":71,"end_character":65},"in_reply_to":"bf51635b_cc745e38","updated":"2025-03-07 14:36:31.000000000","message":"I think I covered some of the other cases in the first paragraph, and I think the third use-case above covers any direct-passthrough storage right?","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":false,"context_lines":[{"line_number":75,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":76,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"},{"line_number":77,"context_line":"is required (which is signaled by total\u003d1,reserved\u003d1,count\u003d0) but rather that"},{"line_number":78,"context_line":"this device *may* need cleaning if the conditions are correct."},{"line_number":79,"context_line":""},{"line_number":80,"context_line":"Lifecycle Operations"},{"line_number":81,"context_line":"--------------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"e3e869e2_8b0635db","line":78,"updated":"2025-03-07 09:30:34.000000000","message":"yeah, would be simplier for operators or tools to know which devices need to be looked at (or for the operator to calculate the left capacity for all the devices)","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":false,"context_lines":[{"line_number":86,"context_line":""},{"line_number":87,"context_line":"* Rebuild: The device can be re-used in place without any other action"},{"line_number":88,"context_line":"* Evacuate: The original device will be \"burned\" after the evacuation happens"},{"line_number":89,"context_line":" and a new device will be allocated on the new host."},{"line_number":90,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":91,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"},{"line_number":92,"context_line":" device will allocated on the destination, but state (i.e. data) will not"}],"source_content_type":"text/x-rst","patch_set":3,"id":"5fbf52e1_3cd0adb8","line":89,"updated":"2025-03-07 09:30:34.000000000","message":"I\u0027m OK with the proposal","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":false,"context_lines":[{"line_number":90,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":91,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"},{"line_number":92,"context_line":" device will allocated on the destination, but state (i.e. data) will not"},{"line_number":93,"context_line":" be copied by Nova."},{"line_number":94,"context_line":"* Live migrate: If the device is already live migratable, then it will be be"},{"line_number":95,"context_line":" allowed, with the source device being \"burned\" after the operation completes."},{"line_number":96,"context_line":""}],"source_content_type":"text/x-rst","patch_set":3,"id":"a208f7f7_6389a922","line":93,"updated":"2025-03-07 09:30:34.000000000","message":"agreed too with the proposal","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":false,"context_lines":[{"line_number":102,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":103,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"},{"line_number":104,"context_line":" which is probably fine as a first step. Note this is already broken for"},{"line_number":105,"context_line":" cold migrate in some cases and the same fix for that issue will apply here."},{"line_number":106,"context_line":""},{"line_number":107,"context_line":"Alternatives"},{"line_number":108,"context_line":"------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"764671a8_23c6ce3b","line":105,"updated":"2025-03-07 09:30:34.000000000","message":"sure, this sounds to me a related bug, not something like a depending bugfix for the implementation of this spec.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":121,"context_line":"where support for their device does not exist in Cyborg, a simpler workflow"},{"line_number":122,"context_line":"would be easier to craft a homegrown solution. For an operator with bespoke"},{"line_number":123,"context_line":"(maybe scientific) hardware, requiring them to write a full Cyborg driver in"},{"line_number":124,"context_line":"order to call a shell script after each use is a big ask."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":"Data model impact"},{"line_number":127,"context_line":"-----------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"442cd9ed_eadd6bd5","line":124,"updated":"2025-03-07 09:30:34.000000000","message":"yeah also Cyborg doesn\u0027t support all hardware for the moment, only a few specific NVMe disks or GPUs, so contributors would need to add support in Cyborg too.\nCompared to that, this is maybe a simplier case for operators to run a systemctl service that will do what they want whenever they want and unreserve the PCI device.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"2580a7e616e05a1e05ae2b8c219259a99d8ab96e","unresolved":false,"context_lines":[{"line_number":121,"context_line":"where support for their device does not exist in Cyborg, a simpler workflow"},{"line_number":122,"context_line":"would be easier to craft a homegrown solution. For an operator with bespoke"},{"line_number":123,"context_line":"(maybe scientific) hardware, requiring them to write a full Cyborg driver in"},{"line_number":124,"context_line":"order to call a shell script after each use is a big ask."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":"Data model impact"},{"line_number":127,"context_line":"-----------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"fd7ffb82_3f87466d","line":124,"in_reply_to":"3c296cad_e23759d7","updated":"2025-03-11 15:24:17.000000000","message":"Done","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ef77bc6fa7f49a922339cecfa293d76ce513ff10","unresolved":true,"context_lines":[{"line_number":121,"context_line":"where support for their device does not exist in Cyborg, a simpler workflow"},{"line_number":122,"context_line":"would be easier to craft a homegrown solution. For an operator with bespoke"},{"line_number":123,"context_line":"(maybe scientific) hardware, requiring them to write a full Cyborg driver in"},{"line_number":124,"context_line":"order to call a shell script after each use is a big ask."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":"Data model impact"},{"line_number":127,"context_line":"-----------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"3c296cad_e23759d7","line":124,"in_reply_to":"442cd9ed_eadd6bd5","updated":"2025-03-07 14:36:31.000000000","message":"Yeah, this is the \"very lightweight but not very comprehensive\" alternative. If you want full device prep, slicing, and cleanup, cyborg is it, I think.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":false,"context_lines":[{"line_number":129,"context_line":"There should be no data model impact if we use the existing PCI `dev_spec` to"},{"line_number":130,"context_line":"flag a device as `one_time_use\u003d(yes|no)`. This is a similar approach to the"},{"line_number":131,"context_line":"recent migrate-vfio-devices-using-kernel-variant-drivers spec which allows"},{"line_number":132,"context_line":"operators to flag them as `live_migratable\u003d(yes|no)`."},{"line_number":133,"context_line":""},{"line_number":134,"context_line":"REST API impact"},{"line_number":135,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"c4fe42d9_edf1149b","line":132,"updated":"2025-03-07 09:30:34.000000000","message":"correct, we\u0027ll just add a new key-value in an already stored JSON blob as TEXT here https://github.com/openstack/nova/blob/276685b3db6e8f2ad59c33bc254461c255700ff8/nova/db/main/models.py#L1168\n\nEventually the pci devices will be tagged when matching the device spec with that new tag.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":158,"context_line":"------------------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"This will involve a single additional call to placement to update the"},{"line_number":161,"context_line":"inventory after we allocate the device. This should be negligible in terms of"},{"line_number":162,"context_line":"performance impact, and the error handling will be identical to that of the"},{"line_number":163,"context_line":"case where we fail to do the allocation itself."},{"line_number":164,"context_line":""},{"line_number":165,"context_line":"Other deployer impact"}],"source_content_type":"text/x-rst","patch_set":3,"id":"0830e83e_75920c35","line":162,"range":{"start_line":161,"start_character":40,"end_line":162,"end_character":18},"updated":"2025-03-07 09:30:34.000000000","message":"literally equal to zero for each of the nodes that don\u0027t have OTU flagged devices.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ef77bc6fa7f49a922339cecfa293d76ce513ff10","unresolved":false,"context_lines":[{"line_number":158,"context_line":"------------------"},{"line_number":159,"context_line":""},{"line_number":160,"context_line":"This will involve a single additional call to placement to update the"},{"line_number":161,"context_line":"inventory after we allocate the device. This should be negligible in terms of"},{"line_number":162,"context_line":"performance impact, and the error handling will be identical to that of the"},{"line_number":163,"context_line":"case where we fail to do the allocation itself."},{"line_number":164,"context_line":""},{"line_number":165,"context_line":"Other deployer impact"}],"source_content_type":"text/x-rst","patch_set":3,"id":"588f1dd9_f0d03b03","line":162,"range":{"start_line":161,"start_character":40,"end_line":162,"end_character":18},"in_reply_to":"0830e83e_75920c35","updated":"2025-03-07 14:36:31.000000000","message":"Acknowledged","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":160,"context_line":"This will involve a single additional call to placement to update the"},{"line_number":161,"context_line":"inventory after we allocate the device. This should be negligible in terms of"},{"line_number":162,"context_line":"performance impact, and the error handling will be identical to that of the"},{"line_number":163,"context_line":"case where we fail to do the allocation itself."},{"line_number":164,"context_line":""},{"line_number":165,"context_line":"Other deployer impact"},{"line_number":166,"context_line":"---------------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"ef411d1d_515ac6fc","line":163,"updated":"2025-03-07 09:30:34.000000000","message":"there could also be a capacity management problem if operators aren\u0027t able to consume the reserved devices and put them back, but that seems an acceptable risk as opt-in.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"2580a7e616e05a1e05ae2b8c219259a99d8ab96e","unresolved":false,"context_lines":[{"line_number":160,"context_line":"This will involve a single additional call to placement to update the"},{"line_number":161,"context_line":"inventory after we allocate the device. This should be negligible in terms of"},{"line_number":162,"context_line":"performance impact, and the error handling will be identical to that of the"},{"line_number":163,"context_line":"case where we fail to do the allocation itself."},{"line_number":164,"context_line":""},{"line_number":165,"context_line":"Other deployer impact"},{"line_number":166,"context_line":"---------------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"c6e34900_5c14c4fb","line":163,"in_reply_to":"330c6e6e_d388ed82","updated":"2025-03-11 15:24:17.000000000","message":"Done","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ef77bc6fa7f49a922339cecfa293d76ce513ff10","unresolved":true,"context_lines":[{"line_number":160,"context_line":"This will involve a single additional call to placement to update the"},{"line_number":161,"context_line":"inventory after we allocate the device. This should be negligible in terms of"},{"line_number":162,"context_line":"performance impact, and the error handling will be identical to that of the"},{"line_number":163,"context_line":"case where we fail to do the allocation itself."},{"line_number":164,"context_line":""},{"line_number":165,"context_line":"Other deployer impact"},{"line_number":166,"context_line":"---------------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"330c6e6e_d388ed82","line":163,"in_reply_to":"ef411d1d_515ac6fc","updated":"2025-03-07 14:36:31.000000000","message":"Well, that\u0027s what they\u0027re opting into here. But if anything, the capacity going to zero means their cleaning script(s) fell over :)","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":false,"context_lines":[{"line_number":168,"context_line":"Deployers who do not wish to use this feature will not be impacted. Those"},{"line_number":169,"context_line":"that do will be able to enable this via config for their PCI devices and"},{"line_number":170,"context_line":"write their own external integrations based on the assumption that devices"},{"line_number":171,"context_line":"will remain reserved after allocation."},{"line_number":172,"context_line":""},{"line_number":173,"context_line":"Developer impact"},{"line_number":174,"context_line":"----------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"252b3dd0_542132ba","line":171,"updated":"2025-03-07 09:30:34.000000000","message":"purely opt-in indeed.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":210,"context_line":""},{"line_number":211,"context_line":"This has a soft dependency on a fix to Placement that allows swapping an"},{"line_number":212,"context_line":"allocation while over-subscribed. While not strictly required, fixing this"},{"line_number":213,"context_line":"long-standing issue will enable cold migration of one-time-use devices."},{"line_number":214,"context_line":""},{"line_number":215,"context_line":"Testing"},{"line_number":216,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"a7d2df9d_cc758a8b","line":213,"updated":"2025-03-07 09:30:34.000000000","message":"as I said, sounds to me related but not really dependent (ie. can be merged in parallel)","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ef77bc6fa7f49a922339cecfa293d76ce513ff10","unresolved":false,"context_lines":[{"line_number":210,"context_line":""},{"line_number":211,"context_line":"This has a soft dependency on a fix to Placement that allows swapping an"},{"line_number":212,"context_line":"allocation while over-subscribed. While not strictly required, fixing this"},{"line_number":213,"context_line":"long-standing issue will enable cold migration of one-time-use devices."},{"line_number":214,"context_line":""},{"line_number":215,"context_line":"Testing"},{"line_number":216,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"c188a196_c8ce909d","line":213,"in_reply_to":"a7d2df9d_cc758a8b","updated":"2025-03-07 14:36:31.000000000","message":"Acknowledged","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"354a7009bd3a384b0a88d1017b02203445a50844","unresolved":true,"context_lines":[{"line_number":216,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":217,"context_line":""},{"line_number":218,"context_line":"This will be tested fully in unit/functional tests since it requires a real"},{"line_number":219,"context_line":"device to test with tempest."},{"line_number":220,"context_line":""},{"line_number":221,"context_line":"One-off testing with real devices will be performed locally during review and"},{"line_number":222,"context_line":"submission."}],"source_content_type":"text/x-rst","patch_set":3,"id":"cbb57221_8dd9124d","line":219,"updated":"2025-03-07 09:30:34.000000000","message":"gibi would say I think we could use igb devices for faking this in Tempest.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"2580a7e616e05a1e05ae2b8c219259a99d8ab96e","unresolved":false,"context_lines":[{"line_number":216,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":217,"context_line":""},{"line_number":218,"context_line":"This will be tested fully in unit/functional tests since it requires a real"},{"line_number":219,"context_line":"device to test with tempest."},{"line_number":220,"context_line":""},{"line_number":221,"context_line":"One-off testing with real devices will be performed locally during review and"},{"line_number":222,"context_line":"submission."}],"source_content_type":"text/x-rst","patch_set":3,"id":"9ea3fdbf_7e0de172","line":219,"in_reply_to":"7d3dc7b9_7413b8a9","updated":"2025-03-11 15:24:17.000000000","message":"let\u0027s make this not a dependency then","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"4d9c1bb213bb0c5a52fb6f06f0e1fb8c936d51ec","unresolved":true,"context_lines":[{"line_number":216,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":217,"context_line":""},{"line_number":218,"context_line":"This will be tested fully in unit/functional tests since it requires a real"},{"line_number":219,"context_line":"device to test with tempest."},{"line_number":220,"context_line":""},{"line_number":221,"context_line":"One-off testing with real devices will be performed locally during review and"},{"line_number":222,"context_line":"submission."}],"source_content_type":"text/x-rst","patch_set":3,"id":"7d3dc7b9_7413b8a9","line":219,"in_reply_to":"9e434654_f584ad36","updated":"2025-03-07 15:04:15.000000000","message":"in nova yes in our ci providers not yet\n\nwe can precreate some test for this in tempest or whitebox in general but we can commit to runignthem in any kind fo first party ci in the shrot term.\n\nonce we get some ci proviedres upgraded to 2025.1 we can have that converstaion with the infra teams and the ci cloud providers about create a node set that will use an image with igb and vioummu for virtualised sriov testing.\n\ni dont exepct that to happen until late in the 2025.2 cycle.","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ef77bc6fa7f49a922339cecfa293d76ce513ff10","unresolved":true,"context_lines":[{"line_number":216,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":217,"context_line":""},{"line_number":218,"context_line":"This will be tested fully in unit/functional tests since it requires a real"},{"line_number":219,"context_line":"device to test with tempest."},{"line_number":220,"context_line":""},{"line_number":221,"context_line":"One-off testing with real devices will be performed locally during review and"},{"line_number":222,"context_line":"submission."}],"source_content_type":"text/x-rst","patch_set":3,"id":"9e434654_f584ad36","line":219,"in_reply_to":"cbb57221_8dd9124d","updated":"2025-03-07 14:36:31.000000000","message":"He said \"eventually\".. Is that available now?","commit_id":"3fd6e1bc6fe55b4fd744a9cc37f4c8de8eff1260"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"22c00398d4db0d464eafddfa0a1e34b6184cf1fa","unresolved":false,"context_lines":[{"line_number":99,"context_line":"calls `_update()` synchronously after the local claims are completed. It should"},{"line_number":100,"context_line":"be noted that a move of an instance with a one-time-use device will \"burn\" the"},{"line_number":101,"context_line":"device on the destination as soon as it starts running there (i.e. when it"},{"line_number":102,"context_line":"reaches the verify state) and a revert will not \"un-burn\" it."},{"line_number":103,"context_line":""},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Lifecycle Operations"}],"source_content_type":"text/x-rst","patch_set":4,"id":"5e56231c_66340e2b","line":102,"updated":"2025-03-11 15:26:22.000000000","message":"cool with the implementation proposal, yes to do the call to Placement by update() method.","commit_id":"99beb3c1de9ac76753cf26ad19abf927b689d80a"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"b267dabfa818da33ff7c0bb79911269d7bcba0a3","unresolved":true,"context_lines":[{"line_number":63,"context_line":""},{"line_number":64,"context_line":" This is expected to be used for PCI-in-placement and PF devices only due to"},{"line_number":65,"context_line":" the one-to-one resource provider accounting. A future change could enable"},{"line_number":66,"context_line":" this foe VFs through another mechanism if we determine a need."},{"line_number":67,"context_line":""},{"line_number":68,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":69,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"}],"source_content_type":"text/x-rst","patch_set":5,"id":"5e9a8122_7e30e35e","line":66,"range":{"start_line":66,"start_character":7,"end_line":66,"end_character":10},"updated":"2025-03-12 21:34:44.000000000","message":"for","commit_id":"a33243fe620ddc255a2a89fc7a485fa88ffbf830"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ebb2169cfab465e0b4ccd9549d15fe2f1af8d0be","unresolved":true,"context_lines":[{"line_number":63,"context_line":""},{"line_number":64,"context_line":" This is expected to be used for PCI-in-placement and PF devices only due to"},{"line_number":65,"context_line":" the one-to-one resource provider accounting. A future change could enable"},{"line_number":66,"context_line":" this foe VFs through another mechanism if we determine a need."},{"line_number":67,"context_line":""},{"line_number":68,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":69,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"}],"source_content_type":"text/x-rst","patch_set":5,"id":"cbf98ede_d0f00eaf","line":66,"range":{"start_line":66,"start_character":7,"end_line":66,"end_character":10},"in_reply_to":"5e9a8122_7e30e35e","updated":"2025-03-13 13:35:20.000000000","message":"No, see, in the epic battle for bus dominance, it\u0027s the PFs against the VFs, each fighting to the death against their mortal foe...\n\nEr, yeah, thanks :)","commit_id":"a33243fe620ddc255a2a89fc7a485fa88ffbf830"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fadf1286b254524dfb227b4cb9e61c60d39f9eb2","unresolved":false,"context_lines":[{"line_number":63,"context_line":""},{"line_number":64,"context_line":" This is expected to be used for PCI-in-placement and PF devices only due to"},{"line_number":65,"context_line":" the one-to-one resource provider accounting. A future change could enable"},{"line_number":66,"context_line":" this foe VFs through another mechanism if we determine a need."},{"line_number":67,"context_line":""},{"line_number":68,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":69,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"}],"source_content_type":"text/x-rst","patch_set":5,"id":"d2367413_4a16b45d","line":66,"range":{"start_line":66,"start_character":7,"end_line":66,"end_character":10},"in_reply_to":"cbf98ede_d0f00eaf","updated":"2025-03-14 18:07:49.000000000","message":"Done","commit_id":"a33243fe620ddc255a2a89fc7a485fa88ffbf830"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"b267dabfa818da33ff7c0bb79911269d7bcba0a3","unresolved":true,"context_lines":[{"line_number":68,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":69,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"},{"line_number":70,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":71,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":72,"context_line":""},{"line_number":73,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":74,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"}],"source_content_type":"text/x-rst","patch_set":5,"id":"c4308266_6d142ea3","line":71,"updated":"2025-03-12 21:34:44.000000000","message":"I think another nice thing is in the event of missed notifications (rabbitmq trouble, etc) the operator could have something like a cron job running that checks Placement API for whether reserved one-time-use devices still have a live instance and if not, do the needed cleaning and release of the device.\n\nJust noting that this model enables operators to add their own additional resiliency if they want, which is good.","commit_id":"a33243fe620ddc255a2a89fc7a485fa88ffbf830"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ebb2169cfab465e0b4ccd9549d15fe2f1af8d0be","unresolved":false,"context_lines":[{"line_number":68,"context_line":"Through whatever workflow the operator decides, they can clean the device, and"},{"line_number":69,"context_line":"decrement the `reserved` count once they are ready for the device to rejoin"},{"line_number":70,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":71,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":72,"context_line":""},{"line_number":73,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":74,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"}],"source_content_type":"text/x-rst","patch_set":5,"id":"8fbefc43_265cadb9","line":71,"in_reply_to":"c4308266_6d142ea3","updated":"2025-03-13 13:35:20.000000000","message":"Yep this both allows for lazy and imperfect cleaning as well as lazy healing if something gets marked as one-time-use after the instance is already built.","commit_id":"a33243fe620ddc255a2a89fc7a485fa88ffbf830"},{"author":{"_account_id":4690,"name":"melanie witt","display_name":"melwitt","email":"melwittt@gmail.com","username":"melwitt"},"change_message_id":"b267dabfa818da33ff7c0bb79911269d7bcba0a3","unresolved":true,"context_lines":[{"line_number":166,"context_line":""},{"line_number":167,"context_line":"No direct security impact, although it will theoretically allow operators to"},{"line_number":168,"context_line":"improve security of device-passthrough workloads by sanitizing or"},{"line_number":169,"context_line":"re-initializing their devices between uses."},{"line_number":170,"context_line":""},{"line_number":171,"context_line":"Notifications impact"},{"line_number":172,"context_line":"--------------------"}],"source_content_type":"text/x-rst","patch_set":5,"id":"17e21306_64b4a112","line":169,"updated":"2025-03-12 21:34:44.000000000","message":"+1 I think this is a significant benefit.","commit_id":"a33243fe620ddc255a2a89fc7a485fa88ffbf830"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"ebb2169cfab465e0b4ccd9549d15fe2f1af8d0be","unresolved":false,"context_lines":[{"line_number":166,"context_line":""},{"line_number":167,"context_line":"No direct security impact, although it will theoretically allow operators to"},{"line_number":168,"context_line":"improve security of device-passthrough workloads by sanitizing or"},{"line_number":169,"context_line":"re-initializing their devices between uses."},{"line_number":170,"context_line":""},{"line_number":171,"context_line":"Notifications impact"},{"line_number":172,"context_line":"--------------------"}],"source_content_type":"text/x-rst","patch_set":5,"id":"b43e1bb1_f4150087","line":169,"in_reply_to":"17e21306_64b4a112","updated":"2025-03-13 13:35:20.000000000","message":"Another use-case I thought of yesterday is an operator that wants to check the write-wear-indicator on an NVMe after each customer, before handing it to another. They do wear out and the impact of having one die while PCI-attached to an instance could be a bad day for everyone. Being able to clean it and also check the \"should I replace this or let someone else put their data on it?\" before returning it to the pool would be nice.","commit_id":"a33243fe620ddc255a2a89fc7a485fa88ffbf830"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fadf1286b254524dfb227b4cb9e61c60d39f9eb2","unresolved":true,"context_lines":[{"line_number":40,"context_line":""},{"line_number":41,"context_line":"As a cloud operator, I want to check the write-wear indicator on my passthrough"},{"line_number":42,"context_line":"NVMe devices after each user to avoid returning devices over the safety"},{"line_number":43,"context_line":"threshold to be allocated."},{"line_number":44,"context_line":""},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":6,"id":"dca0b2a2_965fe7a9","line":43,"updated":"2025-03-14 18:07:49.000000000","message":"this is not very compelling to me becasue nova only supprot statelest device partly for reasons like that. stateless device can ware out too but at a much longer time frame. \na similar case would be an fpga, they actully have a maxium programing cycle too in general and evenutly start failign after many cycles. i dont knwo if manufacutres actully advertise what that is butits a thing that happens.\n\ni can see how marking an nvme deivce as one time use can help with that so im fine with keeping it as a motivating usecase as long as we are not going to claim we actually support pass-through of stateful devices in our official docs.\n\nto me this feature is just allowing a checkpoint where an external tool can intospect the device and determin if it ready for use or prepare it for use before it returned. ironic actually has a cleaning fphase for this type of thing but i get we don\u0027t want to add that to nova.\n\n\nthat is the compromies that i think is reasonable we allow marking things as one time use but done make claims to supprot stateful device in docs.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47ec13b03075b46b0a0a9dd43e08f16f4eddbada","unresolved":true,"context_lines":[{"line_number":40,"context_line":""},{"line_number":41,"context_line":"As a cloud operator, I want to check the write-wear indicator on my passthrough"},{"line_number":42,"context_line":"NVMe devices after each user to avoid returning devices over the safety"},{"line_number":43,"context_line":"threshold to be allocated."},{"line_number":44,"context_line":""},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":6,"id":"e1619cfa_7d101b87","line":43,"in_reply_to":"dca0b2a2_965fe7a9","updated":"2025-03-17 16:34:42.000000000","message":"well, this is just an use case, not sure whether we need to discuss a lot about it.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a17e4b26731c6479f0af71f202510473fe656813","unresolved":false,"context_lines":[{"line_number":40,"context_line":""},{"line_number":41,"context_line":"As a cloud operator, I want to check the write-wear indicator on my passthrough"},{"line_number":42,"context_line":"NVMe devices after each user to avoid returning devices over the safety"},{"line_number":43,"context_line":"threshold to be allocated."},{"line_number":44,"context_line":""},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":6,"id":"8da770ff_38e9550c","line":43,"in_reply_to":"e1619cfa_7d101b87","updated":"2025-03-17 16:50:07.000000000","message":"Acknowledged","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fadf1286b254524dfb227b4cb9e61c60d39f9eb2","unresolved":true,"context_lines":[{"line_number":54,"context_line":"such a device is not re-allocatable (until cleaned) is a potentially very"},{"line_number":55,"context_line":"security-sensitive step that can not be missed, and it makes sense for Nova"},{"line_number":56,"context_line":"to do this itself, even though it will not take on the actual task of doing"},{"line_number":57,"context_line":"any device cleaning."},{"line_number":58,"context_line":""},{"line_number":59,"context_line":"The annotation mechanism here will utilize the `reserved` inventory count,"},{"line_number":60,"context_line":"on top of PCI-in-placement. Basically, when Nova goes to allocate the device"}],"source_content_type":"text/x-rst","patch_set":6,"id":"2315f378_95df6706","line":57,"updated":"2025-03-14 18:07:49.000000000","message":"+1 for calling out the security element.\n\neven for a stateless device, there may be cases wehre your security requiremnt would need the first usecase (ensuring the firmware is a knwo good verion) for nics or any other passthrough device.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47ec13b03075b46b0a0a9dd43e08f16f4eddbada","unresolved":false,"context_lines":[{"line_number":54,"context_line":"such a device is not re-allocatable (until cleaned) is a potentially very"},{"line_number":55,"context_line":"security-sensitive step that can not be missed, and it makes sense for Nova"},{"line_number":56,"context_line":"to do this itself, even though it will not take on the actual task of doing"},{"line_number":57,"context_line":"any device cleaning."},{"line_number":58,"context_line":""},{"line_number":59,"context_line":"The annotation mechanism here will utilize the `reserved` inventory count,"},{"line_number":60,"context_line":"on top of PCI-in-placement. Basically, when Nova goes to allocate the device"}],"source_content_type":"text/x-rst","patch_set":6,"id":"acb86fa5_c84f847b","line":57,"in_reply_to":"2315f378_95df6706","updated":"2025-03-17 16:34:42.000000000","message":"Acknowledged","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9534616a86770c959955aa55a4596114970f0452","unresolved":true,"context_lines":[{"line_number":74,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":75,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":76,"context_line":""},{"line_number":77,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":78,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"},{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"}],"source_content_type":"text/x-rst","patch_set":6,"id":"a0c99c31_e4b10ceb","line":77,"range":{"start_line":77,"start_character":56,"end_line":77,"end_character":71},"updated":"2025-03-17 16:16:12.000000000","message":"I suggest HW_PCI_ONE_TIME_USE if we are focusing on PCI devices only but it is OK to hash out the name during the impl","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a17e4b26731c6479f0af71f202510473fe656813","unresolved":false,"context_lines":[{"line_number":74,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":75,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":76,"context_line":""},{"line_number":77,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":78,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"},{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"}],"source_content_type":"text/x-rst","patch_set":6,"id":"9b18d6cf_a841c856","line":77,"range":{"start_line":77,"start_character":56,"end_line":77,"end_character":71},"in_reply_to":"198f9c10_ad0adebe","updated":"2025-03-17 16:50:07.000000000","message":"Acknowledged","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"550290f2dc55973cc3f67c9a0ea91cb840bab933","unresolved":false,"context_lines":[{"line_number":74,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":75,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":76,"context_line":""},{"line_number":77,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":78,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"},{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"}],"source_content_type":"text/x-rst","patch_set":6,"id":"d44a14bb_c8f07524","line":77,"range":{"start_line":77,"start_character":56,"end_line":77,"end_character":71},"in_reply_to":"9b18d6cf_a841c856","updated":"2025-03-17 17:10:45.000000000","message":"Marking other devices OTU might make sense in some cases, but the current marking with a trait on the RP only works because PCI PFs are represented as RPs in 1:1 mapping. The trait is on the RP not on the PCI inventory. As soon as the inventory has more than one resource (i.e. in case of VFs) or the RP has more than one inventory (i.e. VCPU an PCPU) the trait cannot be used as a mark for the resource. So while having a generic trait is not a problem per se, I don\u0027t see a wide future usage of it outside of PCI.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"6968c9f1d5f535bc1882afc00f570c31069b3bbc","unresolved":true,"context_lines":[{"line_number":74,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":75,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":76,"context_line":""},{"line_number":77,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":78,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"},{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"}],"source_content_type":"text/x-rst","patch_set":6,"id":"e746a762_3d1c3589","line":77,"range":{"start_line":77,"start_character":56,"end_line":77,"end_character":71},"in_reply_to":"a0c99c31_e4b10ceb","updated":"2025-03-17 16:26:37.000000000","message":"Well, I just didn\u0027t see any reason why we needed to scope it to PCI as we could potentially have other ways to mark non-PCI devices as OTU in the future. But sure, the implementation patches are up (as is the trait add) so we can continue discussion there.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47ec13b03075b46b0a0a9dd43e08f16f4eddbada","unresolved":true,"context_lines":[{"line_number":74,"context_line":"the pool of allocatable devices again. This would likely be listening to"},{"line_number":75,"context_line":"notifications for deleted instances and scheduling such cleaning."},{"line_number":76,"context_line":""},{"line_number":77,"context_line":"We will also introduce a new trait (tentatively called `HW_ONE_TIME_USE`) that"},{"line_number":78,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"},{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"}],"source_content_type":"text/x-rst","patch_set":6,"id":"198f9c10_ad0adebe","line":77,"range":{"start_line":77,"start_character":56,"end_line":77,"end_character":71},"in_reply_to":"e746a762_3d1c3589","updated":"2025-03-17 16:34:42.000000000","message":"looks to me this is can be an implementation question 😊","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9534616a86770c959955aa55a4596114970f0452","unresolved":true,"context_lines":[{"line_number":78,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"},{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"},{"line_number":81,"context_line":"is required (which is signaled by total\u003d1,reserved\u003d1,count\u003d0) but rather that"},{"line_number":82,"context_line":"this device *may* need cleaning if the conditions are correct."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"Implementation"}],"source_content_type":"text/x-rst","patch_set":6,"id":"4476f1e1_43cfe5bc","line":81,"range":{"start_line":81,"start_character":53,"end_line":81,"end_character":58},"updated":"2025-03-17 16:16:12.000000000","message":"nit: used\u003d0","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a17e4b26731c6479f0af71f202510473fe656813","unresolved":false,"context_lines":[{"line_number":78,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"},{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"},{"line_number":81,"context_line":"is required (which is signaled by total\u003d1,reserved\u003d1,count\u003d0) but rather that"},{"line_number":82,"context_line":"this device *may* need cleaning if the conditions are correct."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"Implementation"}],"source_content_type":"text/x-rst","patch_set":6,"id":"e77b424e_ff52ae19","line":81,"range":{"start_line":81,"start_character":53,"end_line":81,"end_character":58},"in_reply_to":"00c91e32_e23a1e2e","updated":"2025-03-17 16:50:07.000000000","message":"Done","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47ec13b03075b46b0a0a9dd43e08f16f4eddbada","unresolved":true,"context_lines":[{"line_number":78,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"},{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"},{"line_number":81,"context_line":"is required (which is signaled by total\u003d1,reserved\u003d1,count\u003d0) but rather that"},{"line_number":82,"context_line":"this device *may* need cleaning if the conditions are correct."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"Implementation"}],"source_content_type":"text/x-rst","patch_set":6,"id":"f012c2b5_f6c39c6d","line":81,"range":{"start_line":81,"start_character":53,"end_line":81,"end_character":58},"in_reply_to":"00c91e32_e23a1e2e","updated":"2025-03-17 16:34:42.000000000","message":"can you modify that by a FUP ?","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"6968c9f1d5f535bc1882afc00f570c31069b3bbc","unresolved":true,"context_lines":[{"line_number":78,"context_line":"nova will add to resource providers that it is managing as one-time-use. This"},{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"},{"line_number":81,"context_line":"is required (which is signaled by total\u003d1,reserved\u003d1,count\u003d0) but rather that"},{"line_number":82,"context_line":"this device *may* need cleaning if the conditions are correct."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"Implementation"}],"source_content_type":"text/x-rst","patch_set":6,"id":"00c91e32_e23a1e2e","line":81,"range":{"start_line":81,"start_character":53,"end_line":81,"end_character":58},"in_reply_to":"4476f1e1_43cfe5bc","updated":"2025-03-17 16:26:37.000000000","message":"Indeed, thanks.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fadf1286b254524dfb227b4cb9e61c60d39f9eb2","unresolved":true,"context_lines":[{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"},{"line_number":81,"context_line":"is required (which is signaled by total\u003d1,reserved\u003d1,count\u003d0) but rather that"},{"line_number":82,"context_line":"this device *may* need cleaning if the conditions are correct."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"Implementation"},{"line_number":85,"context_line":"--------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"4e60387b_67021736","line":82,"updated":"2025-03-14 18:07:49.000000000","message":"+1 thanks for adding this.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47ec13b03075b46b0a0a9dd43e08f16f4eddbada","unresolved":false,"context_lines":[{"line_number":79,"context_line":"will make it easier for operators to survey all the device providers that are"},{"line_number":80,"context_line":"*potentially* in need of cleaning. This will not convey whether or not cleaning"},{"line_number":81,"context_line":"is required (which is signaled by total\u003d1,reserved\u003d1,count\u003d0) but rather that"},{"line_number":82,"context_line":"this device *may* need cleaning if the conditions are correct."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"Implementation"},{"line_number":85,"context_line":"--------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"c4af2047_9be24dc8","line":82,"in_reply_to":"4e60387b_67021736","updated":"2025-03-17 16:34:42.000000000","message":"Acknowledged","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9534616a86770c959955aa55a4596114970f0452","unresolved":true,"context_lines":[{"line_number":114,"context_line":"cases are discussed below:"},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"* Rebuild: The device can be re-used in place without any other action"},{"line_number":117,"context_line":"* Evacuate: The original device will be \"burned\" after the evacuation happens"},{"line_number":118,"context_line":" and a new device will be allocated on the new host."},{"line_number":119,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":120,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"}],"source_content_type":"text/x-rst","patch_set":6,"id":"2b27df15_45017c5f","line":117,"range":{"start_line":117,"start_character":12,"end_line":117,"end_character":77},"updated":"2025-03-17 16:16:12.000000000","message":"no, the original device\u0027s one-time-use is burned right after the original allocation happened, waaay before any future evacuation. The new device\u0027s otu is burned as soon as the evacuation allocated the device on the target compute.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"6968c9f1d5f535bc1882afc00f570c31069b3bbc","unresolved":true,"context_lines":[{"line_number":114,"context_line":"cases are discussed below:"},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"* Rebuild: The device can be re-used in place without any other action"},{"line_number":117,"context_line":"* Evacuate: The original device will be \"burned\" after the evacuation happens"},{"line_number":118,"context_line":" and a new device will be allocated on the new host."},{"line_number":119,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":120,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"}],"source_content_type":"text/x-rst","patch_set":6,"id":"88237f20_a262c288","line":117,"range":{"start_line":117,"start_character":12,"end_line":117,"end_character":77},"in_reply_to":"2b27df15_45017c5f","updated":"2025-03-17 16:26:37.000000000","message":"Heh yeah, sorry. What I meant here is that the original device will already be burned and the evacuation will burn a new one once it\u0027s allocated on the host. But, now that you point it out I see why this sounds different.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47ec13b03075b46b0a0a9dd43e08f16f4eddbada","unresolved":true,"context_lines":[{"line_number":114,"context_line":"cases are discussed below:"},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"* Rebuild: The device can be re-used in place without any other action"},{"line_number":117,"context_line":"* Evacuate: The original device will be \"burned\" after the evacuation happens"},{"line_number":118,"context_line":" and a new device will be allocated on the new host."},{"line_number":119,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":120,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"}],"source_content_type":"text/x-rst","patch_set":6,"id":"fcba3177_4402f083","line":117,"range":{"start_line":117,"start_character":12,"end_line":117,"end_character":77},"in_reply_to":"88237f20_a262c288","updated":"2025-03-17 16:34:42.000000000","message":"well, I understood what Dan was saying : when evacuating, the original device was burned and we\u0027ll reserve another one of course as a target.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a17e4b26731c6479f0af71f202510473fe656813","unresolved":false,"context_lines":[{"line_number":114,"context_line":"cases are discussed below:"},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"* Rebuild: The device can be re-used in place without any other action"},{"line_number":117,"context_line":"* Evacuate: The original device will be \"burned\" after the evacuation happens"},{"line_number":118,"context_line":" and a new device will be allocated on the new host."},{"line_number":119,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":120,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"}],"source_content_type":"text/x-rst","patch_set":6,"id":"456865ab_ec3a004d","line":117,"range":{"start_line":117,"start_character":12,"end_line":117,"end_character":77},"in_reply_to":"fcba3177_4402f083","updated":"2025-03-17 16:50:07.000000000","message":"Done","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9534616a86770c959955aa55a4596114970f0452","unresolved":true,"context_lines":[{"line_number":116,"context_line":"* Rebuild: The device can be re-used in place without any other action"},{"line_number":117,"context_line":"* Evacuate: The original device will be \"burned\" after the evacuation happens"},{"line_number":118,"context_line":" and a new device will be allocated on the new host."},{"line_number":119,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":120,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"},{"line_number":121,"context_line":" device will allocated on the destination, but state (i.e. data) will not"},{"line_number":122,"context_line":" be copied by Nova."},{"line_number":123,"context_line":"* Live migrate: If the device is already live migratable, then it will be be"}],"source_content_type":"text/x-rst","patch_set":6,"id":"9159ecc2_5f337b38","line":120,"range":{"start_line":119,"start_character":16,"end_line":120,"end_character":14},"updated":"2025-03-17 16:16:12.000000000","message":"not the device\u0027s otu is already burned when the VM is created waay before the move. Do we have some terminology issue here (and in evac)? I see that define burning as:\n\n\u003e The reservation of a device (i.e. \"burning\" its one-time-use) will happen\n\nso based on that definition the burning happens at original allocation not at the start of the move operation.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"6968c9f1d5f535bc1882afc00f570c31069b3bbc","unresolved":true,"context_lines":[{"line_number":116,"context_line":"* Rebuild: The device can be re-used in place without any other action"},{"line_number":117,"context_line":"* Evacuate: The original device will be \"burned\" after the evacuation happens"},{"line_number":118,"context_line":" and a new device will be allocated on the new host."},{"line_number":119,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":120,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"},{"line_number":121,"context_line":" device will allocated on the destination, but state (i.e. data) will not"},{"line_number":122,"context_line":" be copied by Nova."},{"line_number":123,"context_line":"* Live migrate: If the device is already live migratable, then it will be be"}],"source_content_type":"text/x-rst","patch_set":6,"id":"c431c408_a0e0722d","line":120,"range":{"start_line":119,"start_character":16,"end_line":120,"end_character":14},"in_reply_to":"9159ecc2_5f337b38","updated":"2025-03-17 16:26:37.000000000","message":"Same here, I just need to be clearer with my wording. I think we all know what this is _supposed_ to do.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47ec13b03075b46b0a0a9dd43e08f16f4eddbada","unresolved":true,"context_lines":[{"line_number":116,"context_line":"* Rebuild: The device can be re-used in place without any other action"},{"line_number":117,"context_line":"* Evacuate: The original device will be \"burned\" after the evacuation happens"},{"line_number":118,"context_line":" and a new device will be allocated on the new host."},{"line_number":119,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":120,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"},{"line_number":121,"context_line":" device will allocated on the destination, but state (i.e. data) will not"},{"line_number":122,"context_line":" be copied by Nova."},{"line_number":123,"context_line":"* Live migrate: If the device is already live migratable, then it will be be"}],"source_content_type":"text/x-rst","patch_set":6,"id":"ffde4259_19c26ed0","line":120,"range":{"start_line":119,"start_character":16,"end_line":120,"end_character":14},"in_reply_to":"c431c408_a0e0722d","updated":"2025-03-17 16:34:42.000000000","message":"honestly, we can discuss that in a FUP.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a17e4b26731c6479f0af71f202510473fe656813","unresolved":false,"context_lines":[{"line_number":116,"context_line":"* Rebuild: The device can be re-used in place without any other action"},{"line_number":117,"context_line":"* Evacuate: The original device will be \"burned\" after the evacuation happens"},{"line_number":118,"context_line":" and a new device will be allocated on the new host."},{"line_number":119,"context_line":"* Cold migrate: The device on the source will be burned after the migration"},{"line_number":120,"context_line":" is confirmed and the \"holding allocation\" on the source is dropped. A new"},{"line_number":121,"context_line":" device will allocated on the destination, but state (i.e. data) will not"},{"line_number":122,"context_line":" be copied by Nova."},{"line_number":123,"context_line":"* Live migrate: If the device is already live migratable, then it will be be"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf3d3b8f_d43b51b3","line":120,"range":{"start_line":119,"start_character":16,"end_line":120,"end_character":14},"in_reply_to":"ffde4259_19c26ed0","updated":"2025-03-17 16:50:07.000000000","message":"Done","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9534616a86770c959955aa55a4596114970f0452","unresolved":true,"context_lines":[{"line_number":121,"context_line":" device will allocated on the destination, but state (i.e. data) will not"},{"line_number":122,"context_line":" be copied by Nova."},{"line_number":123,"context_line":"* Live migrate: If the device is already live migratable, then it will be be"},{"line_number":124,"context_line":" allowed, with the source device being \"burned\" after the operation completes."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":".. note::"},{"line_number":127,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"fad42130_7b54233f","line":124,"range":{"start_line":124,"start_character":11,"end_line":124,"end_character":71},"updated":"2025-03-17 16:16:12.000000000","message":"ditto","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a17e4b26731c6479f0af71f202510473fe656813","unresolved":false,"context_lines":[{"line_number":121,"context_line":" device will allocated on the destination, but state (i.e. data) will not"},{"line_number":122,"context_line":" be copied by Nova."},{"line_number":123,"context_line":"* Live migrate: If the device is already live migratable, then it will be be"},{"line_number":124,"context_line":" allowed, with the source device being \"burned\" after the operation completes."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":".. note::"},{"line_number":127,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"c50276a1_15e00304","line":124,"range":{"start_line":124,"start_character":11,"end_line":124,"end_character":71},"in_reply_to":"105c59f5_8643a41f","updated":"2025-03-17 16:50:07.000000000","message":"Done","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"47ec13b03075b46b0a0a9dd43e08f16f4eddbada","unresolved":true,"context_lines":[{"line_number":121,"context_line":" device will allocated on the destination, but state (i.e. data) will not"},{"line_number":122,"context_line":" be copied by Nova."},{"line_number":123,"context_line":"* Live migrate: If the device is already live migratable, then it will be be"},{"line_number":124,"context_line":" allowed, with the source device being \"burned\" after the operation completes."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":".. note::"},{"line_number":127,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"105c59f5_8643a41f","line":124,"range":{"start_line":124,"start_character":11,"end_line":124,"end_character":71},"in_reply_to":"fad42130_7b54233f","updated":"2025-03-17 16:34:42.000000000","message":"maybe we should explain what \u0027burned\u0027 means but I was able to understand this when reading this spec, maybe because I was already knowing what Dan was wanting to do.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fadf1286b254524dfb227b4cb9e61c60d39f9eb2","unresolved":true,"context_lines":[{"line_number":122,"context_line":" be copied by Nova."},{"line_number":123,"context_line":"* Live migrate: If the device is already live migratable, then it will be be"},{"line_number":124,"context_line":" allowed, with the source device being \"burned\" after the operation completes."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":".. note::"},{"line_number":127,"context_line":""},{"line_number":128,"context_line":" We will need a change in placement to allow our swap allocation operation to"}],"source_content_type":"text/x-rst","patch_set":6,"id":"aa355bfe_5508e9b4","line":125,"updated":"2025-03-14 18:07:49.000000000","message":"so the above is logically true but because of the curernt placement limiation in the callout below some of the move operations will fail as the souce rp will be considerd over subscriped.\n\ni dont know if we want to mention shelve but obviously the device will be bruned when we shelve_offload and a new device will be allocated on unshleve form a shelve offloaded state.\n\nresize is the same as cold migrate with the caveat that it only applies if the souce or dest flavor uses a one time use device but that kind of implied.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9534616a86770c959955aa55a4596114970f0452","unresolved":true,"context_lines":[{"line_number":126,"context_line":".. note::"},{"line_number":127,"context_line":""},{"line_number":128,"context_line":" We will need a change in placement to allow our swap allocation operation to"},{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"}],"source_content_type":"text/x-rst","patch_set":6,"id":"4ffef12d_1af10e4f","line":129,"range":{"start_line":129,"start_character":21,"end_line":129,"end_character":62},"updated":"2025-03-17 16:16:12.000000000","message":"nit: resource inventory is either over-subscribed, or both allocated and reserved at the same time.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"6968c9f1d5f535bc1882afc00f570c31069b3bbc","unresolved":true,"context_lines":[{"line_number":126,"context_line":".. note::"},{"line_number":127,"context_line":""},{"line_number":128,"context_line":" We will need a change in placement to allow our swap allocation operation to"},{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"}],"source_content_type":"text/x-rst","patch_set":6,"id":"faa9e676_3a9ca5c6","line":129,"range":{"start_line":129,"start_character":21,"end_line":129,"end_character":62},"in_reply_to":"4ffef12d_1af10e4f","updated":"2025-03-17 16:26:37.000000000","message":"Are you saying you don\u0027t want to use \"over-subscribed\" to mean \"reserved\u003d1,used\u003d1,total\u003d1\" ? To me, that\u0027s over-subscribed and I think that\u0027s how Sean had referred to it as well.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"42919a2934634958be609c901df27b64f9345ecc","unresolved":false,"context_lines":[{"line_number":126,"context_line":".. note::"},{"line_number":127,"context_line":""},{"line_number":128,"context_line":" We will need a change in placement to allow our swap allocation operation to"},{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bbb2af5e_c562f803","line":129,"range":{"start_line":129,"start_character":21,"end_line":129,"end_character":62},"in_reply_to":"a761a331_f9db9032","updated":"2025-03-17 18:00:49.000000000","message":"Had a high-bandwidth chat with gibi about this and I think we\u0027re on the same page now. I\u0027ll rephrase this to cover the actual problem of \"progressing towards safety\" for over-subscribed providers and we\u0027ll leave any discussion about the allocation-ratio semantics part of the existing bug to a separate effort.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"333a6da325ff70161e322ca7a222a4fedea4c079","unresolved":true,"context_lines":[{"line_number":126,"context_line":".. note::"},{"line_number":127,"context_line":""},{"line_number":128,"context_line":" We will need a change in placement to allow our swap allocation operation to"},{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"}],"source_content_type":"text/x-rst","patch_set":6,"id":"a761a331_f9db9032","line":129,"range":{"start_line":129,"start_character":21,"end_line":129,"end_character":62},"in_reply_to":"abf97be4_62125eaf","updated":"2025-03-17 17:14:33.000000000","message":"\u003e * I see two different meaning of \"over-subscribed\" so I would like to clarify that we mean the first but not the second.\n\u003e 1. total\u003d1,used\u003d1,reserved\u003d1\n\u003e 2. total\u003d1,used\u003d2,reserved\u003d0 (this can happen after e.g. an allocation ratio decrease for VCPU from 2.0 to 1.0)\n\nBoth of these are over-subscribed to me. I think of \"reserved\" as meaning \"allocation for the system\" and so if used+reserved\u003etotal then you\u0027re over-subscribed.\n\nI don\u0027t think we need to argue about it and if everyone else thinks we should separate the use of over-subscribed to only mean the allocation part then that\u0027s fine, but that\u0027s definitely not how I think about it.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"550290f2dc55973cc3f67c9a0ea91cb840bab933","unresolved":true,"context_lines":[{"line_number":126,"context_line":".. note::"},{"line_number":127,"context_line":""},{"line_number":128,"context_line":" We will need a change in placement to allow our swap allocation operation to"},{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"}],"source_content_type":"text/x-rst","patch_set":6,"id":"abf97be4_62125eaf","line":129,"range":{"start_line":129,"start_character":21,"end_line":129,"end_character":62},"in_reply_to":"faa9e676_3a9ca5c6","updated":"2025-03-17 17:10:45.000000000","message":"I suggest two things:\n* user resource inventory instead of resource provider in the sentence. In this case it happens to be the same granularity as we have one PCI PF inventory per RP, but in general not the RP becomes over-subscribed just a given resource inventory on it.\n* I see two different meaning of \"over-subscribed\" so I would like to clarify that we mean the first but not the second.\n 1. total\u003d1,used\u003d1,reserved\u003d1\n 2. total\u003d1,used\u003d2,reserved\u003d0 (this can happen after e.g. an allocation ratio decrease for VCPU from 2.0 to 1.0)","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9534616a86770c959955aa55a4596114970f0452","unresolved":true,"context_lines":[{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"},{"line_number":133,"context_line":" which is probably fine as a first step. Note this is already broken for"},{"line_number":134,"context_line":" cold migrate in some cases and the same fix for that issue will apply here."},{"line_number":135,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"22820153_d83c2e70","line":132,"range":{"start_line":132,"start_character":30,"end_line":132,"end_character":73},"updated":"2025-03-17 16:16:12.000000000","message":"this will prevent cold migration, resize, live-migration from working\n\nAlso we need to double check if this will prevent evac from working or not as it is a bit different regarding the allocation management","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"42919a2934634958be609c901df27b64f9345ecc","unresolved":false,"context_lines":[{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"},{"line_number":133,"context_line":" which is probably fine as a first step. Note this is already broken for"},{"line_number":134,"context_line":" cold migrate in some cases and the same fix for that issue will apply here."},{"line_number":135,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"fb9a623d_114c7ed7","line":132,"range":{"start_line":132,"start_character":30,"end_line":132,"end_character":73},"in_reply_to":"043b7420_4202e83a","updated":"2025-03-17 18:00:49.000000000","message":"Ack, live migration uses the migration uuid strategy, just not move_claim, so I\u0027ll tweak this.","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"550290f2dc55973cc3f67c9a0ea91cb840bab933","unresolved":true,"context_lines":[{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"},{"line_number":133,"context_line":" which is probably fine as a first step. Note this is already broken for"},{"line_number":134,"context_line":" cold migrate in some cases and the same fix for that issue will apply here."},{"line_number":135,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"dbe9273d_3320029c","line":132,"range":{"start_line":132,"start_character":30,"end_line":132,"end_character":73},"in_reply_to":"0ac216c0_233b044d","updated":"2025-03-17 17:10:45.000000000","message":"I still mention not just cold migrate but also live migrate and resize here","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"6968c9f1d5f535bc1882afc00f570c31069b3bbc","unresolved":true,"context_lines":[{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"},{"line_number":133,"context_line":" which is probably fine as a first step. Note this is already broken for"},{"line_number":134,"context_line":" cold migrate in some cases and the same fix for that issue will apply here."},{"line_number":135,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"d0783d48_e591159f","line":132,"range":{"start_line":132,"start_character":30,"end_line":132,"end_character":73},"in_reply_to":"22820153_d83c2e70","updated":"2025-03-17 16:26:37.000000000","message":"I\u0027m not sure why this would not work for evacuation.. what are you concerned about specifically?","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"a17e4b26731c6479f0af71f202510473fe656813","unresolved":false,"context_lines":[{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"},{"line_number":133,"context_line":" which is probably fine as a first step. Note this is already broken for"},{"line_number":134,"context_line":" cold migrate in some cases and the same fix for that issue will apply here."},{"line_number":135,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"0ac216c0_233b044d","line":132,"range":{"start_line":132,"start_character":30,"end_line":132,"end_character":73},"in_reply_to":"d0783d48_e591159f","updated":"2025-03-17 16:50:07.000000000","message":"Done","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"333a6da325ff70161e322ca7a222a4fedea4c079","unresolved":true,"context_lines":[{"line_number":129,"context_line":" not fail due to the resource provider being over-subscribed. It is reasonable"},{"line_number":130,"context_line":" to expect to be able to swap one allocation for an equally-sized one, even"},{"line_number":131,"context_line":" if we are technically over-subscribed, but placement does not allow this"},{"line_number":132,"context_line":" today. Until that is fixed, this will prevent cold migrate from working,"},{"line_number":133,"context_line":" which is probably fine as a first step. Note this is already broken for"},{"line_number":134,"context_line":" cold migrate in some cases and the same fix for that issue will apply here."},{"line_number":135,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"043b7420_4202e83a","line":132,"range":{"start_line":132,"start_character":30,"end_line":132,"end_character":73},"in_reply_to":"dbe9273d_3320029c","updated":"2025-03-17 17:14:33.000000000","message":"Cold migrate is resize to me, but sure I can add it.\n\nI\u0027m not quite sure about live migration as I thought we don\u0027t do the same swap-to-migration-uuid on the source in that case. But, did you confirm that we do for PCI-i-p?","commit_id":"c3f6d69f337b07196585b4fc157181ff4e9175ae"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"550290f2dc55973cc3f67c9a0ea91cb840bab933","unresolved":false,"context_lines":[{"line_number":99,"context_line":" has been reserved such that it will not be re-allocated. This happens before"},{"line_number":100,"context_line":" the point at which the instance is able to run with it (in all situations)"},{"line_number":101,"context_line":" and remains in that state until an external action drops the reserved count"},{"line_number":102,"context_line":" back to zero. In other words, \"burned\" means ``reserved\u003dtotal``."},{"line_number":103,"context_line":""},{"line_number":104,"context_line":"By doing this in the above described way we will get synchronous reservation"},{"line_number":105,"context_line":"of the devices (i.e. it will happen before the instance starts running) as"}],"source_content_type":"text/x-rst","patch_set":7,"id":"6adb1724_b8520876","line":102,"updated":"2025-03-17 17:10:45.000000000","message":"+1","commit_id":"93ce5916e01aafc796071997ca261ef83fe7b1d8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"550290f2dc55973cc3f67c9a0ea91cb840bab933","unresolved":false,"context_lines":[{"line_number":155,"context_line":" sure that whatever happens on the compute node in this case will fail before"},{"line_number":156,"context_line":" assigning the device to an instance (which should happen during"},{"line_number":157,"context_line":" ``ResourceTracker._update()`` as part of the allocation healing)."},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Alternatives"},{"line_number":160,"context_line":"------------"},{"line_number":161,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"be85de54_d21821ec","line":158,"updated":"2025-03-17 17:10:45.000000000","message":"thanks","commit_id":"93ce5916e01aafc796071997ca261ef83fe7b1d8"}]}