)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"25534311_255de62a","updated":"2022-11-15 10:40:50.000000000","message":"I have a coupe questions inline and a bunch of nits.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"00b4b0a6_5caeff42","updated":"2022-11-17 22:34:45.000000000","message":"Overall, I think this is a great evolution and actually lays a solid foundation to fix a lot of the headaches in the existing code paths. I have some feedback, and likely some confusion to resolve.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"126f0e48_b47c1502","updated":"2022-11-25 10:41:27.000000000","message":"Thanks all, I will try refresh this now.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7d1d7c4d652593f5cf80e6d2efcd897602a51103","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"ffb75e30_db099c1e","updated":"2022-11-21 16:31:18.000000000","message":"Thanks for the answers so far.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"520d47bf_7de656de","updated":"2022-11-17 14:17:28.000000000","message":"i happy with the general direction of this.\n\nsome comment inline but nothing major.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"dcad706645707728dfde6792b435fcca7a53d7cf","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":6,"id":"ffcb17d6_b13db12a","updated":"2022-11-15 18:30:01.000000000","message":"i wont get to this today but ill try and take a look tomorrow","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":7,"id":"978d0844_bc80a69c","updated":"2022-12-15 15:22:49.000000000","message":"I\u0027m honestly not an Ironic expert but I was able to understand the problem and the proposed solutions 😊\n\nSo, basically, while I think we could have some concerns about upgrade and move operations, this spec is I guess eventually fine for being merged.\nLet\u0027s just do this now and we can continue to discuss about this at the implementation. \nEventually, if we move to another direction that changes from what\u0027s said in the spec, meh, we can provide a spec modification if so.\n\nThanks all anyway for having worked on it !  ","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"dc293501e47fcffa10189eb3c10a1bf9ab9914f8","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":7,"id":"78f73482_cbe77adc","updated":"2022-12-14 12:46:59.000000000","message":"This looks good to me.","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"01ac0f5b882b705f540943e7abc12e269a2603dc","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":7,"id":"a3cbf735_17462c86","updated":"2022-12-15 10:31:14.000000000","message":"im going to check with sylvain to see if he wants to review.\ni tought this merged yesterday actullly so i told him it did not need to be reviews so let me correct that.\n\nthere are some nits inline but i think im happy to merge this as is and we can adress those in a followup\n\nif sylvain does not want to review this ill add +w shortly otherwise ill leave it to them.","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"a3c3511e4f4d31a61ae6a7adb6340fa23b271c49","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":7,"id":"4c350508_3b7727d8","updated":"2022-11-29 18:06:26.000000000","message":"ok ill see if i can make time to review this this week just skimed some of the commens since i last looked.","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"}],"specs/2023.1/approved/ironic-shards.rst":[{"author":{"_account_id":10342,"name":"Jay Faulkner","display_name":"JayF","email":"jay@jvf.cc","username":"JayF","status":"youtube.com/@oss-gr / podcast.gr-oss.io"},"change_message_id":"cc771779c2c8bb66dba2f4b814564a3d25089a86","unresolved":true,"context_lines":[{"line_number":34,"context_line":"  managed, i.e. reboot will fail"},{"line_number":35,"context_line":"* moreover, when the old nova-compute comes back up, which might take"},{"line_number":36,"context_line":"  some time, there are lots of bugs as the hash ring slowly rebalances"},{"line_number":37,"context_line":""},{"line_number":38,"context_line":"This spec about tweaking the way we shard Ironic compute nodes."},{"line_number":39,"context_line":"We need to stop violating deep assumptions in the compute manager"},{"line_number":40,"context_line":"code by moving to a more static ironic node partitions."}],"source_content_type":"text/x-rst","patch_set":3,"id":"2c9f06ca_b1756771","line":37,"updated":"2022-11-02 15:35:01.000000000","message":"I\u0027d even mention, explicitly, that at extreme scale nova-compute hash ring rebalancing can take upwards of 24 hours after a restart. It\u0027s really awful :(","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"954c89821bafdbcbcfe397030ca9877be12f9a3e","unresolved":false,"context_lines":[{"line_number":34,"context_line":"  managed, i.e. reboot will fail"},{"line_number":35,"context_line":"* moreover, when the old nova-compute comes back up, which might take"},{"line_number":36,"context_line":"  some time, there are lots of bugs as the hash ring slowly rebalances"},{"line_number":37,"context_line":""},{"line_number":38,"context_line":"This spec about tweaking the way we shard Ironic compute nodes."},{"line_number":39,"context_line":"We need to stop violating deep assumptions in the compute manager"},{"line_number":40,"context_line":"code by moving to a more static ironic node partitions."}],"source_content_type":"text/x-rst","patch_set":3,"id":"0222412d_13d53435","line":37,"in_reply_to":"2c9f06ca_b1756771","updated":"2022-11-07 12:32:45.000000000","message":"Done","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":10342,"name":"Jay Faulkner","display_name":"JayF","email":"jay@jvf.cc","username":"JayF","status":"youtube.com/@oss-gr / podcast.gr-oss.io"},"change_message_id":"cc771779c2c8bb66dba2f4b814564a3d25089a86","unresolved":true,"context_lines":[{"line_number":54,"context_line":"need better documentation:"},{"line_number":55,"context_line":""},{"line_number":56,"context_line":"* active-passive failover for nova-compute services"},{"line_number":57,"context_line":"  running the ironic driver"},{"line_number":58,"context_line":"* sharding ironic-conductors and nova-computes using"},{"line_number":59,"context_line":"  ironic conductor groups, at the same time as using"},{"line_number":60,"context_line":"  active-passive failover"}],"source_content_type":"text/x-rst","patch_set":3,"id":"7b754a31_9d40633d","line":57,"updated":"2022-11-02 15:35:01.000000000","message":"This does not work, due to startup issue noted above: active/passive is only a sane config once we limit the number of nodes pulled from the Ironic API on startup, enabling the compute service to startup in less than all of the hours 😊","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"954c89821bafdbcbcfe397030ca9877be12f9a3e","unresolved":true,"context_lines":[{"line_number":54,"context_line":"need better documentation:"},{"line_number":55,"context_line":""},{"line_number":56,"context_line":"* active-passive failover for nova-compute services"},{"line_number":57,"context_line":"  running the ironic driver"},{"line_number":58,"context_line":"* sharding ironic-conductors and nova-computes using"},{"line_number":59,"context_line":"  ironic conductor groups, at the same time as using"},{"line_number":60,"context_line":"  active-passive failover"}],"source_content_type":"text/x-rst","patch_set":3,"id":"b9113ca8_93170e89","line":57,"in_reply_to":"7b754a31_9d40633d","updated":"2022-11-07 12:32:45.000000000","message":"So I will some notes. This totally works OK-ish at small enough scales, using conductor group sharding, after Julia\u0027s VIF fix. But I agree we need to raise that massive limitation here.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":false,"context_lines":[{"line_number":54,"context_line":"need better documentation:"},{"line_number":55,"context_line":""},{"line_number":56,"context_line":"* active-passive failover for nova-compute services"},{"line_number":57,"context_line":"  running the ironic driver"},{"line_number":58,"context_line":"* sharding ironic-conductors and nova-computes using"},{"line_number":59,"context_line":"  ironic conductor groups, at the same time as using"},{"line_number":60,"context_line":"  active-passive failover"}],"source_content_type":"text/x-rst","patch_set":3,"id":"0671dc5d_41b70d85","line":57,"in_reply_to":"b9113ca8_93170e89","updated":"2022-11-25 10:41:27.000000000","message":"Ack","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"25e0e57b4e38444b80963d6aaff16787dc7140a9","unresolved":true,"context_lines":[{"line_number":96,"context_line":""},{"line_number":97,"context_line":"We could attempt to keep fixing the hash ring recovery within the ironic"},{"line_number":98,"context_line":"driver, but its very unclear what will break next due to all the deep"},{"line_number":99,"context_line":"assumptions made about the nova-compute process."},{"line_number":100,"context_line":""},{"line_number":101,"context_line":"Data model impact"},{"line_number":102,"context_line":"-----------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"6db4fa57_e9b0594f","line":99,"updated":"2022-11-02 16:26:11.000000000","message":"It should likely be worthwhile to highlight what it would be, and the high level concern as to why we\u0027ve not done this.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":false,"context_lines":[{"line_number":96,"context_line":""},{"line_number":97,"context_line":"We could attempt to keep fixing the hash ring recovery within the ironic"},{"line_number":98,"context_line":"driver, but its very unclear what will break next due to all the deep"},{"line_number":99,"context_line":"assumptions made about the nova-compute process."},{"line_number":100,"context_line":""},{"line_number":101,"context_line":"Data model impact"},{"line_number":102,"context_line":"-----------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"42baa02b_571382a1","line":99,"in_reply_to":"26ad7412_3485a01e","updated":"2022-11-25 10:41:27.000000000","message":"Ack","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"954c89821bafdbcbcfe397030ca9877be12f9a3e","unresolved":true,"context_lines":[{"line_number":96,"context_line":""},{"line_number":97,"context_line":"We could attempt to keep fixing the hash ring recovery within the ironic"},{"line_number":98,"context_line":"driver, but its very unclear what will break next due to all the deep"},{"line_number":99,"context_line":"assumptions made about the nova-compute process."},{"line_number":100,"context_line":""},{"line_number":101,"context_line":"Data model impact"},{"line_number":102,"context_line":"-----------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"26ad7412_3485a01e","line":99,"in_reply_to":"6db4fa57_e9b0594f","updated":"2022-11-07 12:32:45.000000000","message":"yes, good point, I can try :)","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"25e0e57b4e38444b80963d6aaff16787dc7140a9","unresolved":true,"context_lines":[{"line_number":152,"context_line":"--------------"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"For those currenly using peer_list, we need to document how they"},{"line_number":155,"context_line":"can move to the new sharding approach."},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"Implementation"},{"line_number":158,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"26b304d7_f05afbb1","line":155,"updated":"2022-11-02 16:26:11.000000000","message":"we\u0027ve got an idea of this in the ironic spec, tl;dr:\n\n1) shutdown the nova-compute\n2) set the shard key\n3) run the magical nova-manage command which does the needful reconciliation.\n4) restart nova-compute.\n\nAs nova-compute continues to run, other nova-computes will get upgraded, it\u0027s responsible instances will decline based upon the shard key application. magic()","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":152,"context_line":"--------------"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"For those currenly using peer_list, we need to document how they"},{"line_number":155,"context_line":"can move to the new sharding approach."},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"Implementation"},{"line_number":158,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"79145d1a_961d0c44","line":155,"in_reply_to":"150a4d49_c89a0835","updated":"2022-11-17 22:34:45.000000000","message":"Re: in nova\u0027s DB, I believe the consensus is not to and just keep it transparent. \n\nIt can warn, and I think some of the logic John has proposed provides an improved guardrail to hopefully keep things from going sideways too much for operators should they do something unexpected mid-migration.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"c839fbda9c85edfbf3676fa84779a5df8725f00e","unresolved":true,"context_lines":[{"line_number":152,"context_line":"--------------"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"For those currenly using peer_list, we need to document how they"},{"line_number":155,"context_line":"can move to the new sharding approach."},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"Implementation"},{"line_number":158,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"a684457b_4d588720","line":155,"in_reply_to":"26b304d7_f05afbb1","updated":"2022-11-02 17:23:54.000000000","message":"In discussing with John, we\u0027re concerned there may be dragons in the compute manager.py code, specifically centered around compute node\u0027s destroy() method being called when a compute node does not appear in the list. The reason we\u0027re concerned here, and ultimately we need to carefully walk the compute manager code as it interacts with the nova.virt.ironic driver, is to ensure that we don\u0027t end up in a situation where we somehow, mid-upgrade, accidentally cause compute node records to be destroyed.\n\nGranted, we believe the nova-manage command should repair/heal the compute node records because it provide a upgrade/migration means which also automatically heals the pitfalls.\n\ntl;dr carefully check nova/compute/manager.py and nova/virt/ironic/driver.py and make sure people won\u0027t have super bad things happen with ComputeNode records.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":152,"context_line":"--------------"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"For those currenly using peer_list, we need to document how they"},{"line_number":155,"context_line":"can move to the new sharding approach."},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"Implementation"},{"line_number":158,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"150a4d49_c89a0835","line":155,"in_reply_to":"29d76793_e9bb761d","updated":"2022-11-17 14:17:28.000000000","message":"john, i did not see this expclitly althouhg i might have missed it but we dont currently plan ot store the shard key in novas db correct it will only be in the config so nova wont actully be able to detect that it has changed if you change it in our config unless we use the service id/host value to find  teh computes that are currently managed by it and check those in ironic to see if the shard key is the same. the compute service will not be able to tell if the change was in it config or the ironic node if there was mismathc between the too.\n\ni guess in either case it can warn.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":152,"context_line":"--------------"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"For those currenly using peer_list, we need to document how they"},{"line_number":155,"context_line":"can move to the new sharding approach."},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"Implementation"},{"line_number":158,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"cf1a4e13_0d6eae71","line":155,"in_reply_to":"79145d1a_961d0c44","updated":"2022-11-25 10:41:27.000000000","message":"Sean, yes, currently not thinking about the DB. I know we are changing our mind about the host name, but that isn\u0027t quite the same deal.\n\nJulia, +1, I have attempted to add guardrails to detect state drift between ironic and nova, and it didn\u0027t seem to need this shard in the DB.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"ccb9039bef0a9b5bc3ab0ee08a5702a1fe40864f","unresolved":true,"context_lines":[{"line_number":152,"context_line":"--------------"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"For those currenly using peer_list, we need to document how they"},{"line_number":155,"context_line":"can move to the new sharding approach."},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"Implementation"},{"line_number":158,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"b99d5d38_32d05329","line":155,"in_reply_to":"a684457b_4d588720","updated":"2022-11-02 18:29:15.000000000","message":"re-summary: We need to be defensive that in case someone accidentally changes the shard key in either place.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"954c89821bafdbcbcfe397030ca9877be12f9a3e","unresolved":true,"context_lines":[{"line_number":152,"context_line":"--------------"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"For those currenly using peer_list, we need to document how they"},{"line_number":155,"context_line":"can move to the new sharding approach."},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"Implementation"},{"line_number":158,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"29d76793_e9bb761d","line":155,"in_reply_to":"b99d5d38_32d05329","updated":"2022-11-07 12:32:45.000000000","message":"I have tried to write down the process I had in my head, it is slightly different, but agreed with your concerns here.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"87d7be69030fb642cffef746bce7bbd5f6653936","unresolved":true,"context_lines":[{"line_number":152,"context_line":"--------------"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"For those currenly using peer_list, we need to document how they"},{"line_number":155,"context_line":"can move to the new sharding approach."},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"Implementation"},{"line_number":158,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"508f334d_d80d5e4a","line":155,"in_reply_to":"cf1a4e13_0d6eae71","updated":"2022-11-29 16:57:34.000000000","message":"John, I expected as much, since we have matched the compute to the baremetal node uuid since... ocata?\n\nThanks!","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"25e0e57b4e38444b80963d6aaff16787dc7140a9","unresolved":true,"context_lines":[{"line_number":174,"context_line":"Work Items"},{"line_number":175,"context_line":"----------"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"* add deprecation logs, rename conductor group partition key config"},{"line_number":178,"context_line":"* add compute node move protections"},{"line_number":179,"context_line":"* add new sharding config and ironic query"},{"line_number":180,"context_line":"* add nova-manage tool to move ironic instances to a new host"}],"source_content_type":"text/x-rst","patch_set":3,"id":"80d8e5d9_3235cfcf","line":177,"range":{"start_line":177,"start_character":24,"end_line":177,"end_character":67},"updated":"2022-11-02 16:26:11.000000000","message":"We should likely just keep the conductor group partition key, and ignore it on this.\n\nYes it get misused, but it solves another cases where the failure domain *really* needs to be represented.\n\n*plus* operators *hate* having multiple ironic deployments, they want the single pane of glass where possible, so conductor group helps with that.\n\nAlternative, after discussing with John, rename the setting.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":10342,"name":"Jay Faulkner","display_name":"JayF","email":"jay@jvf.cc","username":"JayF","status":"youtube.com/@oss-gr / podcast.gr-oss.io"},"change_message_id":"322f1f8c884556824cad9ac388829ca430fb0f22","unresolved":true,"context_lines":[{"line_number":174,"context_line":"Work Items"},{"line_number":175,"context_line":"----------"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"* add deprecation logs, rename conductor group partition key config"},{"line_number":178,"context_line":"* add compute node move protections"},{"line_number":179,"context_line":"* add new sharding config and ironic query"},{"line_number":180,"context_line":"* add nova-manage tool to move ironic instances to a new host"}],"source_content_type":"text/x-rst","patch_set":3,"id":"e7a4417a_3fa239bd","line":177,"updated":"2022-11-02 16:22:07.000000000","message":"be clear: we\u0027re not getting rid of segregating nova computes by conductor groups","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"954c89821bafdbcbcfe397030ca9877be12f9a3e","unresolved":true,"context_lines":[{"line_number":174,"context_line":"Work Items"},{"line_number":175,"context_line":"----------"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"* add deprecation logs, rename conductor group partition key config"},{"line_number":178,"context_line":"* add compute node move protections"},{"line_number":179,"context_line":"* add new sharding config and ironic query"},{"line_number":180,"context_line":"* add nova-manage tool to move ironic instances to a new host"}],"source_content_type":"text/x-rst","patch_set":3,"id":"9d0baaab_af988cd9","line":177,"range":{"start_line":177,"start_character":24,"end_line":177,"end_character":67},"in_reply_to":"80d8e5d9_3235cfcf","updated":"2022-11-07 12:32:45.000000000","message":"This was very unclear here, I have reworded it.\n\nMy working assumption was we support conductor group filtering and shard keys at the same time.","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":false,"context_lines":[{"line_number":174,"context_line":"Work Items"},{"line_number":175,"context_line":"----------"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"* add deprecation logs, rename conductor group partition key config"},{"line_number":178,"context_line":"* add compute node move protections"},{"line_number":179,"context_line":"* add new sharding config and ironic query"},{"line_number":180,"context_line":"* add nova-manage tool to move ironic instances to a new host"}],"source_content_type":"text/x-rst","patch_set":3,"id":"9c6b91fc_4e9876fb","line":177,"range":{"start_line":177,"start_character":24,"end_line":177,"end_character":67},"in_reply_to":"9d0baaab_af988cd9","updated":"2022-11-25 10:41:27.000000000","message":"Ack","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"954c89821bafdbcbcfe397030ca9877be12f9a3e","unresolved":false,"context_lines":[{"line_number":174,"context_line":"Work Items"},{"line_number":175,"context_line":"----------"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"* add deprecation logs, rename conductor group partition key config"},{"line_number":178,"context_line":"* add compute node move protections"},{"line_number":179,"context_line":"* add new sharding config and ironic query"},{"line_number":180,"context_line":"* add nova-manage tool to move ironic instances to a new host"}],"source_content_type":"text/x-rst","patch_set":3,"id":"76b15cfa_91ac1c21","line":177,"in_reply_to":"e7a4417a_3fa239bd","updated":"2022-11-07 12:32:45.000000000","message":"Ack","commit_id":"49af08e5f7d64714b13b62681ee6f235ecc33f07"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":8,"context_line":"Ironic Shards"},{"line_number":9,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":10,"context_line":""},{"line_number":11,"context_line":"https://blueprints.launchpad.net/nova/+spec/ironic-shards"},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"Problem description"},{"line_number":14,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":6,"id":"c7171349_07c42198","line":11,"updated":"2022-11-15 10:40:50.000000000","message":"This bp does not exists yet.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"f51f3d80564fda812b82d08594c21972f1239cbd","unresolved":false,"context_lines":[{"line_number":8,"context_line":"Ironic Shards"},{"line_number":9,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":10,"context_line":""},{"line_number":11,"context_line":"https://blueprints.launchpad.net/nova/+spec/ironic-shards"},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"Problem description"},{"line_number":14,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":6,"id":"89ed6287_dfb9deec","line":11,"in_reply_to":"c7171349_07c42198","updated":"2022-11-25 12:18:55.000000000","message":"Added.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":20,"context_line":""},{"line_number":21,"context_line":"Currently we support setting a partition key, where nova-compute only"},{"line_number":22,"context_line":"cares about a subset of ironic nodes, those associated with a specific"},{"line_number":23,"context_line":"conductor group. Howerver, some conductor groups can be very large,"},{"line_number":24,"context_line":"servered by many ironic-conductor services."},{"line_number":25,"context_line":""},{"line_number":26,"context_line":"To help with this, Nova has attempted to dynamically spread ironic"}],"source_content_type":"text/x-rst","patch_set":6,"id":"eb421b9c_0f359a6b","line":23,"range":{"start_line":23,"start_character":17,"end_line":23,"end_character":25},"updated":"2022-11-15 10:40:50.000000000","message":"nit: However","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"f51f3d80564fda812b82d08594c21972f1239cbd","unresolved":false,"context_lines":[{"line_number":20,"context_line":""},{"line_number":21,"context_line":"Currently we support setting a partition key, where nova-compute only"},{"line_number":22,"context_line":"cares about a subset of ironic nodes, those associated with a specific"},{"line_number":23,"context_line":"conductor group. Howerver, some conductor groups can be very large,"},{"line_number":24,"context_line":"servered by many ironic-conductor services."},{"line_number":25,"context_line":""},{"line_number":26,"context_line":"To help with this, Nova has attempted to dynamically spread ironic"}],"source_content_type":"text/x-rst","patch_set":6,"id":"67276a7d_7a75f137","line":23,"range":{"start_line":23,"start_character":17,"end_line":23,"end_character":25},"in_reply_to":"eb421b9c_0f359a6b","updated":"2022-11-25 12:18:55.000000000","message":"Done","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":52,"context_line":"with ironic conductor groups that are greater than around"},{"line_number":53,"context_line":"1000 baremetal nodes."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":"Note that many parts of this story work today but"},{"line_number":56,"context_line":"need better documentation:"},{"line_number":57,"context_line":""},{"line_number":58,"context_line":"* understanding the current scale limit of around 500-1000 ironic"}],"source_content_type":"text/x-rst","patch_set":6,"id":"71579d3a_0dd3c4d1","line":55,"updated":"2022-11-17 14:17:28.000000000","message":"it might be better to actually put this in a note \n.. NOTE::\n\nthese are not really use cases just comments on the current state of things.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":85,"context_line":"currently puts the instance into an Error state."},{"line_number":86,"context_line":"There is a proposal for this to trigger a retry to an"},{"line_number":87,"context_line":"alternative candidate here, which can help in some cases,"},{"line_number":88,"context_line":"but there is no proposal for a more robust fix yet:"},{"line_number":89,"context_line":"https://review.opendev.org/c/openstack/nova/+/842478"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":6,"id":"c5454419_77cb4888","line":88,"range":{"start_line":88,"start_character":47,"end_line":88,"end_character":50},"updated":"2022-11-17 14:17:28.000000000","message":"hehe you obviously wrote this before writing \nhttps://review.opendev.org/c/openstack/nova/+/864773\n\nwe might be able to just drop this out of scope section since this will be adress seperately as bugs but  in general i agree that resolving this bug is out of scope of the spec.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":85,"context_line":"currently puts the instance into an Error state."},{"line_number":86,"context_line":"There is a proposal for this to trigger a retry to an"},{"line_number":87,"context_line":"alternative candidate here, which can help in some cases,"},{"line_number":88,"context_line":"but there is no proposal for a more robust fix yet:"},{"line_number":89,"context_line":"https://review.opendev.org/c/openstack/nova/+/842478"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":6,"id":"8eb82775_17a07a2d","line":88,"range":{"start_line":88,"start_character":47,"end_line":88,"end_character":50},"in_reply_to":"3dda864d_a8033d4e","updated":"2022-11-25 10:41:27.000000000","message":"yeah, I have fixed this since I wrote this :) I will re-work this bit.\n\nI think all I wanted to express here is some concept around the scope of what is fixed.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"f51f3d80564fda812b82d08594c21972f1239cbd","unresolved":true,"context_lines":[{"line_number":85,"context_line":"currently puts the instance into an Error state."},{"line_number":86,"context_line":"There is a proposal for this to trigger a retry to an"},{"line_number":87,"context_line":"alternative candidate here, which can help in some cases,"},{"line_number":88,"context_line":"but there is no proposal for a more robust fix yet:"},{"line_number":89,"context_line":"https://review.opendev.org/c/openstack/nova/+/842478"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":6,"id":"c38077f0_e51ae33e","line":88,"range":{"start_line":88,"start_character":47,"end_line":88,"end_character":50},"in_reply_to":"8eb82775_17a07a2d","updated":"2022-11-25 12:18:55.000000000","message":"hmm, yes, lets drop this whole section.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":85,"context_line":"currently puts the instance into an Error state."},{"line_number":86,"context_line":"There is a proposal for this to trigger a retry to an"},{"line_number":87,"context_line":"alternative candidate here, which can help in some cases,"},{"line_number":88,"context_line":"but there is no proposal for a more robust fix yet:"},{"line_number":89,"context_line":"https://review.opendev.org/c/openstack/nova/+/842478"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3dda864d_a8033d4e","line":88,"range":{"start_line":88,"start_character":47,"end_line":88,"end_character":50},"in_reply_to":"c5454419_77cb4888","updated":"2022-11-17 22:34:45.000000000","message":"The more I think about it, the more I\u0027m kind of thinking along the same lines of Sean since that is in flight and an attempt to improve that operator experience. I don\u0027t have a good feeling for a percentage of users/operators who consult spec docs.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":10342,"name":"Jay Faulkner","display_name":"JayF","email":"jay@jvf.cc","username":"JayF","status":"youtube.com/@oss-gr / podcast.gr-oss.io"},"change_message_id":"aab4ed6d9dcb9a3afede0163417a51a5421ee8c2","unresolved":true,"context_lines":[{"line_number":99,"context_line":"expose all ironic nodes from a single nova-compute process."},{"line_number":100,"context_line":""},{"line_number":101,"context_line":"When the operator sets a shard_key, the compute-node process should"},{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"}],"source_content_type":"text/x-rst","patch_set":6,"id":"96cb3e19_cb8b98fc","line":102,"updated":"2022-11-08 20:46:40.000000000","message":"shall or must? Should is weak when this is literally the point that we have to scale out of to avoid headaches at scale","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"256d7659f04ed17211966ab53afd99d1197457e4","unresolved":true,"context_lines":[{"line_number":99,"context_line":"expose all ironic nodes from a single nova-compute process."},{"line_number":100,"context_line":""},{"line_number":101,"context_line":"When the operator sets a shard_key, the compute-node process should"},{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"}],"source_content_type":"text/x-rst","patch_set":6,"id":"dc766836_57bf9a5f","line":102,"in_reply_to":"96cb3e19_cb8b98fc","updated":"2022-11-11 09:49:21.000000000","message":"heh, good point :) I will tweak that on the next revision. I suspect I am missing a blueprint link at the top that I should add in.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":100,"context_line":""},{"line_number":101,"context_line":"When the operator sets a shard_key, the compute-node process should"},{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"}],"source_content_type":"text/x-rst","patch_set":6,"id":"032db49c_0e7100b0","line":103,"range":{"start_line":103,"start_character":48,"end_line":103,"end_character":49},"updated":"2022-11-17 14:17:28.000000000","message":"when the shard_key is defined.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":10342,"name":"Jay Faulkner","display_name":"JayF","email":"jay@jvf.cc","username":"JayF","status":"youtube.com/@oss-gr / podcast.gr-oss.io"},"change_message_id":"7a205e45bda02b7f41f25cf3ca81e2d18860a0af","unresolved":true,"context_lines":[{"line_number":100,"context_line":""},{"line_number":101,"context_line":"When the operator sets a shard_key, the compute-node process should"},{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"}],"source_content_type":"text/x-rst","patch_set":6,"id":"9a909b50_9d20ba76","line":103,"range":{"start_line":103,"start_character":48,"end_line":103,"end_character":49},"in_reply_to":"032db49c_0e7100b0","updated":"2022-11-17 17:16:53.000000000","message":"Ironic treats a null shard_key as a shard_key value (e.g. the lack of a shard key should be considered a \"shard\"). There is no case in which nova-computes should be making calls without a shard_key set AIUI.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"ef514b014b5e783a5228c27e298b1913ecc54a13","unresolved":true,"context_lines":[{"line_number":100,"context_line":""},{"line_number":101,"context_line":"When the operator sets a shard_key, the compute-node process should"},{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"}],"source_content_type":"text/x-rst","patch_set":6,"id":"f4a2755e_559ed695","line":103,"range":{"start_line":103,"start_character":48,"end_line":103,"end_character":49},"in_reply_to":"9a909b50_9d20ba76","updated":"2022-11-17 18:41:54.000000000","message":"ok that a little odd form a nova perspective but not unworkabout\nwe generally either have a non None/null defualt value or treat an unset value as not enabling the feature.\n\nbut we can just default the config to None and pass python None/json null as the shard key. we need to handel that expictly and make sure we dont default to \"\" however given the ironic behavior.\n\nsince \"\" and null are semanticaly differnt both in SQL and in json/pythyon","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":100,"context_line":""},{"line_number":101,"context_line":"When the operator sets a shard_key, the compute-node process should"},{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"}],"source_content_type":"text/x-rst","patch_set":6,"id":"0dff8934_64b2738c","line":103,"range":{"start_line":103,"start_character":48,"end_line":103,"end_character":49},"in_reply_to":"a01855f6_8966d44e","updated":"2022-11-25 10:41:27.000000000","message":"Yeah, this is the opt in point. You shouldn\u0027t have to use it at small scale. I probably need to word that slightly different to avoid the confusion.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":100,"context_line":""},{"line_number":101,"context_line":"When the operator sets a shard_key, the compute-node process should"},{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"}],"source_content_type":"text/x-rst","patch_set":6,"id":"a01855f6_8966d44e","line":103,"range":{"start_line":103,"start_character":48,"end_line":103,"end_character":49},"in_reply_to":"f4a2755e_559ed695","updated":"2022-11-17 22:34:45.000000000","message":"FWIW, Jay\u0027s comment is rooted in how we intend to treat it as a null shard key in the list endpoint we\u0027re creating for that, but that endpoint is for a consumer (or operator) outside of nova\u0027s interaction who wants to sort out what shards exist without actually sifting through the entire node list.\n\nI don\u0027t think Nova shouldn\u0027t *need* to be aware of that, just add shard_key  to the query for nodes *when* applicable/enabled, because otherwise we\u0027re creating an break in an upgrade if someone hasn\u0027t upgraded Ironic first.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"},{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"}],"source_content_type":"text/x-rst","patch_set":6,"id":"7f92fec2_7798102e","line":105,"range":{"start_line":105,"start_character":0,"end_line":105,"end_character":4},"updated":"2022-11-17 14:17:28.000000000","message":"again propably better to make this a note\n.. NOTE::","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"},{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"}],"source_content_type":"text/x-rst","patch_set":6,"id":"619d2184_6a66dff8","line":105,"range":{"start_line":105,"start_character":0,"end_line":105,"end_character":4},"in_reply_to":"58245f80_0da621da","updated":"2022-11-25 10:41:27.000000000","message":"oops, yes, you did mention that before, sorry.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"},{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"}],"source_content_type":"text/x-rst","patch_set":6,"id":"58245f80_0da621da","line":105,"range":{"start_line":105,"start_character":0,"end_line":105,"end_character":4},"in_reply_to":"7f92fec2_7798102e","updated":"2022-11-17 22:34:45.000000000","message":"++","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"},{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"},{"line_number":109,"context_line":"has got out of sync. This is purely defensive, and should not happen"}],"source_content_type":"text/x-rst","patch_set":6,"id":"cc716336_31f922b1","line":106,"range":{"start_line":105,"start_character":46,"end_line":106,"end_character":11},"updated":"2022-11-17 22:34:45.000000000","message":"I don\u0027t think the code lists for one node outside of an instance_uuid based match. FWIW.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":102,"context_line":"use the shard_key when querying a list of nodes in Ironic. We should"},{"line_number":103,"context_line":"never try to list all Ironic nodes in the system."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"},{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"},{"line_number":109,"context_line":"has got out of sync. This is purely defensive, and should not happen"}],"source_content_type":"text/x-rst","patch_set":6,"id":"e8a1db6a_ffc4772d","line":106,"range":{"start_line":105,"start_character":46,"end_line":106,"end_character":11},"in_reply_to":"cc716336_31f922b1","updated":"2022-11-25 10:41:27.000000000","message":"I suspect this is a non issue, we probably just do a get for the specific uuid.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"},{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"},{"line_number":109,"context_line":"has got out of sync. This is purely defensive, and should not happen"},{"line_number":110,"context_line":"under normal operations."},{"line_number":111,"context_line":""},{"line_number":112,"context_line":"Config changes and Deprecations"}],"source_content_type":"text/x-rst","patch_set":6,"id":"97270c39_6ab5e7ed","line":109,"range":{"start_line":108,"start_character":7,"end_line":109,"end_character":19},"updated":"2022-11-17 22:34:45.000000000","message":"can we also return it from the error state? Just thinking, someone is going to break themselves somehow. Of course, a magical nova-manage command could always reconcile/fix it.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"87d7be69030fb642cffef746bce7bbd5f6653936","unresolved":true,"context_lines":[{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"},{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"},{"line_number":109,"context_line":"has got out of sync. This is purely defensive, and should not happen"},{"line_number":110,"context_line":"under normal operations."},{"line_number":111,"context_line":""},{"line_number":112,"context_line":"Config changes and Deprecations"}],"source_content_type":"text/x-rst","patch_set":6,"id":"c3a72d9b_b1215297","line":109,"range":{"start_line":108,"start_character":7,"end_line":109,"end_character":19},"in_reply_to":"812d8cac_e32b6a87","updated":"2022-11-29 16:57:34.000000000","message":"Yeah, we need to avoid manual actions where possible. The overall guard makes a lot of sense for stuff like this anyhow.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":105,"context_line":"Note, we should still do a global search when looking for a specific"},{"line_number":106,"context_line":"ironic node, e.g. searching by node uuid or instance uuid"},{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"},{"line_number":109,"context_line":"has got out of sync. This is purely defensive, and should not happen"},{"line_number":110,"context_line":"under normal operations."},{"line_number":111,"context_line":""},{"line_number":112,"context_line":"Config changes and Deprecations"}],"source_content_type":"text/x-rst","patch_set":6,"id":"812d8cac_e32b6a87","line":109,"range":{"start_line":108,"start_character":7,"end_line":109,"end_character":19},"in_reply_to":"97270c39_6ab5e7ed","updated":"2022-11-25 10:41:27.000000000","message":"I am not sure yet, this is probably too much detail for the spec. I think putting the instance into Error might be a step too far, in part as it would require a manual reset-state call to restore it.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"},{"line_number":109,"context_line":"has got out of sync. This is purely defensive, and should not happen"},{"line_number":110,"context_line":"under normal operations."},{"line_number":111,"context_line":""},{"line_number":112,"context_line":"Config changes and Deprecations"},{"line_number":113,"context_line":"-------------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"73e34cdd_46f0cd12","line":110,"updated":"2022-11-17 14:17:28.000000000","message":"ok, so this is to detect and defend against the shard key being changed on the ironic node if it had an instance associated with it since that would result in the ironic and nova databases being out of sync.\n\ni.e. the shard key changed on the ironic node, which would require a different compute service with the corresponding key to manage it, which would require the instance.host to be updated\n, which we do not support.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"},{"line_number":109,"context_line":"has got out of sync. This is purely defensive, and should not happen"},{"line_number":110,"context_line":"under normal operations."},{"line_number":111,"context_line":""},{"line_number":112,"context_line":"Config changes and Deprecations"},{"line_number":113,"context_line":"-------------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"c44e7d0b_c11fbbc6","line":110,"in_reply_to":"73239597_6d7a242d","updated":"2022-11-25 10:41:27.000000000","message":"+1 Julia, this is what the new nova-manage command should be able to fix. At least in theory.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":107,"context_line":"When we find the node, we should ensure it is still part of our"},{"line_number":108,"context_line":"shard, and put the associated Instance into the Error state if something"},{"line_number":109,"context_line":"has got out of sync. This is purely defensive, and should not happen"},{"line_number":110,"context_line":"under normal operations."},{"line_number":111,"context_line":""},{"line_number":112,"context_line":"Config changes and Deprecations"},{"line_number":113,"context_line":"-------------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"73239597_6d7a242d","line":110,"in_reply_to":"73e34cdd_46f0cd12","updated":"2022-11-17 22:34:45.000000000","message":"Ideally, we want them to use whatever method we reach in this spec to make the change if deemed necessary, which would align with what we discussed during the ptg in terms of operational management/implementation.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":118,"context_line":"those in both the correct `shard_key` and the correct `conductor_group`,"},{"line_number":119,"context_line":"when both are configured."},{"line_number":120,"context_line":""},{"line_number":121,"context_line":"We will deprecate the user of the `peer_list`, and log a warning"},{"line_number":122,"context_line":"to stop using this configuration."},{"line_number":123,"context_line":""},{"line_number":124,"context_line":"In addtion, we need the logic that tries to move Compute Nodes"}],"source_content_type":"text/x-rst","patch_set":6,"id":"1a9e4be2_412f2d31","line":121,"range":{"start_line":121,"start_character":22,"end_line":121,"end_character":26},"updated":"2022-11-17 14:17:28.000000000","message":"use","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":121,"context_line":"We will deprecate the user of the `peer_list`, and log a warning"},{"line_number":122,"context_line":"to stop using this configuration."},{"line_number":123,"context_line":""},{"line_number":124,"context_line":"In addtion, we need the logic that tries to move Compute Nodes"},{"line_number":125,"context_line":"to never work unless the peer_list is larger than one. More details"},{"line_number":126,"context_line":"in the data model impact section."},{"line_number":127,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"1e1041bd_5316eb41","line":124,"range":{"start_line":124,"start_character":3,"end_line":124,"end_character":10},"updated":"2022-11-15 10:40:50.000000000","message":"nit: addition","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":121,"context_line":"We will deprecate the user of the `peer_list`, and log a warning"},{"line_number":122,"context_line":"to stop using this configuration."},{"line_number":123,"context_line":""},{"line_number":124,"context_line":"In addtion, we need the logic that tries to move Compute Nodes"},{"line_number":125,"context_line":"to never work unless the peer_list is larger than one. More details"},{"line_number":126,"context_line":"in the data model impact section."},{"line_number":127,"context_line":""},{"line_number":128,"context_line":"When deleting a ComputeNode object, we need to have the driver"}],"source_content_type":"text/x-rst","patch_set":6,"id":"209d338b_1cddd8c7","line":125,"range":{"start_line":124,"start_character":49,"end_line":125,"end_character":20},"updated":"2022-11-17 22:34:45.000000000","message":"This seems like a bug, fwiw.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":true,"context_lines":[{"line_number":121,"context_line":"We will deprecate the user of the `peer_list`, and log a warning"},{"line_number":122,"context_line":"to stop using this configuration."},{"line_number":123,"context_line":""},{"line_number":124,"context_line":"In addtion, we need the logic that tries to move Compute Nodes"},{"line_number":125,"context_line":"to never work unless the peer_list is larger than one. More details"},{"line_number":126,"context_line":"in the data model impact section."},{"line_number":127,"context_line":""},{"line_number":128,"context_line":"When deleting a ComputeNode object, we need to have the driver"}],"source_content_type":"text/x-rst","patch_set":6,"id":"99917454_419c1c6b","line":125,"range":{"start_line":124,"start_character":49,"end_line":125,"end_character":20},"in_reply_to":"0da7afb1_162d36e6","updated":"2022-12-15 15:22:49.000000000","message":"LGTM for a bugfix.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":121,"context_line":"We will deprecate the user of the `peer_list`, and log a warning"},{"line_number":122,"context_line":"to stop using this configuration."},{"line_number":123,"context_line":""},{"line_number":124,"context_line":"In addtion, we need the logic that tries to move Compute Nodes"},{"line_number":125,"context_line":"to never work unless the peer_list is larger than one. More details"},{"line_number":126,"context_line":"in the data model impact section."},{"line_number":127,"context_line":""},{"line_number":128,"context_line":"When deleting a ComputeNode object, we need to have the driver"}],"source_content_type":"text/x-rst","patch_set":6,"id":"541ad298_a1c6ba37","line":125,"range":{"start_line":124,"start_character":49,"end_line":125,"end_character":20},"in_reply_to":"209d338b_1cddd8c7","updated":"2022-11-25 10:41:27.000000000","message":"It might be...","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"01ac0f5b882b705f540943e7abc12e269a2603dc","unresolved":true,"context_lines":[{"line_number":121,"context_line":"We will deprecate the user of the `peer_list`, and log a warning"},{"line_number":122,"context_line":"to stop using this configuration."},{"line_number":123,"context_line":""},{"line_number":124,"context_line":"In addtion, we need the logic that tries to move Compute Nodes"},{"line_number":125,"context_line":"to never work unless the peer_list is larger than one. More details"},{"line_number":126,"context_line":"in the data model impact section."},{"line_number":127,"context_line":""},{"line_number":128,"context_line":"When deleting a ComputeNode object, we need to have the driver"}],"source_content_type":"text/x-rst","patch_set":6,"id":"0da7afb1_162d36e6","line":125,"range":{"start_line":124,"start_character":49,"end_line":125,"end_character":20},"in_reply_to":"541ad298_a1c6ba37","updated":"2022-12-15 10:31:14.000000000","message":"im not against fixing  it indepently of this spec as a bugfix but i dont think we need to cover that here beyond what is already stated.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":126,"context_line":"in the data model impact section."},{"line_number":127,"context_line":""},{"line_number":128,"context_line":"When deleting a ComputeNode object, we need to have the driver"},{"line_number":129,"context_line":"confrim that is safe. In the case of Ironic we will check to see if"},{"line_number":130,"context_line":"the configured Ironic has a node with that uuid, searching across all"},{"line_number":131,"context_line":"conductor groups and all shard keys. When the ComputeNode object is not"},{"line_number":132,"context_line":"deleted, we should not delete the entry in placement."}],"source_content_type":"text/x-rst","patch_set":6,"id":"4ee9515f_46287976","line":129,"range":{"start_line":129,"start_character":0,"end_line":129,"end_character":7},"updated":"2022-11-15 10:40:50.000000000","message":"nit: confirm","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"f51f3d80564fda812b82d08594c21972f1239cbd","unresolved":false,"context_lines":[{"line_number":126,"context_line":"in the data model impact section."},{"line_number":127,"context_line":""},{"line_number":128,"context_line":"When deleting a ComputeNode object, we need to have the driver"},{"line_number":129,"context_line":"confrim that is safe. In the case of Ironic we will check to see if"},{"line_number":130,"context_line":"the configured Ironic has a node with that uuid, searching across all"},{"line_number":131,"context_line":"conductor groups and all shard keys. When the ComputeNode object is not"},{"line_number":132,"context_line":"deleted, we should not delete the entry in placement."}],"source_content_type":"text/x-rst","patch_set":6,"id":"ba9ee09d_25fccd63","line":129,"range":{"start_line":129,"start_character":0,"end_line":129,"end_character":7},"in_reply_to":"4ee9515f_46287976","updated":"2022-11-25 12:18:55.000000000","message":"Ack","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":129,"context_line":"confrim that is safe. In the case of Ironic we will check to see if"},{"line_number":130,"context_line":"the configured Ironic has a node with that uuid, searching across all"},{"line_number":131,"context_line":"conductor groups and all shard keys. When the ComputeNode object is not"},{"line_number":132,"context_line":"deleted, we should not delete the entry in placement."},{"line_number":133,"context_line":""},{"line_number":134,"context_line":"nova-manage move ironic node"},{"line_number":135,"context_line":"----------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"b66c6a06_91152cc6","line":132,"updated":"2022-11-15 10:40:50.000000000","message":"Just to see I got it. If the Ironic driver sees that the ComputeNode uuid is still a valid Ironic node uuid somewhere in the whole deployment then the Ironic driver will not allow the compute manager to delete the ComputeNode object. And if the ComputeNode object is not deleted then the associated Placement inventory is not deleted either. Do I understand it correctly?","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":129,"context_line":"confrim that is safe. In the case of Ironic we will check to see if"},{"line_number":130,"context_line":"the configured Ironic has a node with that uuid, searching across all"},{"line_number":131,"context_line":"conductor groups and all shard keys. When the ComputeNode object is not"},{"line_number":132,"context_line":"deleted, we should not delete the entry in placement."},{"line_number":133,"context_line":""},{"line_number":134,"context_line":"nova-manage move ironic node"},{"line_number":135,"context_line":"----------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bcf925f9_07545c2c","line":132,"in_reply_to":"50e89d65_7c4066d8","updated":"2022-11-17 22:34:45.000000000","message":"I think that is along the lines of what John is saying, and I think he is stating it as a dual guard to help prevent things going insanely out of sync due to operator error/mistake on upgrade, and further agitate a huge operator pain point.\n\nTypically operators path has been to restart nova-compute to recreate the ComputeNode records when this has occurred.\n\nIn other words, I think this is a good idea and makes a lot of sense, since we can very easily check what is going on, and raise more appropriate errors.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":129,"context_line":"confrim that is safe. In the case of Ironic we will check to see if"},{"line_number":130,"context_line":"the configured Ironic has a node with that uuid, searching across all"},{"line_number":131,"context_line":"conductor groups and all shard keys. When the ComputeNode object is not"},{"line_number":132,"context_line":"deleted, we should not delete the entry in placement."},{"line_number":133,"context_line":""},{"line_number":134,"context_line":"nova-manage move ironic node"},{"line_number":135,"context_line":"----------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"50e89d65_7c4066d8","line":132,"in_reply_to":"b66c6a06_91152cc6","updated":"2022-11-17 14:17:28.000000000","message":"that is what i was getting form the above too although you said it more clearly.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7d1d7c4d652593f5cf80e6d2efcd897602a51103","unresolved":false,"context_lines":[{"line_number":129,"context_line":"confrim that is safe. In the case of Ironic we will check to see if"},{"line_number":130,"context_line":"the configured Ironic has a node with that uuid, searching across all"},{"line_number":131,"context_line":"conductor groups and all shard keys. When the ComputeNode object is not"},{"line_number":132,"context_line":"deleted, we should not delete the entry in placement."},{"line_number":133,"context_line":""},{"line_number":134,"context_line":"nova-manage move ironic node"},{"line_number":135,"context_line":"----------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"a3b1fada_e8ab715b","line":132,"in_reply_to":"bcf925f9_07545c2c","updated":"2022-11-21 16:31:18.000000000","message":"Cool then.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":10342,"name":"Jay Faulkner","display_name":"JayF","email":"jay@jvf.cc","username":"JayF","status":"youtube.com/@oss-gr / podcast.gr-oss.io"},"change_message_id":"aab4ed6d9dcb9a3afede0163417a51a5421ee8c2","unresolved":true,"context_lines":[{"line_number":137,"context_line":"We will create a new nova-manage command::"},{"line_number":138,"context_line":""},{"line_number":139,"context_line":"  nova-manage ironic-compute-node-move \u003cironic-node-uuid\u003e \\"},{"line_number":140,"context_line":"      --service \u003cdestination-service\u003e"},{"line_number":141,"context_line":""},{"line_number":142,"context_line":"This command will do the following:"},{"line_number":143,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"b0a1a8b2_da9723ae","line":140,"updated":"2022-11-08 20:46:40.000000000","message":"This is going to be painful at extreme scale to coordinate; but I suspect it\u0027s a matter of if the pain is operator-side or nova-manage side. This way at least large installations can do things slow and steady.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"01ac0f5b882b705f540943e7abc12e269a2603dc","unresolved":true,"context_lines":[{"line_number":137,"context_line":"We will create a new nova-manage command::"},{"line_number":138,"context_line":""},{"line_number":139,"context_line":"  nova-manage ironic-compute-node-move \u003cironic-node-uuid\u003e \\"},{"line_number":140,"context_line":"      --service \u003cdestination-service\u003e"},{"line_number":141,"context_line":""},{"line_number":142,"context_line":"This command will do the following:"},{"line_number":143,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"edd9c0d7_0b1fb531","line":140,"in_reply_to":"79eccf01_d503b1a0","updated":"2022-12-15 10:31:14.000000000","message":"im ok with enabling \n\n nova-manage ironic-compute-node-move \u003cdestination-service\u003e \u003cironic-node-uuid\u003e [\u003cironic-node-uuid\u003e...]\n \nsince we always need the service instead of --service we should take that as the first positional argument and then take 1+ ironic nodes uuids.\nwe can make it exactly 1 if we want too but im fine with leaving that to an implemation detail that we can review later.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"a4df4bf9fc758336ae21e748cdb20e4fcb8c48ec","unresolved":true,"context_lines":[{"line_number":137,"context_line":"We will create a new nova-manage command::"},{"line_number":138,"context_line":""},{"line_number":139,"context_line":"  nova-manage ironic-compute-node-move \u003cironic-node-uuid\u003e \\"},{"line_number":140,"context_line":"      --service \u003cdestination-service\u003e"},{"line_number":141,"context_line":""},{"line_number":142,"context_line":"This command will do the following:"},{"line_number":143,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"9e161cbb_a9fc6c38","line":140,"in_reply_to":"abb42f48_13d88328","updated":"2022-12-15 16:01:24.000000000","message":"Sean, that is a nice middle ground, I like it! Didn\u0027t think of that option before.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"256d7659f04ed17211966ab53afd99d1197457e4","unresolved":true,"context_lines":[{"line_number":137,"context_line":"We will create a new nova-manage command::"},{"line_number":138,"context_line":""},{"line_number":139,"context_line":"  nova-manage ironic-compute-node-move \u003cironic-node-uuid\u003e \\"},{"line_number":140,"context_line":"      --service \u003cdestination-service\u003e"},{"line_number":141,"context_line":""},{"line_number":142,"context_line":"This command will do the following:"},{"line_number":143,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"d7f24b8e_467916be","line":140,"in_reply_to":"b0a1a8b2_da9723ae","updated":"2022-11-11 09:49:21.000000000","message":"Yeah, agreed. My gut tells me to do the really simple thing first.\n\nMaking operators doing a Bash for loop seems like a reasonable comprimise. For me the key is to make them nice individual DB transactions, so we shouldn\u0027t accidentally lock up the table for a few hours, etc.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":137,"context_line":"We will create a new nova-manage command::"},{"line_number":138,"context_line":""},{"line_number":139,"context_line":"  nova-manage ironic-compute-node-move \u003cironic-node-uuid\u003e \\"},{"line_number":140,"context_line":"      --service \u003cdestination-service\u003e"},{"line_number":141,"context_line":""},{"line_number":142,"context_line":"This command will do the following:"},{"line_number":143,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"79eccf01_d503b1a0","line":140,"in_reply_to":"d7f24b8e_467916be","updated":"2022-11-17 22:34:45.000000000","message":"I concur, single rows makes more sense, and if well planned can be an upgrade of fairly minimal pain other than running the command however many times required. :\\","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":true,"context_lines":[{"line_number":137,"context_line":"We will create a new nova-manage command::"},{"line_number":138,"context_line":""},{"line_number":139,"context_line":"  nova-manage ironic-compute-node-move \u003cironic-node-uuid\u003e \\"},{"line_number":140,"context_line":"      --service \u003cdestination-service\u003e"},{"line_number":141,"context_line":""},{"line_number":142,"context_line":"This command will do the following:"},{"line_number":143,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"abb42f48_13d88328","line":140,"in_reply_to":"edd9c0d7_0b1fb531","updated":"2022-12-15 15:22:49.000000000","message":"Yeah agreed with Sean, but we could discuss about it in the implementation changes.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":145,"context_line":"* Error if the ComputeNode type does not match the ironic driver."},{"line_number":146,"context_line":"* Find the related Service object for the above ComputeNode"},{"line_number":147,"context_line":"  (i.e. the host)"},{"line_number":148,"context_line":"* Error is the service object is not forced down"},{"line_number":149,"context_line":"  (similar to evactuate)"},{"line_number":150,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."}],"source_content_type":"text/x-rst","patch_set":6,"id":"5fe4f845_3db783e0","line":149,"range":{"start_line":148,"start_character":2,"end_line":149,"end_character":24},"updated":"2022-11-15 10:40:50.000000000","message":"Will we required an explicit forced_down or the implicit compute service group timeout will be accepted too?","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":145,"context_line":"* Error if the ComputeNode type does not match the ironic driver."},{"line_number":146,"context_line":"* Find the related Service object for the above ComputeNode"},{"line_number":147,"context_line":"  (i.e. the host)"},{"line_number":148,"context_line":"* Error is the service object is not forced down"},{"line_number":149,"context_line":"  (similar to evactuate)"},{"line_number":150,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."}],"source_content_type":"text/x-rst","patch_set":6,"id":"9614aba2_79af99fe","line":149,"range":{"start_line":148,"start_character":2,"end_line":149,"end_character":24},"in_reply_to":"190054ab_3c6cd52e","updated":"2022-11-25 10:41:27.000000000","message":"The probably with not explicitly forced down is the service might still be running, and just in some network partition. The downside of force down, is the only way back is deleting the nova-compute node. My current thinking is to start with the \"safest\" option, i.e. require manual force down, then loosen that later, if it turns out to make life too hard.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":145,"context_line":"* Error if the ComputeNode type does not match the ironic driver."},{"line_number":146,"context_line":"* Find the related Service object for the above ComputeNode"},{"line_number":147,"context_line":"  (i.e. the host)"},{"line_number":148,"context_line":"* Error is the service object is not forced down"},{"line_number":149,"context_line":"  (similar to evactuate)"},{"line_number":150,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."}],"source_content_type":"text/x-rst","patch_set":6,"id":"190054ab_3c6cd52e","line":149,"range":{"start_line":148,"start_character":2,"end_line":149,"end_character":24},"in_reply_to":"1968da46_60204a6b","updated":"2022-11-17 22:34:45.000000000","message":"Not explicitly forced would allow for a fairly simple rolling upgrade pattern with simplified operator interaction.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":145,"context_line":"* Error if the ComputeNode type does not match the ironic driver."},{"line_number":146,"context_line":"* Find the related Service object for the above ComputeNode"},{"line_number":147,"context_line":"  (i.e. the host)"},{"line_number":148,"context_line":"* Error is the service object is not forced down"},{"line_number":149,"context_line":"  (similar to evactuate)"},{"line_number":150,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."}],"source_content_type":"text/x-rst","patch_set":6,"id":"1968da46_60204a6b","line":149,"range":{"start_line":148,"start_character":2,"end_line":149,"end_character":24},"in_reply_to":"5fe4f845_3db783e0","updated":"2022-11-17 14:17:28.000000000","message":"i kind of prefer explict but i guess either could work if we were to do this after the host that was running the old compute service had failed.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"87d7be69030fb642cffef746bce7bbd5f6653936","unresolved":true,"context_lines":[{"line_number":145,"context_line":"* Error if the ComputeNode type does not match the ironic driver."},{"line_number":146,"context_line":"* Find the related Service object for the above ComputeNode"},{"line_number":147,"context_line":"  (i.e. the host)"},{"line_number":148,"context_line":"* Error is the service object is not forced down"},{"line_number":149,"context_line":"  (similar to evactuate)"},{"line_number":150,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."}],"source_content_type":"text/x-rst","patch_set":6,"id":"73ffd33c_7722892f","line":149,"range":{"start_line":148,"start_character":2,"end_line":149,"end_character":24},"in_reply_to":"9614aba2_79af99fe","updated":"2022-11-29 16:57:34.000000000","message":"We\u0027ve been in the business of making operator\u0027s lives hard, so anything we can do to make it easier improves their overall experience. I guess I\u0027m really not a fan of the forced down bit since as stated that is unrecoverable, and the reality is operators need to be able to manage their infrastructure.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":150,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."},{"line_number":153,"context_line":"  It is OK if we find zero or 1 instances."},{"line_number":154,"context_line":"* Move ComputeNode object to the new service host"},{"line_number":155,"context_line":"* Move the instance to the destination service host"},{"line_number":156,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"eb11b10d_0b9baed4","line":153,"updated":"2022-11-15 10:40:50.000000000","message":"Is there any task_state value that would prevent a clean move of the instance to the different service? Does ironic support any kind of move operations? If yes then I think we should not move an instance to a different service while it is in that transitional state of being moved to another node.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7d1d7c4d652593f5cf80e6d2efcd897602a51103","unresolved":false,"context_lines":[{"line_number":150,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."},{"line_number":153,"context_line":"  It is OK if we find zero or 1 instances."},{"line_number":154,"context_line":"* Move ComputeNode object to the new service host"},{"line_number":155,"context_line":"* Move the instance to the destination service host"},{"line_number":156,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"82f16806_d568cc35","line":153,"in_reply_to":"a71c000e_3008d7f0","updated":"2022-11-21 16:31:18.000000000","message":"Ack","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":150,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."},{"line_number":153,"context_line":"  It is OK if we find zero or 1 instances."},{"line_number":154,"context_line":"* Move ComputeNode object to the new service host"},{"line_number":155,"context_line":"* Move the instance to the destination service host"},{"line_number":156,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"a71c000e_3008d7f0","line":153,"in_reply_to":"de9c84bf_5e27191a","updated":"2022-11-17 14:17:28.000000000","message":"i think gibi was asking if cold migrate or evacuate or shelve was supproted by ironinc\n\nto my knoladge the answer is no.\n\nit could be implmged for ironic nodes that boot form a cidner voluem pretty simply\n\nand techncially you could use the ironic python agent to snapshot/copy the disk on non volume based hosts but to my knoladdge ironic does nto implemetne this today.\n\n\nfrom anova point physically moving the node is fine provided the shared key and conductor grop dont change and teh uuid remains the same if it has an instance on it.\n\nif it does nto have an instance on it then operators are free to move it by delete and recreate or otherwise.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":10342,"name":"Jay Faulkner","display_name":"JayF","email":"jay@jvf.cc","username":"JayF","status":"youtube.com/@oss-gr / podcast.gr-oss.io"},"change_message_id":"ce331ec97c006e0174276ac9ba59cc86cc0f971e","unresolved":true,"context_lines":[{"line_number":150,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."},{"line_number":153,"context_line":"  It is OK if we find zero or 1 instances."},{"line_number":154,"context_line":"* Move ComputeNode object to the new service host"},{"line_number":155,"context_line":"* Move the instance to the destination service host"},{"line_number":156,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"de9c84bf_5e27191a","line":153,"in_reply_to":"eb11b10d_0b9baed4","updated":"2022-11-15 16:14:02.000000000","message":"If you\u0027re physically moving an Ironic node, we\u0027d probably tell you to delete and recreate the node. An Ironic node in a different physical location means it likely needs different ports setup and perhaps even needs to change conductor groups.\n\nI\u0027ve worked places that did it manually; but it was just that: extremely manual. \n\nNote that from Ironic\u0027s POV; any nova-compute can perform actions on any node. Part of why we introduce this idea of sharding just because a single nova-compute (or python process, generally) can\u0027t load in all the node data and act on it quickly at medium or larger scale.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."},{"line_number":153,"context_line":"  It is OK if we find zero or 1 instances."},{"line_number":154,"context_line":"* Move ComputeNode object to the new service host"},{"line_number":155,"context_line":"* Move the instance to the destination service host"},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"The above tool is expected to be used as part of this wider process"},{"line_number":158,"context_line":"of migrating from the old peer_list to the new shard key. There are"}],"source_content_type":"text/x-rst","patch_set":6,"id":"09a1da5f_57d0b9ab","line":155,"range":{"start_line":154,"start_character":0,"end_line":155,"end_character":51},"updated":"2022-11-15 10:40:50.000000000","message":"Do we need to do both of these in one single DB transaction per ComputeNode? In the below example migration cases the target nova-compute service is running while the ComputeNodes are moved to it. So it can pick up a moved ComputeNode while the instances are not yet moved along with it if we are not doing the move in a single DB transaction. I\u0027m not sure if picking up a ComputeNode without it\u0027s instance will lead to any problem but it feels scary :)","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."},{"line_number":153,"context_line":"  It is OK if we find zero or 1 instances."},{"line_number":154,"context_line":"* Move ComputeNode object to the new service host"},{"line_number":155,"context_line":"* Move the instance to the destination service host"},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"The above tool is expected to be used as part of this wider process"},{"line_number":158,"context_line":"of migrating from the old peer_list to the new shard key. There are"}],"source_content_type":"text/x-rst","patch_set":6,"id":"5f58e7ad_aa89ee15","line":155,"range":{"start_line":154,"start_character":0,"end_line":155,"end_character":51},"in_reply_to":"09a1da5f_57d0b9ab","updated":"2022-11-17 14:17:28.000000000","message":"ya we proably should do this in a singel db transction.\n\nthis will just be two row level locks on the specific instance/compute node affected so that should be fine.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"f51f3d80564fda812b82d08594c21972f1239cbd","unresolved":true,"context_lines":[{"line_number":151,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":152,"context_line":"* Error if there is more than 1 non-deleted instance found."},{"line_number":153,"context_line":"  It is OK if we find zero or 1 instances."},{"line_number":154,"context_line":"* Move ComputeNode object to the new service host"},{"line_number":155,"context_line":"* Move the instance to the destination service host"},{"line_number":156,"context_line":""},{"line_number":157,"context_line":"The above tool is expected to be used as part of this wider process"},{"line_number":158,"context_line":"of migrating from the old peer_list to the new shard key. There are"}],"source_content_type":"text/x-rst","patch_set":6,"id":"7d2fae26_44c05d83","line":155,"range":{"start_line":154,"start_character":0,"end_line":155,"end_character":51},"in_reply_to":"5f58e7ad_aa89ee15","updated":"2022-11-25 12:18:55.000000000","message":"good idea, +1","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":166,"context_line":"Migrate from peer_list to single nova-compute"},{"line_number":167,"context_line":"---------------------------------------------"},{"line_number":168,"context_line":""},{"line_number":169,"context_line":"Small deployments are recommended to move from a peer_list of, for example,"},{"line_number":170,"context_line":"three nova-compute services, to a single nova-compute service."},{"line_number":171,"context_line":"On failure of the nova-compute service, operators can either manually start"},{"line_number":172,"context_line":"the processes on a new host, or use an automatic active-passive HA scheme."}],"source_content_type":"text/x-rst","patch_set":6,"id":"30184e86_f8cc26df","line":169,"range":{"start_line":169,"start_character":0,"end_line":169,"end_character":5},"updated":"2022-11-17 14:17:28.000000000","message":"where small is \u003c500-1000 ironic nodes? or smaller still in the 0-250 ish range.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":166,"context_line":"Migrate from peer_list to single nova-compute"},{"line_number":167,"context_line":"---------------------------------------------"},{"line_number":168,"context_line":""},{"line_number":169,"context_line":"Small deployments are recommended to move from a peer_list of, for example,"},{"line_number":170,"context_line":"three nova-compute services, to a single nova-compute service."},{"line_number":171,"context_line":"On failure of the nova-compute service, operators can either manually start"},{"line_number":172,"context_line":"the processes on a new host, or use an automatic active-passive HA scheme."}],"source_content_type":"text/x-rst","patch_set":6,"id":"0de89625_bc504635","line":169,"range":{"start_line":169,"start_character":0,"end_line":169,"end_character":5},"in_reply_to":"30184e86_f8cc26df","updated":"2022-11-25 10:41:27.000000000","message":"hehe, I was hoping not to answer that. Probably \u003c500 is what I had in my head, but it feels like a wild guess. It probably depends on if you database is really slow or not. I should add a note on the scale here.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":176,"context_line":"* ironic and nova both default to an empty_shard key by default,"},{"line_number":177,"context_line":"  such that all ironic nodes are in the same default shard"},{"line_number":178,"context_line":"* start a new nova-compute service running the ironic driver,"},{"line_number":179,"context_line":"  ideally with a syntheic value for `[DEFAULT]host` e.g. `ironic`"},{"line_number":180,"context_line":"  This will log warnings about the need to use the nova-compute"},{"line_number":181,"context_line":"  migration tool before being able to manage any nodes"},{"line_number":182,"context_line":"* stop all existing nova-compute services"}],"source_content_type":"text/x-rst","patch_set":6,"id":"9ebf9119_4bfa070c","line":179,"range":{"start_line":179,"start_character":58,"end_line":179,"end_character":64},"updated":"2022-11-17 14:17:28.000000000","message":"here you are advising to use ironic since there will only be one ironic compute service right.\n\nin general i would advise that the ironic compute service should have there host value set to the shard key if one is set.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":176,"context_line":"* ironic and nova both default to an empty_shard key by default,"},{"line_number":177,"context_line":"  such that all ironic nodes are in the same default shard"},{"line_number":178,"context_line":"* start a new nova-compute service running the ironic driver,"},{"line_number":179,"context_line":"  ideally with a syntheic value for `[DEFAULT]host` e.g. `ironic`"},{"line_number":180,"context_line":"  This will log warnings about the need to use the nova-compute"},{"line_number":181,"context_line":"  migration tool before being able to manage any nodes"},{"line_number":182,"context_line":"* stop all existing nova-compute services"}],"source_content_type":"text/x-rst","patch_set":6,"id":"d50e7123_91f3dfd0","line":179,"range":{"start_line":179,"start_character":17,"end_line":179,"end_character":25},"updated":"2022-11-15 10:40:50.000000000","message":"nit: synthetic","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":176,"context_line":"* ironic and nova both default to an empty_shard key by default,"},{"line_number":177,"context_line":"  such that all ironic nodes are in the same default shard"},{"line_number":178,"context_line":"* start a new nova-compute service running the ironic driver,"},{"line_number":179,"context_line":"  ideally with a syntheic value for `[DEFAULT]host` e.g. `ironic`"},{"line_number":180,"context_line":"  This will log warnings about the need to use the nova-compute"},{"line_number":181,"context_line":"  migration tool before being able to manage any nodes"},{"line_number":182,"context_line":"* stop all existing nova-compute services"}],"source_content_type":"text/x-rst","patch_set":6,"id":"6f7291fb_f2573417","line":179,"range":{"start_line":179,"start_character":58,"end_line":179,"end_character":64},"in_reply_to":"9ebf9119_4bfa070c","updated":"2022-11-25 10:41:27.000000000","message":"yeah, I agree.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":181,"context_line":"  migration tool before being able to manage any nodes"},{"line_number":182,"context_line":"* stop all existing nova-compute services"},{"line_number":183,"context_line":"* mark them as forced-down via the API"},{"line_number":184,"context_line":"* Now loop around all ironic nodes and call this, assusming your"},{"line_number":185,"context_line":"  now service has its host value of just `ironic`:"},{"line_number":186,"context_line":"  `nova_manage ironic-compute-node-move \u003cuuid\u003e --service ironic`"},{"line_number":187,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"31f1728a_fe5f6bed","line":184,"range":{"start_line":184,"start_character":50,"end_line":184,"end_character":59},"updated":"2022-11-15 10:40:50.000000000","message":"nit: assuming","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":182,"context_line":"* stop all existing nova-compute services"},{"line_number":183,"context_line":"* mark them as forced-down via the API"},{"line_number":184,"context_line":"* Now loop around all ironic nodes and call this, assusming your"},{"line_number":185,"context_line":"  now service has its host value of just `ironic`:"},{"line_number":186,"context_line":"  `nova_manage ironic-compute-node-move \u003cuuid\u003e --service ironic`"},{"line_number":187,"context_line":""},{"line_number":188,"context_line":"The periodic tasks in the new nova-compute service will gradually"}],"source_content_type":"text/x-rst","patch_set":6,"id":"d9558180_875a6b97","line":185,"range":{"start_line":185,"start_character":2,"end_line":185,"end_character":5},"updated":"2022-11-15 10:40:50.000000000","message":"nit: nova?","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7de0f87a57b7b876465a0a9aeff8c720c7bb8ffd","unresolved":true,"context_lines":[{"line_number":191,"context_line":""},{"line_number":192,"context_line":"Note: you can choose to start the new nova-compute service after"},{"line_number":193,"context_line":"having migrated all the ironic compute nodes, but that would likely"},{"line_number":194,"context_line":"lead to higher downtime duiring the migration."},{"line_number":195,"context_line":""},{"line_number":196,"context_line":"Migrate from peer_list to shard_key"},{"line_number":197,"context_line":"-----------------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"766e619d_773e0816","line":194,"range":{"start_line":194,"start_character":24,"end_line":194,"end_character":31},"updated":"2022-11-15 10:40:50.000000000","message":"nit: during","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":205,"context_line":"  possibly setting a synthetic `[DEFAULT]host` value that matches the"},{"line_number":206,"context_line":"  `shard_key`."},{"line_number":207,"context_line":"* Shutdown all the older nova-compute processs with `[ironic]peer_list` set"},{"line_number":208,"context_line":"* Mark them all as forced down in the Ironic API"},{"line_number":209,"context_line":"* For each shard_key in Ironic, work out which service host you have mapped"},{"line_number":210,"context_line":"  each one to above, then run this for each ironic node uuid in the shard:"},{"line_number":211,"context_line":"  `nova_manage ironic-compute-node-move \u003cuuid\u003e --service \u003chost\u003e`"}],"source_content_type":"text/x-rst","patch_set":6,"id":"efc57d78_4a0abd62","line":208,"range":{"start_line":208,"start_character":1,"end_line":208,"end_character":48},"updated":"2022-11-17 22:34:45.000000000","message":"This is not a concept in ironic. A node can be in maintenance.\n\nBut I *think* your meaning the nova-compute service state.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":205,"context_line":"  possibly setting a synthetic `[DEFAULT]host` value that matches the"},{"line_number":206,"context_line":"  `shard_key`."},{"line_number":207,"context_line":"* Shutdown all the older nova-compute processs with `[ironic]peer_list` set"},{"line_number":208,"context_line":"* Mark them all as forced down in the Ironic API"},{"line_number":209,"context_line":"* For each shard_key in Ironic, work out which service host you have mapped"},{"line_number":210,"context_line":"  each one to above, then run this for each ironic node uuid in the shard:"},{"line_number":211,"context_line":"  `nova_manage ironic-compute-node-move \u003cuuid\u003e --service \u003chost\u003e`"}],"source_content_type":"text/x-rst","patch_set":6,"id":"79b61a8f_1bdae1fa","line":208,"range":{"start_line":208,"start_character":1,"end_line":208,"end_character":48},"in_reply_to":"efc57d78_4a0abd62","updated":"2022-11-25 10:41:27.000000000","message":"oops, I do mean the Nova API, very good point!","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":210,"context_line":"  each one to above, then run this for each ironic node uuid in the shard:"},{"line_number":211,"context_line":"  `nova_manage ironic-compute-node-move \u003cuuid\u003e --service \u003chost\u003e`"},{"line_number":212,"context_line":""},{"line_number":213,"context_line":"Note: you could start the new nova-compute services after the migration,"},{"line_number":214,"context_line":"but that would lead to a slightly longer downtime."},{"line_number":215,"context_line":""},{"line_number":216,"context_line":"Adding new compute nodes"},{"line_number":217,"context_line":"------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"17783d9c_d044ae02","line":214,"range":{"start_line":213,"start_character":0,"end_line":214,"end_character":50},"updated":"2022-11-17 22:34:45.000000000","message":"We should just standardize on running the new nova-compute service(s) with the shard_key configuration so nodes can be used immediately upon arrival in their new compute-node service destination. Since under the model, the nodes would gradually appear as the command is executed in their internal cache.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":210,"context_line":"  each one to above, then run this for each ironic node uuid in the shard:"},{"line_number":211,"context_line":"  `nova_manage ironic-compute-node-move \u003cuuid\u003e --service \u003chost\u003e`"},{"line_number":212,"context_line":""},{"line_number":213,"context_line":"Note: you could start the new nova-compute services after the migration,"},{"line_number":214,"context_line":"but that would lead to a slightly longer downtime."},{"line_number":215,"context_line":""},{"line_number":216,"context_line":"Adding new compute nodes"},{"line_number":217,"context_line":"------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"3c555aca_ab95e39f","line":214,"range":{"start_line":213,"start_character":0,"end_line":214,"end_character":50},"in_reply_to":"17783d9c_d044ae02","updated":"2022-11-25 10:41:27.000000000","message":"Yeah, I don\u0027t think supporting both changes the code as such. But I should rephrase this as an alternative we have discarded.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":230,"context_line":"To move an ironic node between shards, you need to delete"},{"line_number":231,"context_line":"the node in Ironic and re-add it:"},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"* drain the node of Nova instances"},{"line_number":234,"context_line":"* delete the ironic node in Ironic"},{"line_number":235,"context_line":"* this will delete the placement entry and"},{"line_number":236,"context_line":"  ComputeNode DB entry"},{"line_number":237,"context_line":"* re-add as a new ironic node, with a fresh uuid,"},{"line_number":238,"context_line":"  in the destination shard"},{"line_number":239,"context_line":"* this new node will be treated like any"},{"line_number":240,"context_line":"  other new node"},{"line_number":241,"context_line":""},{"line_number":242,"context_line":"The alternative is to force down the current nova-compute"},{"line_number":243,"context_line":"process, and move all nodes it used to manage to a new"}],"source_content_type":"text/x-rst","patch_set":6,"id":"d1d13b0d_59081fa1","line":240,"range":{"start_line":233,"start_character":0,"end_line":240,"end_character":16},"updated":"2022-11-17 22:34:45.000000000","message":"Why don\u0027t we just teach the command to do the needful for us, beyond the very first revision/MVP. In part because I think there is an assumption here that no operator front loads the system UUID... and many at scale do front load the system UUID because it then matches a physical asset inventory and system GUID.\n\nAlso, deleting an ironic node means history gets lost. It introduces lots of risk for re-population of fields, especially considering things like passwords can\u0027t easily be gotten out of the API meaning we\u0027re adding more manual processes and credential tracking.\n\nThe ironic side field, if the user requesting the field change is intended to be restricted, so as long as the user the nova environment is configured with has sufficient RBAC rights, then we could permit it to be modified and the records could be reconciled/updated with no human copy/paste/manual credential identification/re-entry risk.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":230,"context_line":"To move an ironic node between shards, you need to delete"},{"line_number":231,"context_line":"the node in Ironic and re-add it:"},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"* drain the node of Nova instances"},{"line_number":234,"context_line":"* delete the ironic node in Ironic"},{"line_number":235,"context_line":"* this will delete the placement entry and"},{"line_number":236,"context_line":"  ComputeNode DB entry"},{"line_number":237,"context_line":"* re-add as a new ironic node, with a fresh uuid,"},{"line_number":238,"context_line":"  in the destination shard"},{"line_number":239,"context_line":"* this new node will be treated like any"},{"line_number":240,"context_line":"  other new node"},{"line_number":241,"context_line":""},{"line_number":242,"context_line":"The alternative is to force down the current nova-compute"},{"line_number":243,"context_line":"process, and move all nodes it used to manage to a new"}],"source_content_type":"text/x-rst","patch_set":6,"id":"2e4ff3f6_09d6627c","line":240,"range":{"start_line":233,"start_character":0,"end_line":240,"end_character":16},"in_reply_to":"d1d13b0d_59081fa1","updated":"2022-11-25 10:41:27.000000000","message":"Yeah, its nasty. This is all because of force-down. I don\u0027t like it.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":240,"context_line":"  other new node"},{"line_number":241,"context_line":""},{"line_number":242,"context_line":"The alternative is to force down the current nova-compute"},{"line_number":243,"context_line":"process, and move all nodes it used to manage to a new"},{"line_number":244,"context_line":"nova-compute service."},{"line_number":245,"context_line":""},{"line_number":246,"context_line":"Move shards between nova-compute services"},{"line_number":247,"context_line":"-----------------------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"0cc4800e_af1610a0","line":244,"range":{"start_line":243,"start_character":18,"end_line":244,"end_character":21},"updated":"2022-11-17 22:34:45.000000000","message":"This is not correct, they wouldn\u0027t need to move everything to change one node. They would only need to change the one in the three places where it matters. (or a single command can do it for them!)","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":240,"context_line":"  other new node"},{"line_number":241,"context_line":""},{"line_number":242,"context_line":"The alternative is to force down the current nova-compute"},{"line_number":243,"context_line":"process, and move all nodes it used to manage to a new"},{"line_number":244,"context_line":"nova-compute service."},{"line_number":245,"context_line":""},{"line_number":246,"context_line":"Move shards between nova-compute services"},{"line_number":247,"context_line":"-----------------------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"a7703b90_64b5c831","line":244,"range":{"start_line":243,"start_character":18,"end_line":244,"end_character":21},"in_reply_to":"0cc4800e_af1610a0","updated":"2022-11-25 10:41:27.000000000","message":"So this is because above I say you need to force-down a nova-compute service before you run the nova-manage command. There is no way to un-force-down the service, so you need to delete it and create a new one.\n\nFor clarity, I don\u0027t like this, its just a consequence of the safety first approach above.\n\nMaybe we should use maintenance mode instead, as that is reversible (thinking...)","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":247,"context_line":"-----------------------------------------"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"To move a shard between nova-compute services, you need to"},{"line_number":250,"context_line":"replace the nova-compute process with a new one:"},{"line_number":251,"context_line":""},{"line_number":252,"context_line":"* ensure the destination nova-compute is configured with the"},{"line_number":253,"context_line":"  shard you want to move, and is running"}],"source_content_type":"text/x-rst","patch_set":6,"id":"84461e6e_67d01e0c","line":250,"updated":"2022-11-17 22:34:45.000000000","message":"I think we need to clarify here. There is no state data to the nova-compute service which is running.  And if I wanted to move an entire shard to a different vm/container/host running nova-compute, then I would just match the configuration and start the service after stopping the old one. There shouldn\u0027t need to be a command, afaik, which would need to be run across all of the nodes to effect that change. I guess, unless, we\u0027re talking a case where someone is running an explicitly different CONF.host value?\n\nNow, if you wanted to change the shard one or mode nodes reside in, then it seems like we\u0027re in the prior case above.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":247,"context_line":"-----------------------------------------"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"To move a shard between nova-compute services, you need to"},{"line_number":250,"context_line":"replace the nova-compute process with a new one:"},{"line_number":251,"context_line":""},{"line_number":252,"context_line":"* ensure the destination nova-compute is configured with the"},{"line_number":253,"context_line":"  shard you want to move, and is running"}],"source_content_type":"text/x-rst","patch_set":6,"id":"73777a8b_00f8a0c3","line":250,"in_reply_to":"39926402_ad42fcce","updated":"2023-01-04 22:29:31.000000000","message":"At least at the moment, you can\u0027t really change the CONF.host of these computes any more than you can with a libvirt compute right? The compute node objects are still referenced to the service (which owns CONF.host and CONF.ironic.shard_key) by the service hostname at the time of creation.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":true,"context_lines":[{"line_number":247,"context_line":"-----------------------------------------"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"To move a shard between nova-compute services, you need to"},{"line_number":250,"context_line":"replace the nova-compute process with a new one:"},{"line_number":251,"context_line":""},{"line_number":252,"context_line":"* ensure the destination nova-compute is configured with the"},{"line_number":253,"context_line":"  shard you want to move, and is running"}],"source_content_type":"text/x-rst","patch_set":6,"id":"39926402_ad42fcce","line":250,"in_reply_to":"679a7aaf_3d3ef1cd","updated":"2022-12-15 15:22:49.000000000","message":"OK, fair enough.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":247,"context_line":"-----------------------------------------"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"To move a shard between nova-compute services, you need to"},{"line_number":250,"context_line":"replace the nova-compute process with a new one:"},{"line_number":251,"context_line":""},{"line_number":252,"context_line":"* ensure the destination nova-compute is configured with the"},{"line_number":253,"context_line":"  shard you want to move, and is running"}],"source_content_type":"text/x-rst","patch_set":6,"id":"679a7aaf_3d3ef1cd","line":250,"in_reply_to":"84461e6e_67d01e0c","updated":"2022-11-25 10:41:27.000000000","message":"Ah, I was thinking about the case of different CONF.host values here. I should add a section for \"moving the nova-compute service between hosts\"","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":297,"context_line":"In addition, we should ensure we only delete a ComputeNode object"},{"line_number":298,"context_line":"when the driver explictly says its safe to delete. In the case of"},{"line_number":299,"context_line":"the Ironic driver, we should ensure the node no longer exists in"},{"line_number":300,"context_line":"Ironic, being sure to search across all shards."},{"line_number":301,"context_line":""},{"line_number":302,"context_line":"This is all every related this spec on robustfying"},{"line_number":303,"context_line":"the Compute Node and Service object relationship:"}],"source_content_type":"text/x-rst","patch_set":6,"id":"a43d6f96_1d66582f","line":300,"range":{"start_line":300,"start_character":8,"end_line":300,"end_character":47},"updated":"2022-11-17 22:34:45.000000000","message":"Good point, and you can ask it directly by UUID which is a plus.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":299,"context_line":"the Ironic driver, we should ensure the node no longer exists in"},{"line_number":300,"context_line":"Ironic, being sure to search across all shards."},{"line_number":301,"context_line":""},{"line_number":302,"context_line":"This is all every related this spec on robustfying"},{"line_number":303,"context_line":"the Compute Node and Service object relationship:"},{"line_number":304,"context_line":"https://review.opendev.org/c/openstack/nova-specs/+/853837"},{"line_number":305,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"d42e24ca_1df18c24","line":302,"range":{"start_line":302,"start_character":12,"end_line":302,"end_character":25},"updated":"2022-11-17 14:17:28.000000000","message":"very related to","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ad10fe95623c5edae6f57f8a5ff98b2a9f2a8db","unresolved":true,"context_lines":[{"line_number":335,"context_line":"We will rename the \"partition_key\" configuration to be expliclity"},{"line_number":336,"context_line":"\"conductor_group\"."},{"line_number":337,"context_line":""},{"line_number":338,"context_line":"We will deprecate the peer list key. When we start up and see"},{"line_number":339,"context_line":"anything set, we ommit a warning about the bugs in using this"},{"line_number":340,"context_line":"legacy auto sharding, and recomend moving to the explicit sharding."},{"line_number":341,"context_line":""},{"line_number":342,"context_line":"There is a new `shard_key` config, as descirbed above."},{"line_number":343,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"be36d5d8_787bca49","line":340,"range":{"start_line":338,"start_character":0,"end_line":340,"end_character":67},"updated":"2022-11-17 14:17:28.000000000","message":"so for a couple of cycle we will gain experince with this but i would like us to consider removal in the C or D release.\n\n\nthe main motivation is removing tooz as a dep and resucing the complexity in general by removing the hash ring logic.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":782,"name":"John Garbutt","email":"john@johngarbutt.com","username":"johngarbutt"},"change_message_id":"ce7b0f01a5e9e93f3a2981cb08aa770a219e39e3","unresolved":true,"context_lines":[{"line_number":335,"context_line":"We will rename the \"partition_key\" configuration to be expliclity"},{"line_number":336,"context_line":"\"conductor_group\"."},{"line_number":337,"context_line":""},{"line_number":338,"context_line":"We will deprecate the peer list key. When we start up and see"},{"line_number":339,"context_line":"anything set, we ommit a warning about the bugs in using this"},{"line_number":340,"context_line":"legacy auto sharding, and recomend moving to the explicit sharding."},{"line_number":341,"context_line":""},{"line_number":342,"context_line":"There is a new `shard_key` config, as descirbed above."},{"line_number":343,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"3287ef64_ffb4c7eb","line":340,"range":{"start_line":338,"start_character":0,"end_line":340,"end_character":67},"in_reply_to":"27d3b4da_c281d2f8","updated":"2022-11-25 10:41:27.000000000","message":"I am not too fussed on the when, I don\u0027t quite understand our new upgrade approach enough yet to comment.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"a3c3511e4f4d31a61ae6a7adb6340fa23b271c49","unresolved":true,"context_lines":[{"line_number":335,"context_line":"We will rename the \"partition_key\" configuration to be expliclity"},{"line_number":336,"context_line":"\"conductor_group\"."},{"line_number":337,"context_line":""},{"line_number":338,"context_line":"We will deprecate the peer list key. When we start up and see"},{"line_number":339,"context_line":"anything set, we ommit a warning about the bugs in using this"},{"line_number":340,"context_line":"legacy auto sharding, and recomend moving to the explicit sharding."},{"line_number":341,"context_line":""},{"line_number":342,"context_line":"There is a new `shard_key` config, as descirbed above."},{"line_number":343,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"66b61f6d_3d9c8a03","line":340,"range":{"start_line":338,"start_character":0,"end_line":340,"end_character":67},"in_reply_to":"3287ef64_ffb4c7eb","updated":"2022-11-29 18:06:26.000000000","message":"with the new lifecycel we can only remove deprecate functionality if it was deprecatexed in an odd or .1 releases so \n\nso functionality deprecated in 2023.1/antilope can be removed in 2023.2/B but funcitonality deprecated in B cannot be removed in C since that is the first .1 release it was deprecated.\n\n\nso deprecations that happen in B and C can be remvoed in D\ndeprecatrions in D and E can be removed in F\n\nassumeing actully stick to the example schdule\nhttps://github.com/openstack/governance/blob/master/resolutions/20220210-release-cadence-adjustment.rst#example-sequence\n\n\nso we are free to deprecate at any time\nwe just can remove the fuctionality until that deprecation notice has been included in a \"SLURP (Skip Level Upgrade Release Process)\" release.\n\nso by deprecating it in this cycle whcih is the first \"SLURP\" release we can drop the parmater in B if we want to but we can keep it to C or D just as simply.\n\nif we dont deprecated this release we are forced to keep it until D as teh deprecation noticec will have to go out in C even if we deprecated it in B.\n\n\nthat is my understandign anyway.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":true,"context_lines":[{"line_number":335,"context_line":"We will rename the \"partition_key\" configuration to be expliclity"},{"line_number":336,"context_line":"\"conductor_group\"."},{"line_number":337,"context_line":""},{"line_number":338,"context_line":"We will deprecate the peer list key. When we start up and see"},{"line_number":339,"context_line":"anything set, we ommit a warning about the bugs in using this"},{"line_number":340,"context_line":"legacy auto sharding, and recomend moving to the explicit sharding."},{"line_number":341,"context_line":""},{"line_number":342,"context_line":"There is a new `shard_key` config, as descirbed above."},{"line_number":343,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"09ad272c_fbee4cc6","line":340,"range":{"start_line":338,"start_character":0,"end_line":340,"end_character":67},"in_reply_to":"66b61f6d_3d9c8a03","updated":"2022-12-15 15:22:49.000000000","message":"Surely we can discuss whenever to remove those deprecations for the next PTG if you want.","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"4d97b760e96f0daaeed5c734f52518f471710f96","unresolved":true,"context_lines":[{"line_number":335,"context_line":"We will rename the \"partition_key\" configuration to be expliclity"},{"line_number":336,"context_line":"\"conductor_group\"."},{"line_number":337,"context_line":""},{"line_number":338,"context_line":"We will deprecate the peer list key. When we start up and see"},{"line_number":339,"context_line":"anything set, we ommit a warning about the bugs in using this"},{"line_number":340,"context_line":"legacy auto sharding, and recomend moving to the explicit sharding."},{"line_number":341,"context_line":""},{"line_number":342,"context_line":"There is a new `shard_key` config, as descirbed above."},{"line_number":343,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"27d3b4da_c281d2f8","line":340,"range":{"start_line":338,"start_character":0,"end_line":340,"end_character":67},"in_reply_to":"be36d5d8_787bca49","updated":"2022-11-17 22:34:45.000000000","message":"If we can move forward quickly, *and* the nova-manage command is forgiving/does the needful, I don\u0027t see why we can\u0027t be aggressive on removal of the hash ring support. We\u0027ll want to make this relatively easy to move to, and that is critical. On a plus side, we\u0027re already working on the code for the storage and visibility of the keys, so that is a plus!","commit_id":"bdc2f2e0994ed666980a29c2228fd99401dfabee"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":true,"context_lines":[{"line_number":23,"context_line":"conductor group. However, some conductor groups can be very large,"},{"line_number":24,"context_line":"servered by many ironic-conductor services."},{"line_number":25,"context_line":""},{"line_number":26,"context_line":"To help with this, Nova has attempted to dynamically spread ironic"},{"line_number":27,"context_line":"nodes between a set of nova-compute peers. While this work some of"},{"line_number":28,"context_line":"the time, there are some major limitations:"},{"line_number":29,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"ccb65790_9510e293","line":26,"range":{"start_line":26,"start_character":19,"end_line":26,"end_character":23},"updated":"2022-12-15 15:22:49.000000000","message":"tbc, the ironic virt driver, not any other nova service AFAIK.","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":24,"context_line":"servered by many ironic-conductor services."},{"line_number":25,"context_line":""},{"line_number":26,"context_line":"To help with this, Nova has attempted to dynamically spread ironic"},{"line_number":27,"context_line":"nodes between a set of nova-compute peers. While this work some of"},{"line_number":28,"context_line":"the time, there are some major limitations:"},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"* when one nova-compute is down, only unassigned ironic nodes can"}],"source_content_type":"text/x-rst","patch_set":7,"id":"212b5ada_327a099c","line":27,"range":{"start_line":27,"start_character":54,"end_line":27,"end_character":58},"updated":"2023-01-04 22:29:31.000000000","message":"\"works\"","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":27,"context_line":"nodes between a set of nova-compute peers. While this work some of"},{"line_number":28,"context_line":"the time, there are some major limitations:"},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"* when one nova-compute is down, only unassigned ironic nodes can"},{"line_number":31,"context_line":"  move to another nova-compute service"},{"line_number":32,"context_line":"* i.e. when one nova-compute is down, all ironic nodes with nova instances"},{"line_number":33,"context_line":"  associated with the down nova-compute service are unable to be"}],"source_content_type":"text/x-rst","patch_set":7,"id":"1ba79f7d_91e274b2","line":30,"range":{"start_line":30,"start_character":38,"end_line":30,"end_character":48},"updated":"2023-01-04 22:29:31.000000000","message":"\"unassigned\" meaning \"nodes without instances\" right? Maybe \"assigned\" means something in ironic land, but the limitation is on the nova side, and is \"has an instance or not\" right?","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"01ac0f5b882b705f540943e7abc12e269a2603dc","unresolved":true,"context_lines":[{"line_number":37,"context_line":"  In part because every nova-compute fetches all nodes, in a large enough"},{"line_number":38,"context_line":"  cloud, this can take over 24 hours."},{"line_number":39,"context_line":""},{"line_number":40,"context_line":"This spec about tweaking the way we shard Ironic compute nodes."},{"line_number":41,"context_line":"We need to stop violating deep assumptions in the compute manager"},{"line_number":42,"context_line":"code by moving to a more static ironic node partitions."},{"line_number":43,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"24bfb8bd_37e1f7c7","line":40,"range":{"start_line":40,"start_character":0,"end_line":40,"end_character":24},"updated":"2022-12-15 10:31:14.000000000","message":"nit: This spec is about ...","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":false,"context_lines":[{"line_number":46,"context_line":""},{"line_number":47,"context_line":"Any users of the ironic driver that have more than one"},{"line_number":48,"context_line":"nova-compute service per conductor group should move to an"},{"line_number":49,"context_line":"active-passive failover mode."},{"line_number":50,"context_line":""},{"line_number":51,"context_line":"The new static sharding will be of paritcular interest for clouds"},{"line_number":52,"context_line":"with ironic conductor groups that are greater than around"}],"source_content_type":"text/x-rst","patch_set":7,"id":"b04400c9_6fb40815","line":49,"updated":"2022-12-15 15:22:49.000000000","message":"++","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":true,"context_lines":[{"line_number":48,"context_line":"nova-compute service per conductor group should move to an"},{"line_number":49,"context_line":"active-passive failover mode."},{"line_number":50,"context_line":""},{"line_number":51,"context_line":"The new static sharding will be of paritcular interest for clouds"},{"line_number":52,"context_line":"with ironic conductor groups that are greater than around"},{"line_number":53,"context_line":"1000 baremetal nodes."},{"line_number":54,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"44941f4e_30fab36c","line":51,"range":{"start_line":51,"start_character":35,"end_line":51,"end_character":45},"updated":"2022-12-15 15:22:49.000000000","message":"nit: particular","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":false,"context_lines":[{"line_number":52,"context_line":"with ironic conductor groups that are greater than around"},{"line_number":53,"context_line":"1000 baremetal nodes."},{"line_number":54,"context_line":""},{"line_number":55,"context_line":".. NOTE: many parts of this story work today but"},{"line_number":56,"context_line":" need better documentation:"},{"line_number":57,"context_line":""},{"line_number":58,"context_line":" * understanding the current scale limit of around 500-1000 ironic"}],"source_content_type":"text/x-rst","patch_set":7,"id":"2a5f4cf5_3f296943","line":55,"range":{"start_line":55,"start_character":3,"end_line":55,"end_character":7},"updated":"2022-12-15 15:22:49.000000000","message":"nit: not needing uppercase","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":false,"context_lines":[{"line_number":60,"context_line":" * sharding ironic-conductors and nova-computes using"},{"line_number":61,"context_line":"  ironic conductor groups."},{"line_number":62,"context_line":"  Note: conductor groups have a specific use in Ironic"},{"line_number":63,"context_line":"  and this is not it, but it works for some users."},{"line_number":64,"context_line":" * active-passive failover for nova-compute services"},{"line_number":65,"context_line":"  running the ironic driver."},{"line_number":66,"context_line":"  Note: the time to start up a new process after a"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bb2979f4_f6d706e8","line":63,"updated":"2022-12-15 15:22:49.000000000","message":"just tbc, a conductor group is purely an Ironic name, not a Nova one. If we document it, we need to make it clear to make sure our users aren\u0027t uncorrectly understanding","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":true,"context_lines":[{"line_number":72,"context_line":""},{"line_number":73,"context_line":"We add a new configuration option:"},{"line_number":74,"context_line":""},{"line_number":75,"context_line":"* [ironic] shard_key"},{"line_number":76,"context_line":""},{"line_number":77,"context_line":"By default, there will be no shard_key set, and we will continue to"},{"line_number":78,"context_line":"expose all ironic nodes from a single nova-compute process."}],"source_content_type":"text/x-rst","patch_set":7,"id":"4b046ea8_19f30682","line":75,"updated":"2022-12-15 15:22:49.000000000","message":"this is a StrOpt I guess","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":false,"context_lines":[{"line_number":86,"context_line":""},{"line_number":87,"context_line":"When we look up a specific ironic node via a node uuid or"},{"line_number":88,"context_line":"instance uuid, we should not restrict that to either the shard key"},{"line_number":89,"context_line":"or conductor group."},{"line_number":90,"context_line":""},{"line_number":91,"context_line":"Similar to checking the instance uuid is still present on the Ironic"},{"line_number":92,"context_line":"node before performing an action, or ensuring there is no instance uuid"}],"source_content_type":"text/x-rst","patch_set":7,"id":"45ea765a_67dd5a50","line":89,"updated":"2022-12-15 15:22:49.000000000","message":"cool, so no API behaviour modification","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":91,"context_line":"Similar to checking the instance uuid is still present on the Ironic"},{"line_number":92,"context_line":"node before performing an action, or ensuring there is no instance uuid"},{"line_number":93,"context_line":"before provisioning, we should also check the node is in the correct"},{"line_number":94,"context_line":"shard (and conductor group) before doing anything with that Ironic node."},{"line_number":95,"context_line":""},{"line_number":96,"context_line":"Config changes and Deprecations"},{"line_number":97,"context_line":"-------------------------------"}],"source_content_type":"text/x-rst","patch_set":7,"id":"88ec76ca_70cb0240","line":94,"updated":"2023-01-04 22:29:31.000000000","message":"I was going to ask, but maybe this is my answer:\n\nWhat happens if someone reassigns an ironic node, on the ironic side, to a different shard when it has an instance? The original nova-compute should not choke, and I\u0027m assuming the new nova-compute that suddenly sees it should be able to properly ignore it?","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":true,"context_lines":[{"line_number":98,"context_line":""},{"line_number":99,"context_line":"We will keep the option to target a specific conductor group,"},{"line_number":100,"context_line":"but this option will be renamed from partition_key to conductor_group."},{"line_number":101,"context_line":"This is addative to the shard_key above, the target ironic nodes are"},{"line_number":102,"context_line":"those in both the correct `shard_key` and the correct `conductor_group`,"},{"line_number":103,"context_line":"when both are configured."},{"line_number":104,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"8b96f4a5_78d4efe5","line":101,"range":{"start_line":101,"start_character":8,"end_line":101,"end_character":16},"updated":"2022-12-15 15:22:49.000000000","message":"not : additive (I guess)","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"01ac0f5b882b705f540943e7abc12e269a2603dc","unresolved":true,"context_lines":[{"line_number":133,"context_line":"* Error if the service object is not reported as down, and"},{"line_number":134,"context_line":"  has not also been put into maintanance. We do not require"},{"line_number":135,"context_line":"  forced down, because we might only be moving a subset of"},{"line_number":136,"context_line":"  nodes associated with this nova-compute service."},{"line_number":137,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":138,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":139,"context_line":"* Error if there is more than 1 non-deleted instance found."}],"source_content_type":"text/x-rst","patch_set":7,"id":"8c19d56c_d6e32daf","line":136,"updated":"2022-12-15 10:31:14.000000000","message":"this is a littel confusing but what that means is that the compute service must be stoped to run this command or force down can be used to mark the service as down but we wont require it.\n\n\nwhen we write the docs for this procedrue its proably impoatnt to ensure that all ironic compute serices are configured with the shareding key before you stop them so that rebalnce does not happen.","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"6785a22614b9d47ecf0ec77d3cb95656721dcd7f","unresolved":true,"context_lines":[{"line_number":133,"context_line":"* Error if the service object is not reported as down, and"},{"line_number":134,"context_line":"  has not also been put into maintanance. We do not require"},{"line_number":135,"context_line":"  forced down, because we might only be moving a subset of"},{"line_number":136,"context_line":"  nodes associated with this nova-compute service."},{"line_number":137,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":138,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":139,"context_line":"* Error if there is more than 1 non-deleted instance found."}],"source_content_type":"text/x-rst","patch_set":7,"id":"4abaa519_286d7bb7","line":136,"in_reply_to":"3f75b8b3_f43c31de","updated":"2022-12-15 15:12:23.000000000","message":"ya combining upgrade and migration would not be a recommended approch\n\nyou shoudl upsgrade then after add the shared key and remove the peer_list\n\ni think we can cover that in the release notes and docs.","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":11655,"name":"Julia Kreger","email":"juliaashleykreger@gmail.com","username":"jkreger","status":"Flying to the moon with a Jetpack!"},"change_message_id":"e358eb379d0baa19840296d5472073f77915133b","unresolved":true,"context_lines":[{"line_number":133,"context_line":"* Error if the service object is not reported as down, and"},{"line_number":134,"context_line":"  has not also been put into maintanance. We do not require"},{"line_number":135,"context_line":"  forced down, because we might only be moving a subset of"},{"line_number":136,"context_line":"  nodes associated with this nova-compute service."},{"line_number":137,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":138,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":139,"context_line":"* Error if there is more than 1 non-deleted instance found."}],"source_content_type":"text/x-rst","patch_set":7,"id":"3f75b8b3_f43c31de","line":136,"in_reply_to":"8c19d56c_d6e32daf","updated":"2022-12-15 14:50:36.000000000","message":"Just thinking outloud here: The presence of a shard key to prevent re-balance would be a tricky thing to navigate as other computes would need to be aware of that logic and exclude automatically.  Which... I guess could work as long as someone doesn\u0027t try to combine upgrade+migration at the same time. The plus side is the current state of the code does include all nodes with a matching ComputeNode.host into the local list of cached nodes upon which the ring is generated.","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":134,"context_line":"  has not also been put into maintanance. We do not require"},{"line_number":135,"context_line":"  forced down, because we might only be moving a subset of"},{"line_number":136,"context_line":"  nodes associated with this nova-compute service."},{"line_number":137,"context_line":"* Check the Service object for the destination service host exists"},{"line_number":138,"context_line":"* Find all non-deleted instances for this (host,node)"},{"line_number":139,"context_line":"* Error if there is more than 1 non-deleted instance found."},{"line_number":140,"context_line":"  It is OK if we find zero or 1 instances."}],"source_content_type":"text/x-rst","patch_set":7,"id":"7f9f8764_52f45db1","line":137,"updated":"2023-01-04 22:29:31.000000000","message":"This can be up, but the source one has to be down? Seems risky to me to be jamming nodes into a running nova-compute...","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":false,"context_lines":[{"line_number":140,"context_line":"  It is OK if we find zero or 1 instances."},{"line_number":141,"context_line":"* In one DB transaction:"},{"line_number":142,"context_line":"  move the ComputeNode object to the destination service host and"},{"line_number":143,"context_line":"  move the Instance (if there is one) to the destination service host"},{"line_number":144,"context_line":""},{"line_number":145,"context_line":"The above tool is expected to be used as part of this wider process"},{"line_number":146,"context_line":"of migrating from the old peer_list to the new shard key. There are"}],"source_content_type":"text/x-rst","patch_set":7,"id":"2d5314f1_27af26f2","line":143,"updated":"2022-12-15 15:22:49.000000000","message":"all the above actions lgtm.","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":149,"context_line":""},{"line_number":150,"context_line":"* moving from a peer_list to a single nova-compute"},{"line_number":151,"context_line":"* moving from peer_list to shard_key, while keeping multiple nova-compute"},{"line_number":152,"context_line":"  proccesses (for a single conductor group)"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"Migrate from peer_list to single nova-compute"},{"line_number":155,"context_line":"---------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":7,"id":"c7709367_1fe67b39","line":152,"updated":"2023-01-04 22:29:31.000000000","message":"This is to migrate compute nodes from peer_list to shard_key, requiring a sort of maintenance mode of forcing down the old peer_list service in order to do it, right?\n\nThis is not to become a general-purpose \"I want to reswizzle my ironic nodes at runtime\" tool for the future, is that correct?\n\nAssuming so, I think we might want to choose a less-generic name like \"migrate-ironic-node\" or \"do-that-thing-that-one-time\".","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":174,"context_line":"  nova-compute service has its host value of just `ironic`:"},{"line_number":175,"context_line":"  `nova_manage ironic-compute-node-move \u003cuuid\u003e --service ironic`"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"The periodic tasks in the new nova-compute service will gradually"},{"line_number":178,"context_line":"pick up the new ComputeNodes, and will start being able to recieve"},{"line_number":179,"context_line":"commands such a reboot for all the moved instances."},{"line_number":180,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"414844f4_9349f9af","line":177,"updated":"2023-01-04 22:29:31.000000000","message":"Related to above, guess, but... are you sure this will work? ISTR that the resource tracker is not very tolerant of things changing underneath it, like if it enumerates nodes in two separate places but finds a different list...","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":175,"context_line":"  `nova_manage ironic-compute-node-move \u003cuuid\u003e --service ironic`"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"The periodic tasks in the new nova-compute service will gradually"},{"line_number":178,"context_line":"pick up the new ComputeNodes, and will start being able to recieve"},{"line_number":179,"context_line":"commands such a reboot for all the moved instances."},{"line_number":180,"context_line":""},{"line_number":181,"context_line":"While you could start the new nova-compute service after"}],"source_content_type":"text/x-rst","patch_set":7,"id":"75de4dc9_83021e1a","line":178,"range":{"start_line":178,"start_character":59,"end_line":178,"end_character":66},"updated":"2023-01-04 22:29:31.000000000","message":"receive","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d631cb37319d8d5da9c668a34647f3660044e286","unresolved":true,"context_lines":[{"line_number":185,"context_line":"Migrate from peer_list to shard_key"},{"line_number":186,"context_line":"-----------------------------------"},{"line_number":187,"context_line":""},{"line_number":188,"context_line":"The proccess to move from the hash key based peer_list to the static"},{"line_number":189,"context_line":"shard_key from ironic is very similar to the above process:"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"* Set the shard_key on all your ironic nodes, such that you can spread"}],"source_content_type":"text/x-rst","patch_set":7,"id":"09c7daed_f6b9eb63","line":188,"range":{"start_line":188,"start_character":4,"end_line":188,"end_character":12},"updated":"2022-12-15 15:22:49.000000000","message":"nit: process","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":232,"context_line":"  process for the shard key it was moved into:"},{"line_number":233,"context_line":"  `nova_manage ironic-compute-node-move \u003cuuid\u003e --service my_shard_key`"},{"line_number":234,"context_line":"* Now unset maintanance mode for the nova-compute,"},{"line_number":235,"context_line":"  and start that service back up"},{"line_number":236,"context_line":""},{"line_number":237,"context_line":"Move shards between nova-compute services"},{"line_number":238,"context_line":"-----------------------------------------"}],"source_content_type":"text/x-rst","patch_set":7,"id":"8185e581_609fbd64","line":235,"updated":"2023-01-04 22:29:31.000000000","message":"You don\u0027t say here whether this is intended to work with instances on these nodes, hence my question earlier. I suppose if you\u0027re quiescing the nova-compute on both ends it\u0027ll be okay, but I guess I\u0027m not super excited about people potentially doing this a lot...","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":241,"context_line":"replace the nova-compute process with a new one:"},{"line_number":242,"context_line":""},{"line_number":243,"context_line":"* ensure the destination nova-compute is configured with the"},{"line_number":244,"context_line":"  shard you want to move, and is running"},{"line_number":245,"context_line":"* stop the nova-compute process currently serving the shard"},{"line_number":246,"context_line":"* force-down the service via the API"},{"line_number":247,"context_line":"* for each ironic node uuid in the shard call nova-manage"}],"source_content_type":"text/x-rst","patch_set":7,"id":"dfdc8ce8_cdc7c9ba","line":244,"updated":"2023-01-04 22:29:31.000000000","message":"As above, this makes me nervous...","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":254,"context_line":"before allowing the nova-manage to move the ironic nodes about,"},{"line_number":255,"context_line":"in a similar way to evacuate."},{"line_number":256,"context_line":"But this creates problems when trying to re-balance shards as you"},{"line_number":257,"context_line":"remove nodes at the end of their life."},{"line_number":258,"context_line":""},{"line_number":259,"context_line":"We could consider a list of shard keys, rather than a single shard key"},{"line_number":260,"context_line":"per nova-compute. But for this first version, we have chosen the simpler"}],"source_content_type":"text/x-rst","patch_set":7,"id":"dd6309ae_574554b8","line":257,"updated":"2023-01-04 22:29:31.000000000","message":"I don\u0027t *want* it to be arbitrarily restrictive, I just don\u0027t think I trust the current compute manager, resource tracker, etc stuff to handle this properly. Maybe a substantial survey could convince us (me) that it\u0027s okay, but...","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":296,"context_line":"the Ironic driver, we should ensure the node no longer exists in"},{"line_number":297,"context_line":"Ironic, being sure to search across all shards."},{"line_number":298,"context_line":""},{"line_number":299,"context_line":"This is all very related this spec on robustfying"},{"line_number":300,"context_line":"the Compute Node and Service object relationship:"},{"line_number":301,"context_line":"https://review.opendev.org/c/openstack/nova-specs/+/853837"},{"line_number":302,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"2ba1b654_a99cceb8","line":299,"range":{"start_line":299,"start_character":17,"end_line":299,"end_character":24},"updated":"2023-01-04 22:29:31.000000000","message":"\"related to\"","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":296,"context_line":"the Ironic driver, we should ensure the node no longer exists in"},{"line_number":297,"context_line":"Ironic, being sure to search across all shards."},{"line_number":298,"context_line":""},{"line_number":299,"context_line":"This is all very related this spec on robustfying"},{"line_number":300,"context_line":"the Compute Node and Service object relationship:"},{"line_number":301,"context_line":"https://review.opendev.org/c/openstack/nova-specs/+/853837"},{"line_number":302,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"9a76e102_19dfafe0","line":299,"range":{"start_line":299,"start_character":38,"end_line":299,"end_character":49},"updated":"2023-01-04 22:29:31.000000000","message":"\"robustifying\"\n\nPlease, spell the made-up words correctly ;P","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":333,"context_line":"\"conductor_group\"."},{"line_number":334,"context_line":""},{"line_number":335,"context_line":"We will deprecate the peer list key. When we start up and see"},{"line_number":336,"context_line":"anything set, we ommit a warning about the bugs in using this"},{"line_number":337,"context_line":"legacy auto sharding, and recomend moving to the explicit sharding."},{"line_number":338,"context_line":""},{"line_number":339,"context_line":"There is a new `shard_key` config, as descirbed above."}],"source_content_type":"text/x-rst","patch_set":7,"id":"c399ab6c_81c32df5","line":336,"range":{"start_line":336,"start_character":17,"end_line":336,"end_character":22},"updated":"2023-01-04 22:29:31.000000000","message":"\"will emit\"","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":334,"context_line":""},{"line_number":335,"context_line":"We will deprecate the peer list key. When we start up and see"},{"line_number":336,"context_line":"anything set, we ommit a warning about the bugs in using this"},{"line_number":337,"context_line":"legacy auto sharding, and recomend moving to the explicit sharding."},{"line_number":338,"context_line":""},{"line_number":339,"context_line":"There is a new `shard_key` config, as descirbed above."},{"line_number":340,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"a210f091_14b6d4df","line":337,"range":{"start_line":337,"start_character":26,"end_line":337,"end_character":34},"updated":"2023-01-04 22:29:31.000000000","message":"\"recommend\"","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":336,"context_line":"anything set, we ommit a warning about the bugs in using this"},{"line_number":337,"context_line":"legacy auto sharding, and recomend moving to the explicit sharding."},{"line_number":338,"context_line":""},{"line_number":339,"context_line":"There is a new `shard_key` config, as descirbed above."},{"line_number":340,"context_line":""},{"line_number":341,"context_line":"There is a new nova_manage CLI command to move Ironic compute nodes"},{"line_number":342,"context_line":"on forced-down nova-compute services to a new one."}],"source_content_type":"text/x-rst","patch_set":7,"id":"d55c3436_8b9d32af","line":339,"range":{"start_line":339,"start_character":38,"end_line":339,"end_character":47},"updated":"2023-01-04 22:29:31.000000000","message":"\"described\"","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"c5f49339c932b80453d81933620ef9a6c47621de","unresolved":true,"context_lines":[{"line_number":394,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":395,"context_line":""},{"line_number":396,"context_line":"We need some functional tests for the nova-manage command to ensure"},{"line_number":397,"context_line":"all of the safty guards work as expected."},{"line_number":398,"context_line":""},{"line_number":399,"context_line":"Documentation Impact"},{"line_number":400,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":7,"id":"446232ce_96f93f92","line":397,"range":{"start_line":397,"start_character":11,"end_line":397,"end_character":16},"updated":"2023-01-04 22:29:31.000000000","message":"\"safety\"","commit_id":"e51a52e3e3026c0b3ea976fc813e372e56d64855"}]}
