)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"aafe2bd2999fcccc1574c3f325bcce89e5478659","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"2b524f46_b1f978d1","updated":"2026-06-04 14:31:52.000000000","message":"+1 on the idea -1 is for some wording improment","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0cad97405216f2cb6c77dd8661ba997ac5d93f41","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"4b75d05b_ed0240c6","updated":"2026-06-05 11:09:23.000000000","message":"i want to see what other think but i am ok with this directionally so +1 for now\n\nthis is small enough that i can proably reive wthe implemation if others agree","commit_id":"f8fdcafb23def136b805c9b2016a3e51d11fceba"}],"specs/2026.2/approved/reduce-failed-builds-value-by-time.rst":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"aafe2bd2999fcccc1574c3f325bcce89e5478659","unresolved":true,"context_lines":[{"line_number":22,"context_line":"on each failed host to reset the ``failed_builds`` value"},{"line_number":23,"context_line":"which is operationally inconvenient."},{"line_number":24,"context_line":"Otherwise, a failed host keeps the ``failed_builds`` value for a long time."},{"line_number":25,"context_line":""},{"line_number":26,"context_line":""},{"line_number":27,"context_line":"Use Cases"},{"line_number":28,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"beab4dff_0ae54d91","line":25,"updated":"2026-06-04 14:31:52.000000000","message":"technially its only reset if a build succed or i eblvie if you restart the agent\n\nyou can of couse reduce the weight of the failed build weihter or increase the randomness of the schduler to reduce the impact of this failed build count and try to use the host more often as existing mitigations\n\nwith all that said the probelm dectipon doe not really capture the pain point\n\n```\nNova has a feature to avoid schduleing instnace to host that have failed to build a vm previously, in the past this feature was implemented by disabling the host and later it was updatred to recored the count of ``failed_builds``. \nToday this value is only reset if a build succeds but the default wehigh applies a very large weight to aovid host with a failed builds.\n\nThis is a good default for large clouds but it has a drawback\nfor small cloud it can lead to a severe imbalance in workload distibution\nuntil the capstiy of the remaining nodes is consumed. this also\nhappens for large cloud but the effect is less pernouched\n\noperator currently has a few mitigation none of which are ideal.\n1 manually schedule a test vm to the relevant nodes to reset the count\n2 change the scheduler to introduce randomness\n3 reduce the weight multiplier of the failed build weigher or disable it entirely.\n\n1 is not ideal because there is not way to see which node have failed_build !\u003d0 easily\n2 leads to sub optimal scheduling in general\n3 will make it more likely that nova will select genuinely broken compute nodes\n```","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0cad97405216f2cb6c77dd8661ba997ac5d93f41","unresolved":true,"context_lines":[{"line_number":22,"context_line":"on each failed host to reset the ``failed_builds`` value"},{"line_number":23,"context_line":"which is operationally inconvenient."},{"line_number":24,"context_line":"Otherwise, a failed host keeps the ``failed_builds`` value for a long time."},{"line_number":25,"context_line":""},{"line_number":26,"context_line":""},{"line_number":27,"context_line":"Use Cases"},{"line_number":28,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"7dd0d98a_b8e9bb3f","line":25,"in_reply_to":"7f9df3c1_1368ec28","updated":"2026-06-05 11:09:23.000000000","message":"your right ti looks like we explcitly didnt clear it on restart\n\nhttps://github.com/openstack/nova/commit/91e29079a0eac825c5f4fe793cf607cb1771467d#diff-ed9525d7ae319fd575249ed72daf634d27182e08fea7a4f740cb3164233612b7R607\n\nthat may be something we should recondier.","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":35674,"name":"ChungWon Lee","display_name":"cw0306-lee","email":"cw0306.lee@samsung.com","username":"cw0306-lee"},"change_message_id":"96bde7cd141c90e75d9859801f8c3c4c7e13f3f9","unresolved":true,"context_lines":[{"line_number":22,"context_line":"on each failed host to reset the ``failed_builds`` value"},{"line_number":23,"context_line":"which is operationally inconvenient."},{"line_number":24,"context_line":"Otherwise, a failed host keeps the ``failed_builds`` value for a long time."},{"line_number":25,"context_line":""},{"line_number":26,"context_line":""},{"line_number":27,"context_line":"Use Cases"},{"line_number":28,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"7f9df3c1_1368ec28","line":25,"in_reply_to":"beab4dff_0ae54d91","updated":"2026-06-05 10:27:38.000000000","message":"https://github.com/openstack/nova/blob/master/nova/compute/resource_tracker.py#L848\nFrom a code, \u0027builds_failed\u0027 will not be initialized by restarting.\nThis may need verification.","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"aafe2bd2999fcccc1574c3f325bcce89e5478659","unresolved":true,"context_lines":[{"line_number":27,"context_line":"Use Cases"},{"line_number":28,"context_line":"---------"},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"- As an operator, I may not need to create VMs"},{"line_number":31,"context_line":"  on each host to rollback ``failed_builds`` value"},{"line_number":32,"context_line":"  when several VMs fail to build on several hosts."},{"line_number":33,"context_line":""},{"line_number":34,"context_line":"Proposed change"},{"line_number":35,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"16c85eee_c48ad7f6","line":32,"range":{"start_line":30,"start_character":1,"end_line":32,"end_character":50},"updated":"2026-06-04 14:31:52.000000000","message":"```suggestion\nAs an operator, i would like nova to avoid host that failed to create vms\nby default based on the number of failed attempt since the last good build.\n\nAs an operator, i would like nova to automatically decay the failed build count\nso that transient failure due to load such as timeout don\u0027t make the schudler\navoid the host indefinitely unless there is no other choice.\n```","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":35674,"name":"ChungWon Lee","display_name":"cw0306-lee","email":"cw0306.lee@samsung.com","username":"cw0306-lee"},"change_message_id":"96bde7cd141c90e75d9859801f8c3c4c7e13f3f9","unresolved":true,"context_lines":[{"line_number":27,"context_line":"Use Cases"},{"line_number":28,"context_line":"---------"},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"- As an operator, I may not need to create VMs"},{"line_number":31,"context_line":"  on each host to rollback ``failed_builds`` value"},{"line_number":32,"context_line":"  when several VMs fail to build on several hosts."},{"line_number":33,"context_line":""},{"line_number":34,"context_line":"Proposed change"},{"line_number":35,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"74023a66_7f2450de","line":32,"range":{"start_line":30,"start_character":1,"end_line":32,"end_character":50},"in_reply_to":"16c85eee_c48ad7f6","updated":"2026-06-05 10:27:38.000000000","message":"@seanmooney8202@yahoo.ie, I fixed probelm discussion and use cases with you review. Thank you!","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0cad97405216f2cb6c77dd8661ba997ac5d93f41","unresolved":false,"context_lines":[{"line_number":27,"context_line":"Use Cases"},{"line_number":28,"context_line":"---------"},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"- As an operator, I may not need to create VMs"},{"line_number":31,"context_line":"  on each host to rollback ``failed_builds`` value"},{"line_number":32,"context_line":"  when several VMs fail to build on several hosts."},{"line_number":33,"context_line":""},{"line_number":34,"context_line":"Proposed change"},{"line_number":35,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":2,"id":"1506c528_f6b9a52a","line":32,"range":{"start_line":30,"start_character":1,"end_line":32,"end_character":50},"in_reply_to":"74023a66_7f2450de","updated":"2026-06-05 11:09:23.000000000","message":"Done","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"aafe2bd2999fcccc1574c3f325bcce89e5478659","unresolved":true,"context_lines":[{"line_number":33,"context_line":""},{"line_number":34,"context_line":"Proposed change"},{"line_number":35,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":36,"context_line":""},{"line_number":37,"context_line":"Reduce ``failed_builds`` value gradually not only by successful VM builds"},{"line_number":38,"context_line":"but also time-based."},{"line_number":39,"context_line":""},{"line_number":40,"context_line":"Alternatives"},{"line_number":41,"context_line":"------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"5a212cfc_92832ecc","line":38,"range":{"start_line":36,"start_character":1,"end_line":38,"end_character":20},"updated":"2026-06-04 14:31:52.000000000","message":"The compute agent will be modified to preodiclly decay the failed build value based on a configuration option.\n\nthis will allow nova to be resilient to intermittent failure such as timeout due to load while still avoiding host that consistently fail to build vms.","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"aafe2bd2999fcccc1574c3f325bcce89e5478659","unresolved":true,"context_lines":[{"line_number":40,"context_line":"Alternatives"},{"line_number":41,"context_line":"------------"},{"line_number":42,"context_line":""},{"line_number":43,"context_line":"An admin API could be provided to change the value."},{"line_number":44,"context_line":""},{"line_number":45,"context_line":"Data model impact"},{"line_number":46,"context_line":"-----------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"39205171_cac2a101","line":43,"updated":"2026-06-04 14:31:52.000000000","message":"yes we could expose it via the hypervisors API and add a way to reset it\nsimilarly we could provide  a nova-mange command for the same\n\ni think the periodic decay of the value is better however as nova would then self heal.","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"66ec09775040f76446383bd500f358794ace6ae3","unresolved":true,"context_lines":[{"line_number":75,"context_line":""},{"line_number":76,"context_line":"Other deployer impact"},{"line_number":77,"context_line":"---------------------"},{"line_number":78,"context_line":""},{"line_number":79,"context_line":"``[compute]build_failed_reduce_interval`` config option will be added."},{"line_number":80,"context_line":"Default is -1 (disabled) and can be enabled with a positive value or 0."},{"line_number":81,"context_line":""},{"line_number":82,"context_line":"Developer impact"},{"line_number":83,"context_line":"----------------"}],"source_content_type":"text/x-rst","patch_set":2,"id":"64feafac_45de5cdb","line":80,"range":{"start_line":78,"start_character":1,"end_line":80,"end_character":71},"updated":"2026-06-04 14:32:46.000000000","message":"so -1 should be disabeld 0 would be defualt perodic interval and then any positive value woudl be the interval to run on","commit_id":"61fc2c2bdc4e4f79c97e34f44c01d737c164b4d8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0cad97405216f2cb6c77dd8661ba997ac5d93f41","unresolved":true,"context_lines":[{"line_number":140,"context_line":"Testing"},{"line_number":141,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":142,"context_line":""},{"line_number":143,"context_line":"* Unit test"},{"line_number":144,"context_line":""},{"line_number":145,"context_line":"Documentation Impact"},{"line_number":146,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":4,"id":"9a19a66e_8de8e583","line":143,"updated":"2026-06-05 11:09:23.000000000","message":"we can run perodics explicty in the functional test so would expect use to create a functional test that will simulate failed builes, assert the build count is non zero and then un the perod and observe teh value in the db is decremented over time.","commit_id":"f8fdcafb23def136b805c9b2016a3e51d11fceba"}]}
