)]}'
{"/COMMIT_MSG":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"0cfe204020f31032bb6720d3c4d2dde3840b768a","unresolved":true,"context_lines":[{"line_number":11,"context_line":"in the logs."},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"This wraps the function in a safe way to return an empty array"},{"line_number":14,"context_line":"if it fails, which will clean-up the logs if the device disappears"},{"line_number":15,"context_line":""},{"line_number":16,"context_line":"Closes-Bug: #1972028"},{"line_number":17,"context_line":"Change-Id: I46d3bbe122d9f8452f168286391bab67ecea3128"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"c8efaba9_e94674f5","line":14,"updated":"2022-05-17 10:22:47.000000000","message":"So today if the race happens then the periodic update_available_resource call will just exit early without doing any update[1]. \n\nAfter the fix the periodic will succeed but some devices might have empty capabilities list which can result in that devices stop being considered as netdevs or vdpa devs. I\u0027m concerned that this might lead to resource tracking inconsistencies. So I suggest to create some functional test in the libvirt functional env that simulates the race with this fix and shows that we don\u0027t end up in an inconsistent state after the race.\n\n[1] https://github.com/openstack/nova/blob/4939318649650b60dd07d161b80909e70d0e093e/nova/compute/manager.py#L10134","commit_id":"14c9ebcf3442e514d6346b712814e55b22c6c210"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"031779f305749aef93abed5da8c9c1f549ace56f","unresolved":false,"context_lines":[{"line_number":11,"context_line":"in the logs."},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"This wraps the function in a safe way to return an empty array"},{"line_number":14,"context_line":"if it fails, which will clean-up the logs if the device disappears"},{"line_number":15,"context_line":""},{"line_number":16,"context_line":"Closes-Bug: #1972028"},{"line_number":17,"context_line":"Change-Id: I46d3bbe122d9f8452f168286391bab67ecea3128"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"b02e27ea_39706c6c","line":14,"in_reply_to":"c8efaba9_e94674f5","updated":"2022-05-17 17:53:17.000000000","message":"OK it seem to be more complicated than that.\n\nThe pci_tracker once initialized does not care about the changes of the pci devices inventory during the update_available_resource() periodic. So if the compute is started without hitting the race in device caps then later races does not cause any issues in the pci inventory. \n\nIn the other hand if the race happens in during compute startup when the pci tracker is initialized, then with the current patch some device will simply missing from the tracker as they will not have the any capabilities not even \u0027pci\u0027. And this is not fixed by the periodic run as stated above.\n\nWithout this fix if the race happens at startup then the pci tracker will not be initialized but the compute will start. Then if the next periodic runs without the race then that will initialize the pci tracker properly.\n\nSo I think after this fix we can create situation where after a compute startup some pci device will missing from the tracker and the only way to fix that is to restart the compute (and hope that it will not race at that time).\n\n// later \n\n/o\\ now I realized that the whole race is about devices disappearing from libvirt perspective and the result of the race with the current fix is that we will not track the device that is disappeared. Now the whole thing make sense.\n\nCould you please extend the comment in the code with some of this explanation (suggested inline there).","commit_id":"14c9ebcf3442e514d6346b712814e55b22c6c210"}],"/PATCHSET_LEVEL":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"0cfe204020f31032bb6720d3c4d2dde3840b768a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"8d0a2dbb_efc2d0fe","updated":"2022-05-17 10:22:47.000000000","message":"I have some concerns pci devices flipping between being a netdev (when no racing) then not being a netdev (when racing).","commit_id":"14c9ebcf3442e514d6346b712814e55b22c6c210"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"031779f305749aef93abed5da8c9c1f549ace56f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"c8bb391a_b658d0d6","updated":"2022-05-17 17:53:17.000000000","message":"Keeping the -1 but just to add more explanation to the code comment. The fix itself make sense now.","commit_id":"14c9ebcf3442e514d6346b712814e55b22c6c210"},{"author":{"_account_id":1004,"name":"Mohammed Naser","email":"mnaser@vexxhost.com","username":"mnaser"},"change_message_id":"777d0f6516144403ab0ed134597b8b7f5b494c61","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"a89e526a_881ed66b","updated":"2022-05-10 15:12:21.000000000","message":"recheck","commit_id":"14c9ebcf3442e514d6346b712814e55b22c6c210"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"92c5d03d025e43af6c464b789d5044c3634c3963","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"5df2e307_6da7ea80","updated":"2022-05-18 13:03:00.000000000","message":"Thanks! ","commit_id":"8534499b4a76a8aaf39005f251da33a25e95a67c"}],"nova/virt/libvirt/driver.py":[{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"031779f305749aef93abed5da8c9c1f549ace56f","unresolved":true,"context_lines":[{"line_number":7953,"context_line":""},{"line_number":7954,"context_line":"        # NOTE(mnaser): The listCaps() function can raise an exception if the"},{"line_number":7955,"context_line":"        #               device disappeared while we\u0027re looping, this method"},{"line_number":7956,"context_line":"        #               returns an empty list rather than raising an exception."},{"line_number":7957,"context_line":"        def _safe_list_caps(dev):"},{"line_number":7958,"context_line":"            try:"},{"line_number":7959,"context_line":"                return dev.listCaps()"}],"source_content_type":"text/x-python","patch_set":2,"id":"c6c8e545_0ce0652e","line":7956,"updated":"2022-05-17 17:53:17.000000000","message":"Could you please add that if we are returning empty capabilities that will mean this device will not be tracked by nova as it won\u0027t have pci capability. But that is OK as the device is disappeared from libvirt too.","commit_id":"14c9ebcf3442e514d6346b712814e55b22c6c210"},{"author":{"_account_id":1004,"name":"Mohammed Naser","email":"mnaser@vexxhost.com","username":"mnaser"},"change_message_id":"4c2b835ee975b860f64a64c77f11cfe9ed3ee5bf","unresolved":false,"context_lines":[{"line_number":7953,"context_line":""},{"line_number":7954,"context_line":"        # NOTE(mnaser): The listCaps() function can raise an exception if the"},{"line_number":7955,"context_line":"        #               device disappeared while we\u0027re looping, this method"},{"line_number":7956,"context_line":"        #               returns an empty list rather than raising an exception."},{"line_number":7957,"context_line":"        def _safe_list_caps(dev):"},{"line_number":7958,"context_line":"            try:"},{"line_number":7959,"context_line":"                return dev.listCaps()"}],"source_content_type":"text/x-python","patch_set":2,"id":"4bc4c50f_0133c809","line":7956,"in_reply_to":"c6c8e545_0ce0652e","updated":"2022-05-18 12:49:47.000000000","message":"done!","commit_id":"14c9ebcf3442e514d6346b712814e55b22c6c210"}]}
