)]}'
{"/COMMIT_MSG":[{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"db7b34321a44bbcd6de4baeee9076da24cad08d9","unresolved":false,"context_lines":[{"line_number":15,"context_line":"earlier in the method."},{"line_number":16,"context_line":""},{"line_number":17,"context_line":"Change-Id: I0907a541ed27c9096d649eceef567d5501cc2591"},{"line_number":18,"context_line":"Closes-Bug: #1834691"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":1,"id":"9fb8cfa7_d338e5ee","line":18,"range":{"start_line":18,"start_character":13,"end_line":18,"end_character":20},"updated":"2019-06-28 21:42:45.000000000","message":"1834694","commit_id":"dc6a366c3d7aafb24c4167806b4f99d8d64ddf0a"}],"nova/scheduler/host_manager.py":[{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"bf32e8ebecac7ba6935016f2237b4a59305e737b","unresolved":false,"context_lines":[{"line_number":623,"context_line":"            services \u003d objects.ServiceList.get_by_binary("},{"line_number":624,"context_line":"                cctxt, \u0027nova-compute\u0027, include_disabled\u003dTrue)"},{"line_number":625,"context_line":"            if compute_uuids is None:"},{"line_number":626,"context_line":"                return services, objects.ComputeNodeList.get_all(cctxt)"},{"line_number":627,"context_line":"            else:"},{"line_number":628,"context_line":"                return services, objects.ComputeNodeList.get_all_by_uuids("},{"line_number":629,"context_line":"                    cctxt, compute_uuids)"}],"source_content_type":"text/x-python","patch_set":1,"id":"9fb8cfa7_13167da9","line":626,"updated":"2019-06-28 21:31:49.000000000","message":"We could optimize by filtering out any compute nodes where free_disk_gb is null in the DB API query here. But really this is a weird race to hit in the scheduler between the time the compute node resource provider is reported to placement and the resource tracker updates the compute node record in the DB (unless the stale node is getting cached somehow in the scheduler?).\n\nTracing the compute node creation and update and stuff in the resource tracker, the node is created in the DB here:\n\nhttps://github.com/openstack/nova/blob/324da0532f3b59aa16233a93a260d289e55860fb/nova/compute/resource_tracker.py#L595\n\nnote that at this point the free_disk_gb field isn\u0027t set, that gets set later here:\n\nhttps://github.com/openstack/nova/blob/324da0532f3b59aa16233a93a260d289e55860fb/nova/compute/resource_tracker.py#L1036\n\nAnd _update_usage looks like it\u0027s only called during claims and when periodically updating resource usage for instances and migrations on the host, which wouldn\u0027t be anything on a new compute. But then how do we ever use this compute if HostManager._update_from_compute_node requires free_disk_gb to be set? Oh it looks like RT _update_usage_from_instances will set the field even if there aren\u0027t any instances on the host:\n\nhttps://github.com/openstack/nova/blob/324da0532f3b59aa16233a93a260d289e55860fb/nova/compute/resource_tracker.py#L1259\n\nThat\u0027s pretty dumb - that should probably be handled in ComputeNode.update_from_virt_driver or something outside of _update_usage_from_instances. Anyway, we should then save those changes off here:\n\nhttps://github.com/openstack/nova/blob/324da0532f3b59aa16233a93a260d289e55860fb/nova/compute/resource_tracker.py#L1010\n\nWe\u0027ll create the resource provider here:\n\nhttps://github.com/openstack/nova/blob/324da0532f3b59aa16233a93a260d289e55860fb/nova/compute/resource_tracker.py#L948\n\nthen report the provider inventory to placement here:\n\nhttps://github.com/openstack/nova/blob/324da0532f3b59aa16233a93a260d289e55860fb/nova/compute/resource_tracker.py#L996\n\nwhich is how it can become an allocation candidate for the scheduler.\n\nAnyway, looking at the RT code the compute node record should have free_disk_gb set before the resource provider is created in placement and updated with inventory so it\u0027s a bit puzzling how we could even get here, unless maybe the compute nodes table save failed but the placement resource provider create and inventory update didn\u0027t...(this host is overloaded that I\u0027m testing on and I\u0027m hitting db connection failures).","commit_id":"dc6a366c3d7aafb24c4167806b4f99d8d64ddf0a"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"db7b34321a44bbcd6de4baeee9076da24cad08d9","unresolved":false,"context_lines":[{"line_number":623,"context_line":"            services \u003d objects.ServiceList.get_by_binary("},{"line_number":624,"context_line":"                cctxt, \u0027nova-compute\u0027, include_disabled\u003dTrue)"},{"line_number":625,"context_line":"            if compute_uuids is None:"},{"line_number":626,"context_line":"                return services, objects.ComputeNodeList.get_all(cctxt)"},{"line_number":627,"context_line":"            else:"},{"line_number":628,"context_line":"                return services, objects.ComputeNodeList.get_all_by_uuids("},{"line_number":629,"context_line":"                    cctxt, compute_uuids)"}],"source_content_type":"text/x-python","patch_set":1,"id":"9fb8cfa7_938a6dbc","line":626,"in_reply_to":"9fb8cfa7_13167da9","updated":"2019-06-28 21:42:45.000000000","message":"Failing ComputeNode.save() should actually blow up the update_available_resource call before we hit placement though so I\u0027m still not sure how this is happening but I\u0027ve a node in the DB with null free_disk_gb:\n\nmysql\u003e select host,uuid from compute_nodes where free_disk_gb is NULL;\n+------------+--------------------------------------+\n| host       | uuid                                 |\n+------------+--------------------------------------+\n| devstack34 | a251bacf-9d6a-4ad3-abfb-f723c491e857 |\n+------------+--------------------------------------+\n1 row in set (0.00 sec)\n\nand there is a resource provider for that node:\n\nstack@devstack:~$ openstack resource provider show a251bacf-9d6a-4ad3-abfb-f723c491e857\n+------------+--------------------------------------+\n| Field      | Value                                |\n+------------+--------------------------------------+\n| uuid       | a251bacf-9d6a-4ad3-abfb-f723c491e857 |\n| name       | devstack34                           |\n| generation | 2                                    |\n+------------+--------------------------------------+","commit_id":"dc6a366c3d7aafb24c4167806b4f99d8d64ddf0a"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"764abee427ffb33dee8ec3b4a1be7de0acae50ec","unresolved":false,"context_lines":[{"line_number":623,"context_line":"            services \u003d objects.ServiceList.get_by_binary("},{"line_number":624,"context_line":"                cctxt, \u0027nova-compute\u0027, include_disabled\u003dTrue)"},{"line_number":625,"context_line":"            if compute_uuids is None:"},{"line_number":626,"context_line":"                return services, objects.ComputeNodeList.get_all(cctxt)"},{"line_number":627,"context_line":"            else:"},{"line_number":628,"context_line":"                return services, objects.ComputeNodeList.get_all_by_uuids("},{"line_number":629,"context_line":"                    cctxt, compute_uuids)"}],"source_content_type":"text/x-python","patch_set":1,"id":"9fb8cfa7_ee22e0a2","line":626,"in_reply_to":"9fb8cfa7_73001903","updated":"2019-06-28 21:52:58.000000000","message":"Yup the ComputeNode.save() to the DB blew up:\n\nhttp://paste.openstack.org/show/753578/\n\nBut then how/when does the resource provider get created?\n\nAh, a later update_available_resource run creates the provider:\n\nJun 28 19:20:59 devstack nova-compute[13597]: INFO nova.scheduler.client.report [None req-d49821a3-a69a-42e7-a8d1-67da58e89a71 None None] [req-3ce3b7d4-d127-4ca8-a559-c57226fc0294] Created resource provider record via placement API for resource provider with  UUID a251bacf-9d6a-4ad3-abfb-f723c491e857 and name devstack34.\n\nAnd that\u0027s probably because we updated RT.old_resources before:\n\nhttps://github.com/openstack/nova/blob/324da0532f3b59aa16233a93a260d289e55860fb/nova/compute/resource_tracker.py#L908\n\nThen ComputeNode.save() failed so we didn\u0027t create the provider, then on the next update_available_resource run we finally got past this:\n\nhttps://github.com/openstack/nova/blob/324da0532f3b59aa16233a93a260d289e55860fb/nova/compute/resource_tracker.py#L1005\n\nBecause it didn\u0027t think there were any changes and then created the provider.\n\nThat\u0027s pretty messy...we should probably rollback the changes to RT.old_resources if we can\u0027t update the ComputeNode. That\u0027s likely a separate patch though.","commit_id":"dc6a366c3d7aafb24c4167806b4f99d8d64ddf0a"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"a40f0a2ea363f404fef35e69057f20f5f482dd93","unresolved":false,"context_lines":[{"line_number":623,"context_line":"            services \u003d objects.ServiceList.get_by_binary("},{"line_number":624,"context_line":"                cctxt, \u0027nova-compute\u0027, include_disabled\u003dTrue)"},{"line_number":625,"context_line":"            if compute_uuids is None:"},{"line_number":626,"context_line":"                return services, objects.ComputeNodeList.get_all(cctxt)"},{"line_number":627,"context_line":"            else:"},{"line_number":628,"context_line":"                return services, objects.ComputeNodeList.get_all_by_uuids("},{"line_number":629,"context_line":"                    cctxt, compute_uuids)"}],"source_content_type":"text/x-python","patch_set":1,"id":"9fb8cfa7_73001903","line":626,"in_reply_to":"9fb8cfa7_938a6dbc","updated":"2019-06-28 21:45:44.000000000","message":"And there is inventory for the provider:\n\nmysql\u003e select id from resource_providers where uuid \u003d \"a251bacf-9d6a-4ad3-abfb-f723c491e857\";\n+----+\n| id |\n+----+\n| 35 |\n+----+\n1 row in set (0.00 sec)\n\nmysql\u003e select * from inventories where resource_provider_id \u003d 35;\n+---------------------+------------+-----+----------------------+-------------------+--------+----------+----------+----------+-----------+------------------+\n| created_at          | updated_at | id  | resource_provider_id | resource_class_id | total  | reserved | min_unit | max_unit | step_size | allocation_ratio |\n+---------------------+------------+-----+----------------------+-------------------+--------+----------+----------+----------+-----------+------------------+\n| 2019-06-28 19:21:00 | NULL       | 103 |                   35 |                 0 |   1000 |        0 |        1 |     1000 |         1 |               16 |\n| 2019-06-28 19:21:00 | NULL       | 104 |                   35 |                 1 | 800000 |      512 |        1 |   800000 |         1 |              1.5 |\n| 2019-06-28 19:21:00 | NULL       | 105 |                   35 |                 2 | 600000 |        0 |        1 |   600000 |         1 |                1 |\n+---------------------+------------+-----+----------------------+-------------------+--------+----------+----------+----------+-----------+------------------+\n3 rows in set (0.00 sec)\n\nmysql\u003e","commit_id":"dc6a366c3d7aafb24c4167806b4f99d8d64ddf0a"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"81bf5a6c535dd295ee98e1bc6089207ab56e780a","unresolved":false,"context_lines":[{"line_number":623,"context_line":"            services \u003d objects.ServiceList.get_by_binary("},{"line_number":624,"context_line":"                cctxt, \u0027nova-compute\u0027, include_disabled\u003dTrue)"},{"line_number":625,"context_line":"            if compute_uuids is None:"},{"line_number":626,"context_line":"                return services, objects.ComputeNodeList.get_all(cctxt)"},{"line_number":627,"context_line":"            else:"},{"line_number":628,"context_line":"                return services, objects.ComputeNodeList.get_all_by_uuids("},{"line_number":629,"context_line":"                    cctxt, compute_uuids)"}],"source_content_type":"text/x-python","patch_set":1,"id":"9fb8cfa7_ae706873","line":626,"in_reply_to":"9fb8cfa7_ee22e0a2","updated":"2019-06-28 22:13:21.000000000","message":"Created separate bug 1834712 for the RT issue.","commit_id":"dc6a366c3d7aafb24c4167806b4f99d8d64ddf0a"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"bf32e8ebecac7ba6935016f2237b4a59305e737b","unresolved":false,"context_lines":[{"line_number":793,"context_line":"                                  self._get_aggregates_info(host),"},{"line_number":794,"context_line":"                                  self._get_instance_info(context, compute))"},{"line_number":795,"context_line":""},{"line_number":796,"context_line":"                seen_nodes.add(state_key)"},{"line_number":797,"context_line":""},{"line_number":798,"context_line":"        return (host_state_map[host] for host in seen_nodes)"},{"line_number":799,"context_line":""}],"source_content_type":"text/x-python","patch_set":1,"id":"9fb8cfa7_73ec99ba","line":796,"updated":"2019-06-28 21:31:49.000000000","message":"So I was thinking, we should just do:\n\nif host_state.updated:\n    seen_nodes.add(state_key)\n\nhere, because HostState.updated is None on init and _update_from_compute_node will return before setting self.updated if free_disk_gb is not set in the ComputeNode. That\u0027s probably the cleaner fix than the whack-a-mole thing I\u0027m doing here and in https://review.opendev.org/#/c/668243/, though https://review.opendev.org/#/c/668243/ is pretty straight-forward on its own.\n\nMaking the change to check for host_state.updated here makes some unit tests fail and it\u0027s not trivial (at least with my Friday brain) to fix them, but it\u0027s probably the cleaner thing to do.","commit_id":"dc6a366c3d7aafb24c4167806b4f99d8d64ddf0a"},{"author":{"_account_id":30127,"name":"John Hou","email":"houj@awcloud.com","username":"houj"},"change_message_id":"a0021394f7cbc3e31087a6c93b032b4f5233e1dc","unresolved":false,"context_lines":[{"line_number":187,"context_line":"    def _update_from_compute_node(self, compute):"},{"line_number":188,"context_line":"        \"\"\"Update information about a host from a ComputeNode object.\"\"\""},{"line_number":189,"context_line":""},{"line_number":190,"context_line":"        # update allocation ratios given by the ComputeNode object"},{"line_number":191,"context_line":"        self.cpu_allocation_ratio \u003d compute.cpu_allocation_ratio"},{"line_number":192,"context_line":"        self.ram_allocation_ratio \u003d compute.ram_allocation_ratio"},{"line_number":193,"context_line":"        self.disk_allocation_ratio \u003d compute.disk_allocation_ratio"},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"        # NOTE(jichenjc): if the compute record is just created but not updated"},{"line_number":196,"context_line":"        # some field such as free_disk_gb can be None"}],"source_content_type":"text/x-python","patch_set":2,"id":"9fb8cfa7_e983bac9","line":193,"range":{"start_line":190,"start_character":8,"end_line":193,"end_character":66},"updated":"2019-06-29 02:32:40.000000000","message":"I don\u0027t think update here is better.","commit_id":"adeb20a34a4b4e92646dd39efbaa95e5a8d6dc8c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"916b0f728033bda5b92fb52a0922dab9eceece45","unresolved":false,"context_lines":[{"line_number":187,"context_line":"    def _update_from_compute_node(self, compute):"},{"line_number":188,"context_line":"        \"\"\"Update information about a host from a ComputeNode object.\"\"\""},{"line_number":189,"context_line":""},{"line_number":190,"context_line":"        # update allocation ratios given by the ComputeNode object"},{"line_number":191,"context_line":"        self.cpu_allocation_ratio \u003d compute.cpu_allocation_ratio"},{"line_number":192,"context_line":"        self.ram_allocation_ratio \u003d compute.ram_allocation_ratio"},{"line_number":193,"context_line":"        self.disk_allocation_ratio \u003d compute.disk_allocation_ratio"},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"        # NOTE(jichenjc): if the compute record is just created but not updated"},{"line_number":196,"context_line":"        # some field such as free_disk_gb can be None"}],"source_content_type":"text/x-python","patch_set":2,"id":"9fb8cfa7_7d5c567c","line":193,"range":{"start_line":190,"start_character":8,"end_line":193,"end_character":66},"in_reply_to":"9fb8cfa7_09c96e24","updated":"2019-07-01 13:16:45.000000000","message":"\u003e Surely the allocation ratios can\u0027t be the only thing that\u0027ll go kablooey in that case?\n\nRight, we know failed_builds was another one. Hence the TODO below, which when I tried locally on Friday that caused some unit tests to fail which weren\u0027t easy to fix, but I\u0027ll probably change this patch to implement that TODO instead.\n\nThere is also this:\n\nhttps://review.opendev.org/668263","commit_id":"adeb20a34a4b4e92646dd39efbaa95e5a8d6dc8c"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"138e9651eb05ec7b934561ce3d9255e59a559f61","unresolved":false,"context_lines":[{"line_number":187,"context_line":"    def _update_from_compute_node(self, compute):"},{"line_number":188,"context_line":"        \"\"\"Update information about a host from a ComputeNode object.\"\"\""},{"line_number":189,"context_line":""},{"line_number":190,"context_line":"        # update allocation ratios given by the ComputeNode object"},{"line_number":191,"context_line":"        self.cpu_allocation_ratio \u003d compute.cpu_allocation_ratio"},{"line_number":192,"context_line":"        self.ram_allocation_ratio \u003d compute.ram_allocation_ratio"},{"line_number":193,"context_line":"        self.disk_allocation_ratio \u003d compute.disk_allocation_ratio"},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"        # NOTE(jichenjc): if the compute record is just created but not updated"},{"line_number":196,"context_line":"        # some field such as free_disk_gb can be None"}],"source_content_type":"text/x-python","patch_set":2,"id":"9fb8cfa7_09c96e24","line":193,"range":{"start_line":190,"start_character":8,"end_line":193,"end_character":66},"in_reply_to":"9fb8cfa7_e983bac9","updated":"2019-06-29 02:46:45.000000000","message":"It makes sense to me to set these before the early-bailout check below, assuming they\u0027re available...\n\n...but I guess I\u0027m more concerned that something is trying to use them in the first place if the compute service isn\u0027t fully ready yet. Surely the allocation ratios can\u0027t be the only thing that\u0027ll go kablooey in that case?","commit_id":"adeb20a34a4b4e92646dd39efbaa95e5a8d6dc8c"}],"nova/tests/unit/scheduler/test_host_manager.py":[{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"138e9651eb05ec7b934561ce3d9255e59a559f61","unresolved":false,"context_lines":[{"line_number":1578,"context_line":"        self.assertEqual(0, host.free_ram_mb)"},{"line_number":1579,"context_line":"        # same with failed_builds"},{"line_number":1580,"context_line":"        self.assertEqual(0, host.failed_builds)"},{"line_number":1581,"context_line":"        # same with cpu_allocation_ratio"},{"line_number":1582,"context_line":"        self.assertEqual(compute.cpu_allocation_ratio,"},{"line_number":1583,"context_line":"                         host.cpu_allocation_ratio)"}],"source_content_type":"text/x-python","patch_set":2,"id":"9fb8cfa7_a952a265","line":1581,"range":{"start_line":1581,"start_character":8,"end_line":1581,"end_character":40},"updated":"2019-06-29 02:46:45.000000000","message":"Shouldn\u0027t this comment say\n\n # but allocation ratios happen earlier, so they\u0027re set\n\n?","commit_id":"adeb20a34a4b4e92646dd39efbaa95e5a8d6dc8c"}]}
