Gerrit inline review comments on the Nova change adding Cyborg ARQ handling to
the shelve-offload periodic task (patch set 1: commit df1a10dc, patch set 2:
commit 59dae372).

File: nova/compute/manager.py

Code under review (line numbers are from the patch):

    9477                 instance.save(expected_task_state=(None,))
    9478                 accel_uuids = []
    9479                 if instance.flavor.extra_specs.get('accel:device_profile'):
    9480                     cyclient = cyborg.get_client(context)
    9481                     accel_uuids = cyclient.get_arq_uuids_for_instance(instance)
    9482                 self.shelve_offload_instance(
    9483                     context, instance, clean_shutdown=False,
    9484                     accel_uuids=accel_uuids)

Thread on line 9480 (unresolved):

Sylvain Bauza (sbauza), 2021-03-03, PS1:
You're technically recreating the client at every period. Is there any way to
stop doing that and use either a global object or a class object instead?

Sylvain Bauza (sbauza), 2021-03-04, PS2:
My concern from the previous patch set isn't addressed.

Balazs Gibizer (gibi), 2021-03-16, PS2, in reply:
@Sylvain: if the concern is that instantiating the cyborg client is costly and
we are doing it in a loop, then we can simply move the client creation to the
top of the loop, so it is only created once per periodic run.

Brin Zhang (zhangbailin), 2021-03-17, PS2, in reply:
Ack.
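A minimal sketch of gibi's suggestion, reusing the names from the snippet under
review; the rest of the periodic task is elided, and `instances_to_offload` is
a stand-in for whatever the task's filtering selected:

    # Build the cyborg client once per periodic run, not once per instance
    # (former line 9480 sat inside the loop).
    cyclient = cyborg.get_client(context)
    for instance in instances_to_offload:
        instance.task_state = task_states.SHELVING_OFFLOADING
        instance.save(expected_task_state=(None,))
        accel_uuids = []
        if instance.flavor.extra_specs.get('accel:device_profile'):
            # Still one HTTP call per accelerator-backed instance, but no
            # repeated client construction.
            accel_uuids = cyclient.get_arq_uuids_for_instance(instance)
        self.shelve_offload_instance(
            context, instance, clean_shutdown=False,
            accel_uuids=accel_uuids)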
Thread on line 9481 (unresolved):

Sylvain Bauza (sbauza), 2021-03-03, PS1:
I'm a bit afraid of the number of HTTP API hits we would make if the periodic
time is short. Fortunately, the default value is 3600 secs, but I'd recommend
amending the conf option documentation to mention that performance penalty
when Cyborg is configured.

sean mooney (sean-k-mooney), 2021-03-03, PS1, in reply:
I don't think this is a concern. We will only call out to cyborg if the
instance has cyborg resources, because of the `if` on line 9479, and we will
only do it once per instance that was shelved. So unless you are constantly
shelving instances in very large numbers, the period of this will not really
be a DDoS vector, as most instances will skip this code path.

Sylvain Bauza (sbauza), 2021-03-03, PS1, in reply:
To be clear, I'll restate my concern with an example: say the periodic time is
3600, meaning this method runs every 3600 secs. During this timeframe,
instances can be created and shelved by every tenant. At the end of the
timeframe, this periodic task will run and pick up all the instances that were
shelved during the last hour. This could be large, like 10k. Then it will
iterate over those 10k instances to offload them and hit the Cyborg API nearly
concurrently. This can be a serious performance penalty if the Cyborg API
isn't robust enough to cope with 10000 connections.

Brin Zhang (zhangbailin), 2021-03-04, PS1, in reply:
I don't think this is a concern either. We filter for SHELVED instances at
line 9463, and an instance is only moved to SHELVING_OFFLOADING once it has
exceeded shelved_offload_time, so there is not really that much for *this* to
do.
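For context on the filtering Brin refers to, this is roughly the shape of the
selection step in _poll_shelved_instances (a sketch, not the patch text): only
SHELVED, idle instances on this host that have outlived shelved_offload_time
ever reach the cyborg lookup.

    # Select only shelved, idle instances that belong to this host.
    filters = {'vm_state': vm_states.SHELVED,
               'task_state': None,
               'host': self.host}
    shelved_instances = objects.InstanceList.get_by_filters(
        context, filters=filters, expected_attrs=['system_metadata'],
        use_slave=True)
    # Of those, only instances shelved for longer than the configured
    # timeout are offloaded (and therefore hit the cyborg call).
    instances_to_offload = [
        inst for inst in shelved_instances
        if timeutils.is_older_than(inst.system_metadata['shelved_at'],
                                   CONF.shelved_offload_time)]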
Sylvain Bauza (sbauza), 2021-03-04, PS2:
I looked at the Cyborg API reference and unfortunately the ARQ resource API
doesn't accept more than one instance UUID (1). That being said, we could
maybe fetch all the ARQs beforehand (for example at L9473) and look up the ARQ
UUIDs for each instance in some dict here, so we would only make one HTTP call
for all the instances instead of one per instance, which I'm still a bit
afraid of.

(1) https://docs.openstack.org/api-ref/accelerator/v2/index.html?expanded=list-accelerator-requests-detail,get-one-accelerator-request-detail

Balazs Gibizer (gibi), 2021-03-16, PS2, in reply:
It is a tradeoff.

A) If the system has a lot of ARQs spread across a lot of computes, then
querying every ARQ and filtering down here to the ARQs that belong to the
shelved instances on this compute is pretty costly.

B) On the other hand, if this compute has a lot of shelved instances with a
device profile, then querying the ARQs for each of them, one by one, is costly
too.

The ultimate solution would be to query the ARQs for a list of instances in a
single request, as suggested by Sylvain, but that is not supported by the
cyborg API today. So we have to figure out which scenario is more likely, A)
or B). I think the number of accelerators per compute is the limiting factor:
we cannot have 1000 accelerators per compute, as they are physical hardware,
but we can have 1000 computes with some accelerators. So I think scenario A)
is more likely, and therefore asking cyborg for each instance one by one is
the better solution until a batch query is implemented in the cyborg API.

Brin Zhang (zhangbailin), 2021-03-17, PS2, in reply:
Agree. We can support solution B) first, then add a batch query of the ARQs
for a list of instances in a single request to cyborg, and after that feature
is completed (maybe in the Xena release; I will take this to the Cyborg PTG
[1] to discuss) I will update this periodic task. Once the batch ARQ query for
multiple instances is supported we can reduce the number of cyborg requests,
but we still need to loop over the instances to call
shelve_offload_instance().

[1] https://etherpad.opendev.org/p/cyborg-xena-ptg

Balazs Gibizer (gibi), 2021-03-17, PS2, in reply:
Thanks Brin.
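For comparison, a sketch of Sylvain's fetch-once alternative (scenario A in
gibi's tradeoff). `list_all_arqs()` is a hypothetical helper for the
list-accelerator-requests endpoint in the API reference linked above; Nova's
cyborg client exposes no such call today, which is exactly the missing piece.
The `instance_uuid`/`uuid` field names follow that API reference.

    from collections import defaultdict

    # One HTTP call up front, then plain dict lookups inside the loop.
    cyclient = cyborg.get_client(context)
    arqs_by_instance = defaultdict(list)
    for arq in cyclient.list_all_arqs():  # hypothetical batch fetch
        arqs_by_instance[arq['instance_uuid']].append(arq['uuid'])

    for instance in instances_to_offload:
        # A dict lookup replaces the per-instance HTTP round trip, at the
        # cost of having fetched every ARQ in the deployment.
        accel_uuids = arqs_by_instance.get(instance.uuid, [])
        self.shelve_offload_instance(
            context, instance, clean_shutdown=False,
            accel_uuids=accel_uuids)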
File: nova/tests/unit/compute/test_shelve.py

Code under review (patch set 1):

    771         self.flags(shelved_offload_time=1)
    772         mock_older.return_value = True
    773         instance1 = self._create_fake_instance_obj()
    774         instance1.task_state = task_states.SPAWNING
    775         instance1.vm_state = vm_states.SHELVED
    776         instance1.host = self.compute.host
    777         instance1.system_metadata = {'shelved_at': ''}

Thread on line 774, "SPAWNING" (unresolved):

sean mooney (sean-k-mooney), 2021-03-03, PS1:
I think this is incorrect: an instance won't be in task_state SPAWNING when we
are shelving. It would be in fields.InstanceTaskState.SHELVING, then
fields.InstanceTaskState.SHELVING_IMAGE_PENDING_UPLOAD, then finally
fields.InstanceTaskState.SHELVING_IMAGE_UPLOADING, before going to None when
it is finally shelved.

This should be None, as you have for instance2.
https://github.com/openstack/nova/blob/master/nova/compute/task_states.py#L109

I'm not really sure why you have two instances here. Either you wanted the
first instance to prove it was not altered by _poll_shelved_instances, in
which case you should put it into a valid vm and task state, or it's not
needed and you should just remove it.
Sylvain Bauza (sbauza), 2021-03-03, PS1, in reply:
Good point.

Brin Zhang (zhangbailin), 2021-03-04, PS1, in reply:
ACK, addressed.
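A sketch of the straightforward reading of sean's fix (assuming instance1 is
meant to be a fully shelved instance that the periodic task then offloads):

    instance1 = self._create_fake_instance_obj()
    # Shelving has completed: the SHELVING_* states are transient, and a
    # shelved instance waiting for offload has no task in flight.
    instance1.task_state = None
    instance1.vm_state = vm_states.SHELVED
    instance1.host = self.compute.host
    instance1.system_metadata = {'shelved_at': ''}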
