)]}'
{"/COMMIT_MSG":[{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"6b81305404c7be102a9f75b4bd94aff15abc788c","unresolved":true,"context_lines":[{"line_number":10,"context_line":"vhostuserclient mode, Nova will delete the OVS port and then recreate it"},{"line_number":11,"context_line":"when the VM reboots. This quick transition can create a race condition"},{"line_number":12,"context_line":"between the trunk addition and deletion, causing the former to fail"},{"line_number":13,"context_line":"because the latter deletes the trunk bridge. In this change we add"},{"line_number":14,"context_line":"a dictionary to track trunks pending to be created, so the trunk"},{"line_number":15,"context_line":"deletion can back off and not delete the bridge when a creation for the"},{"line_number":16,"context_line":"same trunk arrives concurrently. This change also removes the wait"},{"line_number":17,"context_line":"introduced by [1] and [2]."},{"line_number":18,"context_line":""},{"line_number":19,"context_line":"This change is associated to [3] on the os-vif side."}],"source_content_type":"text/x-gerrit-commit-message","patch_set":4,"id":"40b6c49f_ba511fb9","line":16,"range":{"start_line":13,"start_character":45,"end_line":16,"end_character":32},"updated":"2022-03-25 11:48:04.000000000","message":"would it make sense to defer deletion until the trunk port is remove form the host.\nas in its detach form the vm, delete or the host-id is chagned.\n\nthe agent shoudl get a port status update in all 3 case and can then respond rather then triggerign it off of the ovs events.\n\nthat woudl also avoid the race entirely as the l2 agent would only clean up the trunk bridge if the trunk port was removed by a lifecycle event such as a move operation , vm or port delete or interface detach. so hardreboot would no longer\nrequire the brdge to be deleted.\n\nwhat you would do instead is install a high priorty drop flow when the port is remvoed to prevent any packets form leaving the bridge if the sub ports recive packets like arp.","commit_id":"1f9e374ac270408db7e7248edf1084d39a1544ff"},{"author":{"_account_id":8313,"name":"Lajos Katona","display_name":"lajoskatona","email":"katonalala@gmail.com","username":"elajkat","status":"Ericsson Software Technology"},"change_message_id":"9f49f7aacd873161d32f12709c49fae9af9af218","unresolved":true,"context_lines":[{"line_number":20,"context_line":""},{"line_number":21,"context_line":"[1] https://review.opendev.org/c/openstack/neutron/+/714783"},{"line_number":22,"context_line":"[2] https://review.opendev.org/c/openstack/neutron/+/827580"},{"line_number":23,"context_line":"[3] https://review.opendev.org/c/openstack/os-vif/+/830103"},{"line_number":24,"context_line":""},{"line_number":25,"context_line":"Change-Id: I10f85237926d1fc5ded7411fc11088c9f0651478"},{"line_number":26,"context_line":"Partial-Bug: #1869244"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":4,"id":"ead9e5be_040ad6ac","line":23,"range":{"start_line":23,"start_character":0,"end_line":23,"end_character":58},"updated":"2022-02-21 15:17:44.000000000","message":"Depends-On perhaps helps to test with os-vif patch together, not sure if our job definitions support that","commit_id":"1f9e374ac270408db7e7248edf1084d39a1544ff"}],"/PATCHSET_LEVEL":[{"author":{"_account_id":11975,"name":"Slawek Kaplonski","email":"skaplons@redhat.com","username":"slaweq"},"change_message_id":"ffefce187b3ee0fb6e820f390c6e97941b9f89e9","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"1ff4527e_eac4ab6e","updated":"2022-02-15 10:18:38.000000000","message":"Please check functional tests job as failure seems to be related to that patch.","commit_id":"ae77068a3b933a6aae572d2199edecee4510184c"},{"author":{"_account_id":11975,"name":"Slawek Kaplonski","email":"skaplons@redhat.com","username":"slaweq"},"change_message_id":"4782026fdc6ffd395c37f7b6976e9e49c7405c05","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"bab577ac_8958f36f","in_reply_to":"1ff4527e_eac4ab6e","updated":"2022-02-15 10:19:29.000000000","message":"Also scenario job failure seems to be related: https://633b8ea530885bf10dc1-d7024b4a9df5a3cf5a52965a2b5a469a.ssl.cf2.rackcdn.com/829139/1/check/neutron-ovs-tempest-dvr-ha-multinode-full/574b65a/testr_results.html","commit_id":"ae77068a3b933a6aae572d2199edecee4510184c"},{"author":{"_account_id":5948,"name":"Oleg Bondarev","email":"obondarev@mirantis.com","username":"obondarev"},"change_message_id":"ae16b2f708c6cc3a3e7a8a5ee8c312491dadeb6d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"888811d8_591cf6ef","updated":"2022-02-17 09:26:36.000000000","message":"A couple of questions to better understand the issue","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"bede802ce4834a09d0e64879ac7411519a8a97bb","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"b7162f36_9607345d","updated":"2022-02-16 17:23:58.000000000","message":"Failing test case (test_rebuild_server_with_auto_disk_config) doesn\u0027t seem to be related to patch:\n\n (ServerDiskConfigTestJSON:test_rebuild_server_with_auto_disk_config) Server c579beb9-3860-47c5-a4f9-c3cfe713244e failed to reach ACTIVE status and task state \"None\" within the required time (196 s). Current status: REBUILD. Current task state: rebuild_spawning.","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"e42e72fa8f89e6ff07398a0a77f3e2ae31e2e874","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"383393c4_d4d3f6f6","updated":"2022-02-18 23:51:27.000000000","message":"Thanks for the review. Please see responses in-line","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"f0ed7a99d7b1b4335925041da910e52d04ae3e46","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":2,"id":"70da05e3_0d6371fd","updated":"2022-02-16 17:22:06.000000000","message":"recheck","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"4a37ce965a3c72cf751be478994de62a108df28d","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":3,"id":"dc1b1814_e5305588","updated":"2022-02-19 16:19:02.000000000","message":"recheck","commit_id":"396532c61b8407e476b3687c2b566e3209cb81df"},{"author":{"_account_id":16688,"name":"Rodolfo Alonso","email":"ralonsoh@redhat.com","username":"rodolfo-alonso-hernandez"},"change_message_id":"1d424985a79218f8a7962bda079e049f85d374a0","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"e3a63c5e_be9f4d60","updated":"2022-02-21 14:35:40.000000000","message":"-1 just for visibility ","commit_id":"1f9e374ac270408db7e7248edf1084d39a1544ff"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"d67581ba86874d44f221910dbfbddbfa2acd1e6a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"b6e7944b_de992252","updated":"2022-02-25 00:45:11.000000000","message":"Another question for @Rodolfo","commit_id":"1f9e374ac270408db7e7248edf1084d39a1544ff"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"b5c6f32092f5bc422cad31b8061c2e01acac0b2c","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"f7d20d5f_56703b31","updated":"2022-03-01 14:43:45.000000000","message":"Follow up question","commit_id":"1f9e374ac270408db7e7248edf1084d39a1544ff"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"71dd01bdcdc7d76b3eeaff21762c10d4221836bc","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"928e5d99_097e39ec","updated":"2022-02-23 23:50:38.000000000","message":"Just added a question for Rodolfo in-line","commit_id":"1f9e374ac270408db7e7248edf1084d39a1544ff"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"4713c4d17c22b67679f8baccd9e779e75685655c","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":4,"id":"687710c9_8250af9d","updated":"2022-02-21 14:00:04.000000000","message":"recheck","commit_id":"1f9e374ac270408db7e7248edf1084d39a1544ff"}],"neutron/services/trunk/drivers/openvswitch/agent/ovsdb_handler.py":[{"author":{"_account_id":5948,"name":"Oleg Bondarev","email":"obondarev@mirantis.com","username":"obondarev"},"change_message_id":"69c9c41b8d8a428ff66157945d676a1601aead76","unresolved":true,"context_lines":[{"line_number":121,"context_line":"        self._context \u003d n_context.get_admin_context_without_session()"},{"line_number":122,"context_line":"        self.trunk_manager \u003d trunk_manager"},{"line_number":123,"context_line":"        self.trunk_rpc \u003d agent.TrunkStub()"},{"line_number":124,"context_line":"        self.trunks_awaiting_creation \u003d {}"},{"line_number":125,"context_line":""},{"line_number":126,"context_line":"    @property"},{"line_number":127,"context_line":"    def context(self):"}],"source_content_type":"text/x-python","patch_set":2,"id":"40dfc28c_2e9c398d","line":124,"range":{"start_line":124,"start_character":13,"end_line":124,"end_character":37},"updated":"2022-02-17 09:31:50.000000000","message":"would set() be enough here?","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"e42e72fa8f89e6ff07398a0a77f3e2ae31e2e874","unresolved":true,"context_lines":[{"line_number":121,"context_line":"        self._context \u003d n_context.get_admin_context_without_session()"},{"line_number":122,"context_line":"        self.trunk_manager \u003d trunk_manager"},{"line_number":123,"context_line":"        self.trunk_rpc \u003d agent.TrunkStub()"},{"line_number":124,"context_line":"        self.trunks_awaiting_creation \u003d {}"},{"line_number":125,"context_line":""},{"line_number":126,"context_line":"    @property"},{"line_number":127,"context_line":"    def context(self):"}],"source_content_type":"text/x-python","patch_set":2,"id":"b8500d14_e83435bb","line":124,"range":{"start_line":124,"start_character":13,"end_line":124,"end_character":37},"in_reply_to":"40dfc28c_2e9c398d","updated":"2022-02-18 23:51:27.000000000","message":"Good point! Done","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":5948,"name":"Oleg Bondarev","email":"obondarev@mirantis.com","username":"obondarev"},"change_message_id":"ae16b2f708c6cc3a3e7a8a5ee8c312491dadeb6d","unresolved":true,"context_lines":[{"line_number":223,"context_line":"        # try to mitigate the issue by checking if there is a port on the"},{"line_number":224,"context_line":"        # bridge and if so then do not remove it."},{"line_number":225,"context_line":"        bridge \u003d ovs_lib.OVSBridge(bridge_name)"},{"line_number":226,"context_line":"        if bridge_has_instance_port(bridge):"},{"line_number":227,"context_line":"            LOG.debug(\"The bridge %s has instances attached so it will not \""},{"line_number":228,"context_line":"                      \"be deleted.\", bridge_name)"},{"line_number":229,"context_line":"            return"}],"source_content_type":"text/x-python","patch_set":2,"id":"7362c8fd_4173bb22","line":226,"range":{"start_line":226,"start_character":8,"end_line":226,"end_character":44},"updated":"2022-02-17 09:26:36.000000000","message":"wondering why this does not prevent trunk bridge removal in case port is re-added?","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"e42e72fa8f89e6ff07398a0a77f3e2ae31e2e874","unresolved":true,"context_lines":[{"line_number":223,"context_line":"        # try to mitigate the issue by checking if there is a port on the"},{"line_number":224,"context_line":"        # bridge and if so then do not remove it."},{"line_number":225,"context_line":"        bridge \u003d ovs_lib.OVSBridge(bridge_name)"},{"line_number":226,"context_line":"        if bridge_has_instance_port(bridge):"},{"line_number":227,"context_line":"            LOG.debug(\"The bridge %s has instances attached so it will not \""},{"line_number":228,"context_line":"                      \"be deleted.\", bridge_name)"},{"line_number":229,"context_line":"            return"}],"source_content_type":"text/x-python","patch_set":2,"id":"cb28e2f4_ddf0f6e0","line":226,"range":{"start_line":226,"start_character":8,"end_line":226,"end_character":44},"in_reply_to":"7362c8fd_4173bb22","updated":"2022-02-18 23:51:27.000000000","message":"Looking at the definition of function bridge_has_instance_port (line 98 above), we get true here if we find a port that is not a trunk service port, i.e. if we find a port whose name doesn\u0027t start with \u0027tp\u0027 or \u0027sp\u0027. Please look at line 1 here: https://paste.openstack.org/show/bjJDTSXYj95NpaSjxHsx/. The interface \u0027vhu7003efbf-21\u0027 (the instance port) has been removed from bridge \u0027tbr-6fa74ed9-4\u0027 and that\u0027s why this greenthread is started by line 152 above in the first place. This is the only instance port, so execution continues in line 230 below. \n\nWhat happens next is that Nova (via os-vif) very quickly adds the same interface again (line 11 in https://paste.openstack.org/show/bjJDTSXYj95NpaSjxHsx/), because the corresponding instance is being rebooted. But by then, we already executed bridge_has_instance_port and we are well on our way to delete the trunk resources, as you can see in the log\u0027s lines 5 and 7, but right before we delete delete the bridge (line 13 in the same log)","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":5948,"name":"Oleg Bondarev","email":"obondarev@mirantis.com","username":"obondarev"},"change_message_id":"ae16b2f708c6cc3a3e7a8a5ee8c312491dadeb6d","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"be33fdcf_11d52f79","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"updated":"2022-02-17 09:26:36.000000000","message":"What happens if we haven\u0027t yet received a notification to create it again? Where will trunk bridge be recreated?","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"71dd01bdcdc7d76b3eeaff21762c10d4221836bc","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"af900838_7411620e","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"19aa89bc_12e2e38a","updated":"2022-02-23 23:50:38.000000000","message":"@Rodolfo,\n\n\"This \"unplug\" method uses a transaction context; that means the port and the trunk deletion will be done together. In the \"bridge\" register, in the \"external_ids\" dictionary field, I would include the timestamp. In the \"inplug\" transaction I would check if the timestamp has been updated recently (in the last second) and I would skip/continue depending on this.\"\n\nSo you are suggesting that here https://github.com/openstack/neutron/blob/5c47957e89062c1e99b6e7dc28e96eff52ce514e/neutron/services/trunk/drivers/openvswitch/agent/trunk_manager.py#L138-L141 in one transaction we:\n\n1) Check that the time stamp in the bridge\u0027s external_ids is less than x seconds old. If not, return. If yes then....\n2) Delete the bridge\n3) Delete the port\n\nHow do you express these three steps above in such a way that it is one OVSDB transaction?","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"3eef688b84eed48d5ea3cce4019ad13f34f925a7","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"9dc33e6e_e2926935","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"365c3a4a_491db1f9","updated":"2022-02-22 00:49:20.000000000","message":"@Oleg, @Rodolfo, @Lajos,\n\nFirst of all, thanks for the comment and suggestions :-)\n\n@Oleg, in principle, you are right, the vhu interface might be re-added by Nova / os-vif right after line 242 is executed (i.e. after this green thread has decided to delete the bridge) and we end up with the same problem. That is why I proposed [1], where we ensure the existence of the bridge and add the vhu interface in one ovsdb transaction. The general aim is to leverage the ovsdb transaction isolation to fix this issue.\n\nNow, as for @Rodolfo\u0027s suggestion, looking at the trunk unplug code (https://github.com/openstack/neutron/blob/d7953381c45a058d01204aed36e4461764cbe5bc/neutron/services/trunk/drivers/openvswitch/agent/trunk_manager.py#L138-L141) the bridge and port removal are already in one ovsdb transaction. So in combination with [1], we are already water tight, aren\u0027t we? What do others think? I just don\u0027t want to over engineer the solution\n\n[1] https://review.opendev.org/c/openstack/os-vif/+/830103","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"d67581ba86874d44f221910dbfbddbfa2acd1e6a","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"f68654fe_cc5e69db","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"3d315a84_f3b05a5e","updated":"2022-02-25 00:45:11.000000000","message":"Thanks Rodolfo.\n\nI may be missing something but:\n\nLet\u0027s say that we add a timestamp to the bridge in this transaction: https://review.opendev.org/c/openstack/os-vif/+/830103/1/vif_plug_ovs/ovsdb/ovsdb_lib.py#183\n\nWhat prevents the execution of this transaction in os-vif between the bridge lookup and the bridge deletion in the code you propose for the neutron agent in your previous response? The way I see it is that the following sequence is perfectly possible:\n\n1) We retrieve in the neutron agent with the bridge lookup you propose above a time stamp that is more than 1 second old\n2) The compute agent executes the code in the with block here https://review.opendev.org/c/openstack/os-vif/+/830103/1/vif_plug_ovs/ovsdb/ovsdb_lib.py#180 and upon reaching the end of the block the transaction is sent to the OVSDB server via the OVS IDL\n3) We delete the bridge anyways in the neutron agent because in step 1 we have the old time stamp, not the most recent one\n\nIn other words, the bridge lookup is not executed within the OVSDB transaction that is being created in the code you propose above. It executes independently of that transaction. It just happens to be in the same with block","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":16688,"name":"Rodolfo Alonso","email":"ralonsoh@redhat.com","username":"rodolfo-alonso-hernandez"},"change_message_id":"43e3d09a429e6bcc0122fdb4b0ac02410f064e09","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"fa5e0359_be4c05c2","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"4ccdcd62_567d5f56","updated":"2022-03-08 09:40:05.000000000","message":"The chances for this to happen are very low. Even with the possibility, this method to sync os-vif and Neutron is more robust than the current one.\n\nThe next step could be to modify the OVS API (and OvsdbVsctl until we remove it), adding a new parameter to \"del_br\", that could be a deletion condition. I wouldn\u0027t make this condition too complicated: just a parameter that will be the \"ts_update\" and will be a float. ovsdbapp will check, in \"DelBridgeCommand\", the external_ids:ts_update stored and will compare it. If provided \"ts_updated\" is greater than the one stored, the bridge should be deleted. That check will happen during the txn commit.\n\nBTW, to check this you can use \"OVSBridgeTestBase\". I\u0027ve implemented this code using the existing FT code [1]. It could be more useful.\n\n[1]https://paste.opendev.org/show/btlxUlpTcWgX4ZFZTQoT/","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":5948,"name":"Oleg Bondarev","email":"obondarev@mirantis.com","username":"obondarev"},"change_message_id":"09e9e3591efdbb8f359fe597177bb3c8e02e8a18","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"c2972743_aa263f64","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"4e3e4628_d449a6bd","updated":"2022-02-21 12:41:36.000000000","message":"So is my understanding correct: if new port notification comes after execution passes line 242 - the bridge will be deleted and we would still have an issue? In your opinion what\u0027s the chance that this new port notification will always come before line 242? Sorry, I might still miss something","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":8313,"name":"Lajos Katona","display_name":"lajoskatona","email":"katonalala@gmail.com","username":"elajkat","status":"Ericsson Software Technology"},"change_message_id":"9f49f7aacd873161d32f12709c49fae9af9af218","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"365c3a4a_491db1f9","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"4ffa6e67_9e375fea","updated":"2022-02-21 15:17:44.000000000","message":"I see the same, with the os-vif change together the plug will be transaction like, so the situation is much better.","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"6e41d1d3e73ce30b3e253f9bc1b92f6f9f164e0e","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"19aa89bc_12e2e38a","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"9dc33e6e_e2926935","updated":"2022-02-22 02:33:17.000000000","message":"Ohh! Yeah, I see @Rodolfo\u0027s point. It is always possible that the transaction on the os-vif side may completely execute after line 242 and before we execute the transaction in the trunk unplug. I\u0027ll work on this tomorrow","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":16688,"name":"Rodolfo Alonso","email":"ralonsoh@redhat.com","username":"rodolfo-alonso-hernandez"},"change_message_id":"d45ef227161981e3a116e8abf7d8fcdd49a17e11","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"3d315a84_f3b05a5e","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"af900838_7411620e","updated":"2022-02-24 17:10:14.000000000","message":"I would use ovs_idl.Backend.lookup, something like this:\n\novsdb \u003d self.bridge.ovsdb\nwith ovsdb.transaction() as txn:\n     br \u003d ovsdb.lookup(\u0027Bridge\u0027, self.bridge.br_name, default\u003dNone)\n     if br:\n         ts_update \u003d float(br.external_ids.get(\u0027ts_update\u0027, 0))\n         delta \u003d compare_timestamps(ts_update)\n         if delta \u003c 1:  # This bridge has been updated right now.\n             return\n\n     txn.add(ovsdb.del_br(self.bridge.br_name))\n     txn.add(ovsdb.del_port(self.patch_port_int_name,\n             bridge.br_name))\n\n\ndef compare_timestamps(before, after\u003dNone):\n    # Input: float, float\n    # Returns a floating number, up to microseconds\n    after \u003d (datetime.datetime.timestamp(after)\n             if after or datetime.datetime.now())\n    before \u003d datetime.datetime.timestamp(before)\n    return (after - after).total_seconds()","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"e42e72fa8f89e6ff07398a0a77f3e2ae31e2e874","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"4e3e4628_d449a6bd","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"be33fdcf_11d52f79","updated":"2022-02-18 23:51:27.000000000","message":"As explained in my response above (line 226) we indeed receive the notification that the instance port has been added again (line 11 in the log) but we have handle_trunk_add waiting for the trunk bridge semaphore (\"tbr-6fa74ed9-4\" in our example). When this greenthread releases the semaphore (line 19 in the log), handle_trunk_add acquires it (line 21 in the log) and then fails miserably because the bridge is gone. It is this race condition that I am trying to fix.","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"b5c6f32092f5bc422cad31b8061c2e01acac0b2c","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"cd8a5a22_ba5a08d1","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"bf7593b2_bff7bbfa","updated":"2022-03-01 14:43:45.000000000","message":"@Rodolfo,\n\nBased on your previous response and after reading https://rodolfo-alonso.com/ovsdbapp-your-library-for-open-vswitch-and-ovn, I devised this experiment to run inside a devstack: https://gist.github.com/miguellavalle/2200d0c48cec0c9b7308cd44c4eb78a4.\n\nHere\u0027s how the experiment works:\n\n1) connect_ovsdb.py is just a copy of the code you have in your blog entry, so I can import it in neutron_agent.py and os_vif.py.\n\n2) I run neutron_agent.py and while it is sleeping, I run os_vif.py. My expectation was that os_vif.py was going to be delayed by the open transaction in neutron_agent.py. That\u0027s not what happens, though. os_vif.py finishes right away.\n\nWhat am I doing wrong?","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":16688,"name":"Rodolfo Alonso","email":"ralonsoh@redhat.com","username":"rodolfo-alonso-hernandez"},"change_message_id":"1d424985a79218f8a7962bda079e049f85d374a0","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"4ffa6e67_9e375fea","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"c2972743_aa263f64","updated":"2022-02-21 14:35:40.000000000","message":"Right, bridge can be deleted between L242 and L243.\n\nBut this patch should be complemented with [1]. That means Nova now creates the bridge and the port in the same OVSDB txn. That will be detected by the OVS monitor and the event captured by \"process_trunk_port_events\".\n\nOf course there is chance of race condition here, as you commented. I would improve this feature using again, as in [1], the OVSDB transactionality: the \"remove_trunk\" method calls \"trunk.unplug\". This \"unplug\" method uses a transaction context; that means the port and the trunk deletion will be done together. In the \"bridge\" register, in the \"external_ids\" dictionary field, I would include the timestamp. In the \"inplug\" transaction I would check if the timestamp has been updated recently (in the last second) and I would skip/continue depending on this. Of course, that implies Nova compute and Neutron OVS agent should be sync (they are running in the same host, they should).\n\n[1]https://review.opendev.org/c/openstack/os-vif/+/830103","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":16688,"name":"Rodolfo Alonso","email":"ralonsoh@redhat.com","username":"rodolfo-alonso-hernandez"},"change_message_id":"1995741a2a5f1009b8b0af8a12aff049a9aee4f1","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"f0c75edf_9d36494b","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"cd8a5a22_ba5a08d1","updated":"2022-03-01 15:57:17.000000000","message":"ovsdbapp IDL implementation is transactionally safe but with writing commands (those ones you actually add to the txn context). But the lookup methods read from the IDL local cache [1]. Thus we are txn safe for write commands but the read ones are not.\n\nThis is why in this example, \"ts_update\" will have an older value, read before \"os_vif.py\" sets \"ts_update\u003d3\".\n\nThe OVS DB transactionality will prevent from deleting and creating the bridge at the same time. And the timestamp will let Neutron know if this bridge has been created in the last second or less, that implies the creation has been triggered again just before the previous deletion and this deletion should be stopped.\n\n\n[1] from the article: \"The IDL instance (Interface Definition Language) mantains a in-memory replica of the database. Here is where we go when we execute a database read command, instead of creating a transaction, building a query and executing this query on the OVSDB server.\"","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":4694,"name":"Miguel Lavalle","email":"miguel@mlavalle.com","username":"minsel"},"change_message_id":"24d52961f4812873c40f30e4627642b8ad685d55","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"4ccdcd62_567d5f56","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"f0c75edf_9d36494b","updated":"2022-03-03 01:03:42.000000000","message":"@Rodolfo,\n\nThe upshot of your latest explanation (\"\"The IDL instance (Interface Definition Language) mantains a in-memory replica of the database. Here is where we go when we execute a database read command, instead of creating a transaction, building a query and executing this query on the OVSDB server.\") is that ovsdb.lookup will not serve us to serialize the transactions in the neutron agent and os-vif. Let me explain. Please look at this gist: https://gist.github.com/miguellavalle/d04f8e660afdd5cf06b86cebfeab316e. Let\u0027s say we run the two programs in parallel. A possible scenario is one in which:\n\n1) neutron_agent.py executes up to line 12 and decides to execute line 13, but just before actually executing it...\n\n2) os_vif.py executes completely\n\n3) neutron_agent.py resumes, executes line 13 and deletes the bridge, even though it shouldn\u0027t have because the bridge has been updated very recently\n\nAs I said in one of my previous comments, in neutron_agent.py the bridge lookup and the decision to delete the bridge happen outside the transaction.\n\nBased on this, I think we have 3 alternatives:\n\n1) If in the case of vhost user each trunk bridge only has one instance port, why don\u0027t we let os-vif delete the bridge when it unplugs the vif. That way the bridge deletion and it subsequent re-addition will be perfectly serialized.\n\n2) If the previous point is not feasible, we could implement a periodic job that deletes, for the vhost user case, \"stale\" bridges that have not been used in a certain period of time. The period can be even configurable by the user\n\n3) Looking at https://datatracker.ietf.org/doc/html/rfc7047.txt#section-5.2 and more specifically at https://datatracker.ietf.org/doc/html/rfc7047.txt#section-5.2.5, it seems that the json-rpc protocol allows to specify a where clause in a delete operation. It doesn\u0027t seem to me that ovsdbapp gives access to that feature, but maybe we can explore using it, so we delete only \"stale\" bridges in a transaction","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"},{"author":{"_account_id":16688,"name":"Rodolfo Alonso","email":"ralonsoh@redhat.com","username":"rodolfo-alonso-hernandez"},"change_message_id":"587ba36f16d075bd26a57d84644f0817573f63d7","unresolved":true,"context_lines":[{"line_number":237,"context_line":"            # is due to a live migration or reassociation of a trunk to a new"},{"line_number":238,"context_line":"            # VM."},{"line_number":239,"context_line":"            self.unwire_subports_for_trunk(trunk_id, subport_ids)"},{"line_number":240,"context_line":"            # We only remove the trunk if we didn\u0027t receive a notification to"},{"line_number":241,"context_line":"            # create it again after this greenthread started execution"},{"line_number":242,"context_line":"            if not self.trunks_awaiting_creation.get(bridge_name, False):"},{"line_number":243,"context_line":"                self.trunk_manager.remove_trunk(trunk_id, parent_port_id)"},{"line_number":244,"context_line":"        except tman.TrunkManagerError as te:"}],"source_content_type":"text/x-python","patch_set":2,"id":"bf7593b2_bff7bbfa","line":241,"range":{"start_line":240,"start_character":12,"end_line":241,"end_character":70},"in_reply_to":"f68654fe_cc5e69db","updated":"2022-02-25 15:12:36.000000000","message":"(2) can\u0027t happen if you have started (1). In (1), we have already started a transaction, thus os-vif can\u0027t create the bridge at the same time.\n\nIf we are already in (1), then Neutron will delete the bridge. Then os-vif will create it again.\n\nIf os-vif creates the port some msecs before Neutron tries to delete the bridge, this condition will abort this deletion.","commit_id":"a7e5cb0e68e094a920c52d1b04ad4626634616ad"}]}
