{"/COMMIT_MSG":[{"author":{"_account_id":9531,"name":"liuyulong","display_name":"LIU Yulong","email":"i@liuyulong.me","username":"LIU-Yulong"},"change_message_id":"25d7144e45f1f8755a59b0462acd51f06b6fff93","unresolved":false,"context_lines":[{"line_number":16,"context_line":"specified \"batch_interval\" time. If the callback method is not"},{"line_number":17,"context_line":"synchronous with the notify thread execution (what [1] implemented),"},{"line_number":18,"context_line":"the thread can finish while the RPC client is still sending the"},{"line_number":19,"context_line":"HA router states. If another HA state update is received, then both"},{"line_number":20,"context_line":"updates can be executed at the same time. It is possible then that a new"},{"line_number":21,"context_line":"router state can be overwritten with an old one still not sent or"},{"line_number":22,"context_line":"processed."},{"line_number":23,"context_line":""}],"source_content_type":"text/x-gerrit-commit-message","patch_set":1,"id":"7faddb67_f23fc684","line":20,"range":{"start_line":19,"start_character":18,"end_line":20,"end_character":41},"updated":"2019-07-24 15:25:46.000000000","message":"This is main the issue, and it is interesting, I have a patch before, which uses the ResourceProcessingQueue to prevent such issue:\nhttps://review.opendev.org/#/c/275614/\n\nThe issue I tested is concurrently creating and deleting the router.","commit_id":"1fde7512c321179db4817386c825e054e7cbeec1"},{"author":{"_account_id":16688,"name":"Rodolfo Alonso","email":"ralonsoh@redhat.com","username":"rodolfo-alonso-hernandez"},"change_message_id":"c6506c2564c3401d1f0e9b5fde70c8d505682d6e","unresolved":false,"context_lines":[{"line_number":16,"context_line":"specified \"batch_interval\" time. If the callback method is not"},{"line_number":17,"context_line":"synchronous with the notify thread execution (what [1] implemented),"},{"line_number":18,"context_line":"the thread can finish while the RPC client is still sending the"},{"line_number":19,"context_line":"HA router states. If another HA state update is received, then both"},{"line_number":20,"context_line":"updates can be executed at the same time. It is possible then that a new"},{"line_number":21,"context_line":"router state can be overwritten with an old one still not sent or"},{"line_number":22,"context_line":"processed."},{"line_number":23,"context_line":""}],"source_content_type":"text/x-gerrit-commit-message","patch_set":1,"id":"7faddb67_5df25f08","line":20,"range":{"start_line":19,"start_character":18,"end_line":20,"end_character":41},"in_reply_to":"7faddb67_f23fc684","updated":"2019-07-25 13:16:23.000000000","message":"Of course this is the main issue and that\u0027s why I\u0027m proposing this patch. We can\u0027t allow two threads to send RPC messages to update the same routers because we can have the situation when an old message overwrites a newer value.\n\nThe batch notifier is used by the L3 agent, the Ironic notifier, the Nova notifier, the Nova segment notifier and the placement report plugin. All of them use synchronized callbacks, that\u0027s the goal of the BatchNotifier. 
If we are sending the RPC calls in a non-blocking way, using parallel threads, what\u0027s the purpose of the BatchNotifier?","commit_id":"1fde7512c321179db4817386c825e054e7cbeec1"},{"author":{"_account_id":9531,"name":"liuyulong","display_name":"LIU Yulong","email":"i@liuyulong.me","username":"LIU-Yulong"},"change_message_id":"cce65f81b32417663046b6222b904b1bdb2d26e6","unresolved":false,"context_lines":[{"line_number":9,"context_line":"This patch is the first one of a series of patches improving how the L3"},{"line_number":10,"context_line":"agents update the router HA state to the Neutron server."},{"line_number":11,"context_line":""},{"line_number":12,"context_line":"This patch reverts the previous patch [1]. When the batch notifier sends"},{"line_number":13,"context_line":"events, it calls the callback method passed during the initialization, in"},{"line_number":14,"context_line":"this case AgentMixin.notify_server. The batch notifier spawns a new"},{"line_number":15,"context_line":"thread in charge of sending the notifications and then wait the"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"7faddb67_6874cdcf","line":12,"range":{"start_line":12,"start_character":11,"end_line":12,"end_character":18},"updated":"2019-08-01 15:34:10.000000000","message":"Seems this patch partially reverts [1], it is still \u0027cast\u0027 for that method.","commit_id":"197afb735a73401a767d50e8270d6af433875649"},{"author":{"_account_id":16688,"name":"Rodolfo Alonso","email":"ralonsoh@redhat.com","username":"rodolfo-alonso-hernandez"},"change_message_id":"7fb1ae5780c1de7b67996b50b89ffb6edb5d4279","unresolved":false,"context_lines":[{"line_number":9,"context_line":"This patch is the first one of a series of patches improving how the L3"},{"line_number":10,"context_line":"agents update the router HA state to the Neutron server."},{"line_number":11,"context_line":""},{"line_number":12,"context_line":"This patch reverts the previous patch [1]. When the batch notifier sends"},{"line_number":13,"context_line":"events, it calls the callback method passed during the initialization, in"},{"line_number":14,"context_line":"this case AgentMixin.notify_server. 
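To make the race described in the commit message and in this thread concrete, here is a minimal, self-contained sketch (plain Python threads and hypothetical names, not Neutron code): when two notifier threads send state updates for the same router in parallel, the older, slower update can arrive last and overwrite the newer state.

    import threading
    import time

    server_state = {}          # what the "server" ends up storing
    lock = threading.Lock()

    def send_update(router_id, state, rpc_delay):
        time.sleep(rpc_delay)  # simulate a slow RPC send still in flight
        with lock:
            server_state[router_id] = state

    # The old update ("standby") is still being sent when the new one
    # ("active") goes out from a second thread; the slower, older message
    # arrives last and wins.
    t_old = threading.Thread(target=send_update, args=('r1', 'standby', 0.2))
    t_new = threading.Thread(target=send_update, args=('r1', 'active', 0.0))
    t_old.start()
    t_new.start()
    t_old.join()
    t_new.join()
    print(server_state)        # {'r1': 'standby'} -- the newer state was lost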
File: /COMMIT_MSG (patch set 2, comment on commit message line 12)

Commented text:
    This patch reverts the previous patch [1]. When the batch notifier sends
    events, it calls the callback method passed during the initialization,
    in this case AgentMixin.notify_server.

LIU Yulong (2019-08-01):
    It seems this patch only partially reverts [1]: that method still uses
    'cast'.

Rodolfo Alonso (2019-08-01, reply):
    Done

File: neutron/agent/l3/agent.py (patch set 1, line 184)

Commented code:
    def update_ha_routers_states(self, context, states):
        """Update HA routers states."""
        cctxt = self.client.prepare(version='1.5')
        return cctxt.call(context, 'update_ha_routers_states',
                          host=self.host, states=states)

LIU Yulong (2019-07-24):
    If 'call' is really needed, IMO a return value could be added to the
    neutron-server side RPC method, and that value checked here (or
    elsewhere on the agent side) to make sure the call has really
    completed. Then the next queued event can be taken and called in the
    next loop iteration.

Rodolfo Alonso (2019-07-25, reply):
    That would have the same effect: not waiting for the RPC reply. But in
    this case, at least the RPC call is done and will be processed before
    the next calls.
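For context on the 'call' vs 'cast' point, here is a sketch of the two oslo.messaging variants of the method quoted above; the cast variant is shown only for comparison and is not part of the patch. 'call' blocks until the server replies, so successive updates from the agent are serialized on the reply, while 'cast' hands the message to the transport and returns immediately.

    def update_ha_routers_states(self, context, states):
        """Blocking variant (as in the reviewed code): waits for the
        server-side reply before returning."""
        cctxt = self.client.prepare(version='1.5')
        return cctxt.call(context, 'update_ha_routers_states',
                          host=self.host, states=states)

    def update_ha_routers_states_cast(self, context, states):
        """Fire-and-forget variant, for comparison: returns as soon as the
        message is handed to the transport; no reply is awaited."""
        cctxt = self.client.prepare(version='1.5')
        cctxt.cast(context, 'update_ha_routers_states',
                   host=self.host, states=states)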
File: neutron/notifiers/batch_notifier.py (patch set 1, lines 55-56)

Commented code:
    def synced_send():
        if not self._mutex.locked():
            with self._mutex:
                while not self._pending_events.empty():
                    self._notify()
                    # sleeping after send while holding the lock allows
                    # subsequent events to batch up
                    eventlet.sleep(self.batch_interval)

LIU Yulong (2019-07-24):
    Wouldn't a single "with self._mutex" work here?

Rodolfo Alonso (2019-07-25, reply):
    That would wait until the mutex is released. What I need is to exit
    this method if the mutex is already taken.
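A minimal sketch of the two behaviours being discussed (hypothetical names, plain threading.Lock rather than the oslo/eventlet primitives Neutron uses): a bare "with" block makes every caller queue up and eventually run the drain loop itself, whereas the non-blocking check lets later callers return immediately while one thread drains the queue.

    import threading

    _mutex = threading.Lock()

    def blocking_send(drain):
        # Bare "with _mutex": a second caller blocks here until the first
        # one releases the lock, then runs the drain loop again itself.
        with _mutex:
            drain()

    def non_blocking_send(drain):
        # Pattern in the reviewed code: bail out immediately if another
        # thread is already draining the queue. acquire(blocking=False) is
        # the race-free equivalent of "if not _mutex.locked(): with _mutex:".
        if not _mutex.acquire(blocking=False):
            return
        try:
            drain()
        finally:
            _mutex.release()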
File: neutron/notifiers/batch_notifier.py (patch set 1, line 58)

Commented code:
    while not self._pending_events.empty():
        self._notify()
        # sleeping after send while holding the lock allows
        # subsequent events to batch up
        eventlet.sleep(self.batch_interval)

LIU Yulong (2019-07-24):
    If this is time-consuming, all other routers' state changes will be
    blocked by that mutex. Will bug #1824911 come back?

    2019-04-12 09:38:35.435 40627 DEBUG oslo_concurrency.lockutils [-] Lock "notifier-58a2b315-d411-4aa0-bb23-7c0da0b57a70" acquired by "neutron.notifiers.batch_notifier.synced_send" :: waited 16.216s inner /usr/lib/python2.7/site-packages/oslo_concurrency/lockutils.py:273
    2019-04-12 09:38:37.435 40627 DEBUG oslo_concurrency.lockutils [-] Lock "notifier-58a2b315-d411-4aa0-bb23-7c0da0b57a70" released by "neutron.notifiers.batch_notifier.synced_send" :: held 2.000s inner /usr/lib/python2.7/site-packages/oslo_concurrency/lockutils.py:285
    2019-04-12 09:38:37.436 40627 DEBUG oslo_concurrency.lockutils [-] Lock "notifier-58a2b315-d411-4aa0-bb23-7c0da0b57a70" acquired by "neutron.notifiers.batch_notifier.synced_send" :: waited 16.639s inner /usr/lib/python2.7/site-packages/oslo_concurrency/lockutils.py:273

Rodolfo Alonso (2019-07-25, reply):
    Not if we call "cast" instead of "call". But IMO we should handle RPC
    calls for the same router in the order they were received (this is
    technical debt).

File: neutron/tests/unit/notifiers/test_nova.py (patch set 3, line 304)

Commented code:
    event_assoc = self.nova_notifier.create_port_changed_event(
        'update_floatingip', original_obj, returned_obj)
    self.assertEqual(
        self.nova_notifier.batch_notifier._pending_events.get(), event_dis)
    self.assertEqual(
        self.nova_notifier.batch_notifier._pending_events.get(),
        event_assoc)

Slawek Kaplonski (2019-08-05):
    IIUC this is now effectively

        self.assertEqual(actual, expected)

    and the arguments should be the other way round. But that could be
    changed in a follow-up patch, IMO.
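For reference, the argument order suggested by the review comment (a possible follow-up change, not part of this patch) would put the expected value first so that failure messages read correctly:

    self.assertEqual(
        event_dis, self.nova_notifier.batch_notifier._pending_events.get())
    self.assertEqual(
        event_assoc, self.nova_notifier.batch_notifier._pending_events.get())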
