)]}'
{".zuul.yaml":[{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"81624b968bce53b4e6f101e9ece95cea549b7488","unresolved":false,"context_lines":[{"line_number":103,"context_line":"- job:"},{"line_number":104,"context_line":"    name: zuul-tox-py38"},{"line_number":105,"context_line":"    parent: zuul-tox"},{"line_number":106,"context_line":"    timeout: 6000  # 100 minutes"},{"line_number":107,"context_line":"    vars:"},{"line_number":108,"context_line":"      tox_envlist: py38"},{"line_number":109,"context_line":"      python_version: 3.8"}],"source_content_type":"text/x-yaml","patch_set":20,"id":"c64d2dc5_a76c3942","line":106,"updated":"2021-05-02 18:35:07.000000000","message":"I\u0027m not seeing an increase in runtime locally after I made a change to the test settled check.","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"}],"tests/base.py":[{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"876d3063a567656ec51ba3cca3620638f5121bde","unresolved":false,"context_lines":[{"line_number":3108,"context_line":""},{"line_number":3109,"context_line":""},{"line_number":3110,"context_line":"class TestExecutorClient(zuul.executor.client.ExecutorClient):"},{"line_number":3111,"context_line":"    _executor_api_class \u003d TestExecutorApi"},{"line_number":3112,"context_line":""},{"line_number":3113,"context_line":""},{"line_number":3114,"context_line":"class RecordingExecutorServer(zuul.executor.server.ExecutorServer):"}],"source_content_type":"text/x-python","patch_set":20,"id":"1e86a20d_3b76600d","line":3111,"updated":"2021-04-24 00:11:00.000000000","message":"Starting a class name with Test indicates that it holds unit tests and the autoloader will search it.  We should use other class names for both of these (if it is necessary to have them; I\u0027d like to avoid it as much as possible).","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"81624b968bce53b4e6f101e9ece95cea549b7488","unresolved":false,"context_lines":[{"line_number":4994,"context_line":"            # \"did not report start\" or \"has not been submitted\" messages, we"},{"line_number":4995,"context_line":"            # could wait a while here before checking again if the system is"},{"line_number":4996,"context_line":"            # settled."},{"line_number":4997,"context_line":"            time.sleep(1.0)"},{"line_number":4998,"context_line":""},{"line_number":4999,"context_line":"    def _logQueueStatus(self, logger, matcher, all_zk_queues_empty,"},{"line_number":5000,"context_line":"                        all_merge_jobs_waiting, all_builds_reported,"}],"source_content_type":"text/x-python","patch_set":20,"id":"6055f67f_4644655d","line":4997,"updated":"2021-05-02 18:35:07.000000000","message":"This is why you had to change the timeout.  I set this to 0.1 and that seemed to be enough to let other threads run without slowing down too much.  Still, we should try to eliminate the sleep and use only locks/events.","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"}],"tests/zk/executor.py":[{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"81624b968bce53b4e6f101e9ece95cea549b7488","unresolved":false,"context_lines":[{"line_number":38,"context_line":"    def _iterBuildRequests(self):"},{"line_number":39,"context_line":"        # As this class is mainly used for assertions in the tests, it should"},{"line_number":40,"context_line":"        # look up the build requests directly from ZooKeeper and not from a"},{"line_number":41,"context_line":"        # cache layer."},{"line_number":42,"context_line":"        zones \u003d []"},{"line_number":43,"context_line":"        if self.zone_filter:"},{"line_number":44,"context_line":"            zones \u003d self.zone_filter"}],"source_content_type":"text/x-python","patch_set":20,"id":"3be192f7_2b328a8f","line":41,"updated":"2021-05-02 18:35:07.000000000","message":"This class is used by the executor in the tests, so this means that the actual behavior of getting a build request is never tested.  If we need to iterate over zk for test assertions, let\u0027s add a test-only method for just that.  I did that in my patch series.","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"}],"zuul/executor/server.py":[{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"876d3063a567656ec51ba3cca3620638f5121bde","unresolved":false,"context_lines":[{"line_number":3158,"context_line":"            # 60 minutes. Find a proper way to do that. Maybe we could combine"},{"line_number":3159,"context_line":"            # this with other cleanups in the scheduler and use APScheduler for"},{"line_number":3160,"context_line":"            # proper scheduling."},{"line_number":3161,"context_line":"            time.sleep(5)"},{"line_number":3162,"context_line":""},{"line_number":3163,"context_line":"    def run_governor(self):"},{"line_number":3164,"context_line":"        while not self.governor_stop_event.wait(10):"}],"source_content_type":"text/x-python","patch_set":20,"id":"badd80fc_38658442","line":3161,"updated":"2021-04-24 00:11:00.000000000","message":"I\u0027m not sure every 60 minutes would be okay.  The only way to tell that an executor handling a build has not crashed is if the lock has disappeared, and we want to respond to that very quickly (less than a minute, I think).\n\nThis should probably run in the scheduler and operate across all zones at once, so that if all the executors for a zone go offline, their buildrequests are still released.","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"}],"zuul/model.py":[{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"876d3063a567656ec51ba3cca3620638f5121bde","unresolved":false,"context_lines":[{"line_number":1930,"context_line":""},{"line_number":1931,"context_line":"@total_ordering"},{"line_number":1932,"context_line":"class BuildRequest:"},{"line_number":1933,"context_line":"    \"\"\"A request for a build in a specific zone\"\"\""},{"line_number":1934,"context_line":""},{"line_number":1935,"context_line":"    def __init__("},{"line_number":1936,"context_line":"        self,"}],"source_content_type":"text/x-python","patch_set":20,"id":"162ef751_6dcda3c5","line":1933,"updated":"2021-04-24 00:11:00.000000000","message":"Ultimately, I think we can merge Build and BuildRequest as they have pretty much the same data.  But let\u0027s do that later since having a BuildRequest helps bridge the gap when removing gearman.","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"395aa845763e5c9ee3ba264c7dc6dbd6e707a943","unresolved":false,"context_lines":[{"line_number":2016,"context_line":"    RUNNING \u003d 2"},{"line_number":2017,"context_line":"    PAUSED \u003d 3"},{"line_number":2018,"context_line":"    # Finished"},{"line_number":2019,"context_line":"    COMPLETED \u003d 4"},{"line_number":2020,"context_line":""},{"line_number":2021,"context_line":""},{"line_number":2022,"context_line":"@total_ordering"}],"source_content_type":"text/x-python","patch_set":23,"id":"6fcc0d04_498e2beb","line":2019,"updated":"2021-05-17 23:17:51.000000000","message":"Might be a good idea to switch this to string constants as well, for zk introspection/debugging.","commit_id":"ee2b902bf3c39ad488f9e0cfcff7a98dba152727"},{"author":{"_account_id":4146,"name":"Clark Boylan","email":"cboylan@sapwetik.org","username":"cboylan"},"change_message_id":"fc4cd337e67a4a3bd2e2b79c3c15da43d62edcac","unresolved":true,"context_lines":[{"line_number":2056,"context_line":"    COMPLETED \u003d 4"},{"line_number":2057,"context_line":""},{"line_number":2058,"context_line":""},{"line_number":2059,"context_line":"@total_ordering"},{"line_number":2060,"context_line":"class BuildRequest:"},{"line_number":2061,"context_line":"    \"\"\"A request for a build in a specific zone\"\"\""},{"line_number":2062,"context_line":""}],"source_content_type":"text/x-python","patch_set":35,"id":"37047a81_57c8c094","line":2059,"updated":"2021-06-29 20:52:21.000000000","message":"The docs for this decorator indicate that if performance of the BuildRequest comparisons matters we should implement all of the comparison methods ourselves. However, I don\u0027t expect this is the case in this code as the total number of build requests is counted in the thousands for most zuul users and comparing thousands of entities shouldn\u0027t be too slow either way.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"}],"zuul/zk/__init__.py":[{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"395aa845763e5c9ee3ba264c7dc6dbd6e707a943","unresolved":false,"context_lines":[{"line_number":189,"context_line":"            self.log.debug(json.loads(data.decode(\"utf-8\")))"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"        for node in self.client.get_children(path):"},{"line_number":192,"context_line":"            self._show_tree(f\"{path}/{node}\")"},{"line_number":193,"context_line":""},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"class ZooKeeperSimpleBase(metaclass\u003dABCMeta):"}],"source_content_type":"text/x-python","patch_set":23,"id":"4777bfc1_06fbec7a","line":192,"updated":"2021-05-17 23:17:51.000000000","message":"Since this isn\u0027t used, maybe we should remove it or move it to the test suite?","commit_id":"ee2b902bf3c39ad488f9e0cfcff7a98dba152727"},{"author":{"_account_id":27952,"name":"Felix Edel","email":"felix.edel@bmw.de","username":"felix.schmidt"},"change_message_id":"b42c26295bb575f504f1b6c92f941da635e1bf46","unresolved":false,"context_lines":[{"line_number":189,"context_line":"            self.log.debug(json.loads(data.decode(\"utf-8\")))"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"        for node in self.client.get_children(path):"},{"line_number":192,"context_line":"            self._show_tree(f\"{path}/{node}\")"},{"line_number":193,"context_line":""},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"class ZooKeeperSimpleBase(metaclass\u003dABCMeta):"}],"source_content_type":"text/x-python","patch_set":23,"id":"5209e3da_d2f4e1fa","line":192,"in_reply_to":"4777bfc1_06fbec7a","updated":"2021-05-18 06:25:17.000000000","message":"I needed this for debugging purposes and I thought it could be helpful for the future development. As we are putting more and more into ZooKeeper such a helper method might be useful to find leftovers in ZooKeeper and cleanup opportunities. Maybe there is a better place for this. As mainly put it in here as I didn\u0027t find a better one.","commit_id":"ee2b902bf3c39ad488f9e0cfcff7a98dba152727"},{"author":{"_account_id":4146,"name":"Clark Boylan","email":"cboylan@sapwetik.org","username":"cboylan"},"change_message_id":"fc4cd337e67a4a3bd2e2b79c3c15da43d62edcac","unresolved":true,"context_lines":[{"line_number":198,"context_line":"            data \u003d None"},{"line_number":199,"context_line":""},{"line_number":200,"context_line":"        if data:"},{"line_number":201,"context_line":"            self.log.debug(json.loads(data.decode(\"utf-8\")))"},{"line_number":202,"context_line":""},{"line_number":203,"context_line":"        for node in self.client.get_children(path):"},{"line_number":204,"context_line":"            self._show_tree(f\"{path}/{node}\")"}],"source_content_type":"text/x-python","patch_set":35,"id":"a25539ae_1f3ccaf9","line":201,"updated":"2021-06-29 20:52:21.000000000","message":"Small nit you might consider catching a json decoding error and printing the serialized string if it fails to decode. If this is a debugging aid then it may be used in cases of broken serialization.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"}],"zuul/zk/executor.py":[{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"876d3063a567656ec51ba3cca3620638f5121bde","unresolved":false,"context_lines":[{"line_number":36,"context_line":"    DELETED \u003d 4"},{"line_number":37,"context_line":""},{"line_number":38,"context_line":""},{"line_number":39,"context_line":"class ExecutorApi(ZooKeeperBase):"},{"line_number":40,"context_line":""},{"line_number":41,"context_line":"    BUILD_REQUEST_ROOT \u003d \"/zuul/build-requests\""},{"line_number":42,"context_line":"    LOCK_ROOT \u003d \"/zuul/build-request-locks\""}],"source_content_type":"text/x-python","patch_set":20,"id":"05fbdd5e_b6155bfb","line":39,"updated":"2021-04-24 00:11:00.000000000","message":"Why not SimpleBase?","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"876d3063a567656ec51ba3cca3620638f5121bde","unresolved":false,"context_lines":[{"line_number":40,"context_line":""},{"line_number":41,"context_line":"    BUILD_REQUEST_ROOT \u003d \"/zuul/build-requests\""},{"line_number":42,"context_line":"    LOCK_ROOT \u003d \"/zuul/build-request-locks\""},{"line_number":43,"context_line":"    DEFAULT_ZONE \u003d \"default-zone\""},{"line_number":44,"context_line":""},{"line_number":45,"context_line":"    log \u003d logging.getLogger(\"zuul.zk.executor.ExecutorApi\")"},{"line_number":46,"context_line":""}],"source_content_type":"text/x-python","patch_set":20,"id":"452fb984_7f722487","line":43,"updated":"2021-04-24 00:11:00.000000000","message":"There\u0027s nothing stopping a user from creating a zone called \u0027default-zone\u0027.","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"876d3063a567656ec51ba3cca3620638f5121bde","unresolved":false,"context_lines":[{"line_number":50,"context_line":"        zone_filter\u003dNone,"},{"line_number":51,"context_line":"        build_request_callback\u003dNone,"},{"line_number":52,"context_line":"        build_event_callback\u003dNone,"},{"line_number":53,"context_line":"        use_cache\u003dFalse,"},{"line_number":54,"context_line":"    ):"},{"line_number":55,"context_line":"        super().__init__(client)"},{"line_number":56,"context_line":""}],"source_content_type":"text/x-python","patch_set":20,"id":"0b1fd28a_5a569173","line":53,"updated":"2021-04-24 00:11:00.000000000","message":"I can\u0027t find any code that does anything with caching.  What was this argument meant to support?","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"876d3063a567656ec51ba3cca3620638f5121bde","unresolved":false,"context_lines":[{"line_number":83,"context_line":"            for zone in children:"},{"line_number":84,"context_line":"                self.registerZone(zone)"},{"line_number":85,"context_line":""},{"line_number":86,"context_line":"        self.kazoo_client.ChildrenWatch(self.BUILD_REQUEST_ROOT, watch_zones)"},{"line_number":87,"context_line":""},{"line_number":88,"context_line":"    def registerZone(self, zone):"},{"line_number":89,"context_line":"        self.log.debug(\"Registering zone %s\", zone)"}],"source_content_type":"text/x-python","patch_set":20,"id":"0b2d8690_ec87d556","line":86,"updated":"2021-04-24 00:11:00.000000000","message":"This is an important behavior change.  There is currently no support for an executor running jobs for all zones.  But, if I\u0027m following the code right, this isn\u0027t ever called by the actual executor since it\u0027s only called if zone_filter is None and use_cache is True.\n\nI think we can simplify the zone setup here.  I\u0027ll propose a cahnge.","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"876d3063a567656ec51ba3cca3620638f5121bde","unresolved":false,"context_lines":[{"line_number":143,"context_line":""},{"line_number":144,"context_line":"            # Return False to stop the datawatch as the build got deleted."},{"line_number":145,"context_line":"            return False"},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"    def _watchBuildRequests(self, build_requests, event\u003dNone):"},{"line_number":148,"context_line":"        if event is None:"},{"line_number":149,"context_line":"            return"}],"source_content_type":"text/x-python","patch_set":20,"id":"a2bd5d74_b81c834a","line":146,"updated":"2021-04-24 00:11:00.000000000","message":"Why not use a treecache?\n\nAlso, do we really need a cache?  The scheduler and executor seem to be communicating by creating and deleting nodes, not by mutating data.  I\u0027m not sure there\u0027s actually a case where we\u0027re benefitting from the cache.  Maybe on the executor when receiving a new build request, but we could turn that into a queue where we submit the build uuid pretty easily (so the executor pops a uuid from the queue, tries to lock it, and if it locks it, reads the znode and proceeds).  The event callback already returns a new buildrequest along with the event.\n\nI think that leaves the lostBuildRequests method as the only thing that benefits from a cache?","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"},{"author":{"_account_id":27952,"name":"Felix Edel","email":"felix.edel@bmw.de","username":"felix.schmidt"},"change_message_id":"9421189351c9e2e02d88fff97ae4ded2ac9d6287","unresolved":false,"context_lines":[{"line_number":143,"context_line":""},{"line_number":144,"context_line":"            # Return False to stop the datawatch as the build got deleted."},{"line_number":145,"context_line":"            return False"},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"    def _watchBuildRequests(self, build_requests, event\u003dNone):"},{"line_number":148,"context_line":"        if event is None:"},{"line_number":149,"context_line":"            return"}],"source_content_type":"text/x-python","patch_set":20,"id":"749be23f_3d6abef6","line":146,"in_reply_to":"a2bd5d74_b81c834a","updated":"2021-04-27 08:23:22.000000000","message":"I don\u0027t fully understand that. I thought you were suggesting to use a cache for the builds API to not ask ZooKeeper directly for the builds, but only operate on the cached data structure.\n\nThinking about that know: With the current build result change, the scheduler doesn\u0027t need to process the BuildRequest when a build is completed because the BuildCompletedEvent now contains all information. But, there is also the \"future\" use case of calculating stats metrics like queued/running builds based on the ExecutorApi and for those it would make a big difference if we iterate over all builds in ZK directly or in a cache since we have to evaluate their state (queued/running/completed).\n\nAnother option might be: When each executor stores the number of running builds in its component, we could sum those values up in the scheduler when calculating the stats. For the queued builds we could do something like queued \u003d total - running, whereby total is the number of child nodes in ZK. Calling kazoo.get_children() shouldn\u0027t be too costly.","commit_id":"6b1c982d28ddc69360c1391f5aea99681f9f6656"},{"author":{"_account_id":27952,"name":"Felix Edel","email":"felix.edel@bmw.de","username":"felix.schmidt"},"change_message_id":"3f620a5158c19265c084222569b2ec942664f559","unresolved":true,"context_lines":[{"line_number":377,"context_line":"        )"},{"line_number":378,"context_line":""},{"line_number":379,"context_line":"    @staticmethod"},{"line_number":380,"context_line":"    def _bytesToDict(data):"},{"line_number":381,"context_line":"        return json.loads(data.decode(\"utf-8\"))"},{"line_number":382,"context_line":""},{"line_number":383,"context_line":"    @staticmethod"}],"source_content_type":"text/x-python","patch_set":24,"id":"f6b30d11_b9fe3a2b","line":380,"updated":"2021-05-18 06:55:11.000000000","message":"Side note: I tried to implement these helper methods directly in the ZooKeeperBase class so we can use them in all other ZooKeeper classes. I find it helpful to not have to deal with the encode/decode part every time. However, doing so resulted in some circular import issues (I think because of the json_dumps function being imported in zk/__init__.py). Maybe we could keep that in mind and solve this issue some time.","commit_id":"db381ae62d5b63717d6cf86c69239c76a9f277b9"},{"author":{"_account_id":4146,"name":"Clark Boylan","email":"cboylan@sapwetik.org","username":"cboylan"},"change_message_id":"fc4cd337e67a4a3bd2e2b79c3c15da43d62edcac","unresolved":true,"context_lines":[{"line_number":40,"context_line":""},{"line_number":41,"context_line":"    BUILD_REQUEST_ROOT \u003d \"/zuul/build-requests\""},{"line_number":42,"context_line":"    LOCK_ROOT \u003d \"/zuul/build-request-locks\""},{"line_number":43,"context_line":"    DEFAULT_ZONE \u003d \"default-zone\""},{"line_number":44,"context_line":""},{"line_number":45,"context_line":"    log \u003d logging.getLogger(\"zuul.zk.executor.ExecutorApi\")"},{"line_number":46,"context_line":""}],"source_content_type":"text/x-python","patch_set":35,"id":"76625ec8_cdf47eb3","line":43,"updated":"2021-06-29 20:52:21.000000000","message":"Do we need a docs update indicating that \"default-zone\" is an invalid zone name (eg users shouldn\u0027t use this name?) and/or validate this when reading configs?","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"d93c6a6d1a896fd062aafc1cacf1a71f9261809f","unresolved":false,"context_lines":[{"line_number":40,"context_line":""},{"line_number":41,"context_line":"    BUILD_REQUEST_ROOT \u003d \"/zuul/build-requests\""},{"line_number":42,"context_line":"    LOCK_ROOT \u003d \"/zuul/build-request-locks\""},{"line_number":43,"context_line":"    DEFAULT_ZONE \u003d \"default-zone\""},{"line_number":44,"context_line":""},{"line_number":45,"context_line":"    log \u003d logging.getLogger(\"zuul.zk.executor.ExecutorApi\")"},{"line_number":46,"context_line":""}],"source_content_type":"text/x-python","patch_set":35,"id":"ae64889c_062aeb4d","line":43,"updated":"2021-06-29 21:37:18.000000000","message":"This is updated in the next change, and that restriction is removed.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":4146,"name":"Clark Boylan","email":"cboylan@sapwetik.org","username":"cboylan"},"change_message_id":"fc4cd337e67a4a3bd2e2b79c3c15da43d62edcac","unresolved":true,"context_lines":[{"line_number":83,"context_line":"            for zone in children:"},{"line_number":84,"context_line":"                self.registerZone(zone)"},{"line_number":85,"context_line":""},{"line_number":86,"context_line":"        self.kazoo_client.ChildrenWatch(self.BUILD_REQUEST_ROOT, watch_zones)"},{"line_number":87,"context_line":""},{"line_number":88,"context_line":"    def registerZone(self, zone):"},{"line_number":89,"context_line":"        self.log.debug(\"Registering zone %s\", zone)"}],"source_content_type":"text/x-python","patch_set":35,"id":"34ed8667_8708edf1","line":86,"updated":"2021-06-29 20:52:21.000000000","message":"If anyone else is wondering ChildrenWatch() calls the supplied function on all children when it is first registered. According to the kazoo docs anyway.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"d93c6a6d1a896fd062aafc1cacf1a71f9261809f","unresolved":false,"context_lines":[{"line_number":83,"context_line":"            for zone in children:"},{"line_number":84,"context_line":"                self.registerZone(zone)"},{"line_number":85,"context_line":""},{"line_number":86,"context_line":"        self.kazoo_client.ChildrenWatch(self.BUILD_REQUEST_ROOT, watch_zones)"},{"line_number":87,"context_line":""},{"line_number":88,"context_line":"    def registerZone(self, zone):"},{"line_number":89,"context_line":"        self.log.debug(\"Registering zone %s\", zone)"}],"source_content_type":"text/x-python","patch_set":35,"id":"2551f72b_fda0e80d","line":86,"updated":"2021-06-29 21:37:18.000000000","message":"Yes, same for datawatch, and we rely on that more in a later change.\n\nIn the case of a datawatch, it avoids race conditions.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":4146,"name":"Clark Boylan","email":"cboylan@sapwetik.org","username":"cboylan"},"change_message_id":"fc4cd337e67a4a3bd2e2b79c3c15da43d62edcac","unresolved":true,"context_lines":[{"line_number":293,"context_line":"            # As the build node might contain children (result, data, ...) we"},{"line_number":294,"context_line":"            # must delete it recursively."},{"line_number":295,"context_line":"            self.kazoo_client.delete(build_request.path, recursive\u003dTrue)"},{"line_number":296,"context_line":"        except NoNodeError:"},{"line_number":297,"context_line":"            # Nothing to do if the node is already deleted"},{"line_number":298,"context_line":"            pass"},{"line_number":299,"context_line":""}],"source_content_type":"text/x-python","patch_set":35,"id":"aaf2cba2_bced115f","line":296,"updated":"2021-06-29 20:52:21.000000000","message":"Is there any chance a recursive delete can raise this without fully deleting all of the nodes under it? For example if two deletions race each other and one cleans up a sub tree before the other?","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"d93c6a6d1a896fd062aafc1cacf1a71f9261809f","unresolved":false,"context_lines":[{"line_number":293,"context_line":"            # As the build node might contain children (result, data, ...) we"},{"line_number":294,"context_line":"            # must delete it recursively."},{"line_number":295,"context_line":"            self.kazoo_client.delete(build_request.path, recursive\u003dTrue)"},{"line_number":296,"context_line":"        except NoNodeError:"},{"line_number":297,"context_line":"            # Nothing to do if the node is already deleted"},{"line_number":298,"context_line":"            pass"},{"line_number":299,"context_line":""}],"source_content_type":"text/x-python","patch_set":35,"id":"8db41f21_0f62ff25","line":296,"updated":"2021-06-29 21:37:18.000000000","message":"The kazoo recursive delete method appears robust against that: https://github.com/python-zk/kazoo/blob/master/kazoo/client.py#L1451","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":4146,"name":"Clark Boylan","email":"cboylan@sapwetik.org","username":"cboylan"},"change_message_id":"fc4cd337e67a4a3bd2e2b79c3c15da43d62edcac","unresolved":true,"context_lines":[{"line_number":301,"context_line":"        if event is None:"},{"line_number":302,"context_line":"            return"},{"line_number":303,"context_line":""},{"line_number":304,"context_line":"        for action in actions:"},{"line_number":305,"context_line":"            build_event \u003d None"},{"line_number":306,"context_line":"            if action \u003d\u003d \"cancel\":"},{"line_number":307,"context_line":"                build_event \u003d BuildRequestEvent.CANCELED"}],"source_content_type":"text/x-python","patch_set":35,"id":"b16276eb_082fd9aa","line":304,"updated":"2021-06-29 20:52:21.000000000","message":"Is there any concern that calling the callback on a cancel event then a resume event in that order will be a problem for us? If so we should probably not iterate through this list but instead check for cancel in action then handle it else check for resume in action and handle that exclusively.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"d93c6a6d1a896fd062aafc1cacf1a71f9261809f","unresolved":false,"context_lines":[{"line_number":301,"context_line":"        if event is None:"},{"line_number":302,"context_line":"            return"},{"line_number":303,"context_line":""},{"line_number":304,"context_line":"        for action in actions:"},{"line_number":305,"context_line":"            build_event \u003d None"},{"line_number":306,"context_line":"            if action \u003d\u003d \"cancel\":"},{"line_number":307,"context_line":"                build_event \u003d BuildRequestEvent.CANCELED"}],"source_content_type":"text/x-python","patch_set":35,"id":"d21731ab_f562a9e4","line":304,"updated":"2021-06-29 21:37:18.000000000","message":"The list of \"actions\" is really the list of children, so for that to be a problem, we would have had to set both the canceled and resumed flag at the same time.  That\u0027s possible.\n\nIt\u0027s not going to have an adverse effect on the executor (the cancel call will actually resume the build, and then the resume event will be a noop).  But it will cause the executor to update the status of the build request.  I don\u0027t really like that race condition, so I think we should implement your suggestion and try to avoid it.  I don\u0027t think it\u0027s a critical flaw though, so I\u0027ll tack it onto the end of the stack.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"d93c6a6d1a896fd062aafc1cacf1a71f9261809f","unresolved":false,"context_lines":[{"line_number":330,"context_line":"        except NoNodeError:"},{"line_number":331,"context_line":"            have_lock \u003d False"},{"line_number":332,"context_line":"            self.log.error("},{"line_number":333,"context_line":"                \"Build not found for locking: %s\", build_request.uuid"},{"line_number":334,"context_line":"            )"},{"line_number":335,"context_line":""},{"line_number":336,"context_line":"        # If we aren\u0027t blocking, it\u0027s possible we didn\u0027t get the lock"}],"source_content_type":"text/x-python","patch_set":35,"id":"7583a338_5589e545","line":333,"updated":"2021-06-29 21:37:18.000000000","message":"Operators should never need to know a zookeeper path, so I\u0027d like to avoid suggesting they do by putting ZK node paths in logs.  Developers yes, but it\u0027s a predictable path.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":4146,"name":"Clark Boylan","email":"cboylan@sapwetik.org","username":"cboylan"},"change_message_id":"fc4cd337e67a4a3bd2e2b79c3c15da43d62edcac","unresolved":true,"context_lines":[{"line_number":330,"context_line":"        except NoNodeError:"},{"line_number":331,"context_line":"            have_lock \u003d False"},{"line_number":332,"context_line":"            self.log.error("},{"line_number":333,"context_line":"                \"Build not found for locking: %s\", build_request.uuid"},{"line_number":334,"context_line":"            )"},{"line_number":335,"context_line":""},{"line_number":336,"context_line":"        # If we aren\u0027t blocking, it\u0027s possible we didn\u0027t get the lock"}],"source_content_type":"text/x-python","patch_set":35,"id":"2333c667_6782044c","line":333,"updated":"2021-06-29 20:52:21.000000000","message":"We should consider logging the entire lock path rather than just the uuid to aid operators in debugging no node found errors here.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":1,"name":"James E. Blair","email":"jim@acmegating.com","username":"corvus"},"change_message_id":"d93c6a6d1a896fd062aafc1cacf1a71f9261809f","unresolved":false,"context_lines":[{"line_number":365,"context_line":"            is_locked \u003d False"},{"line_number":366,"context_line":"            self.log.error("},{"line_number":367,"context_line":"                \"BuildReqeust not found to check lock: %s\", build_request.uuid"},{"line_number":368,"context_line":"            )"},{"line_number":369,"context_line":""},{"line_number":370,"context_line":"        return is_locked"},{"line_number":371,"context_line":""}],"source_content_type":"text/x-python","patch_set":35,"id":"74682dcb_066e1112","line":368,"updated":"2021-06-29 21:37:18.000000000","message":"I think this is a misleading message.  In fact, I do not know what would cause this error to hit; probably something pretty egregious that we can\u0027t actually handle.  I think we should remove the exception handler.\n\nRegardless, it is okay and expected for a build to exist without a lock existing, but no error is produced in that case (the kazoo Lock() will create the lock node for the uuid; it will even create the lock root).","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":4146,"name":"Clark Boylan","email":"cboylan@sapwetik.org","username":"cboylan"},"change_message_id":"fc4cd337e67a4a3bd2e2b79c3c15da43d62edcac","unresolved":true,"context_lines":[{"line_number":365,"context_line":"            is_locked \u003d False"},{"line_number":366,"context_line":"            self.log.error("},{"line_number":367,"context_line":"                \"BuildReqeust not found to check lock: %s\", build_request.uuid"},{"line_number":368,"context_line":"            )"},{"line_number":369,"context_line":""},{"line_number":370,"context_line":"        return is_locked"},{"line_number":371,"context_line":""}],"source_content_type":"text/x-python","patch_set":35,"id":"5a34b3ca_e0bb9974","line":368,"updated":"2021-06-29 20:52:21.000000000","message":"Is it possible for the build request to exist but the lock to not exist? In that case you could hit this path and log an error when that doesn\u0027t necessarily indicate an error (instead it would just indicate that the lock has never been held).","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":27952,"name":"Felix Edel","email":"felix.edel@bmw.de","username":"felix.schmidt"},"change_message_id":"c28405ad3ec8658cb6cd7774111df76e711afaba","unresolved":false,"context_lines":[{"line_number":365,"context_line":"            is_locked \u003d False"},{"line_number":366,"context_line":"            self.log.error("},{"line_number":367,"context_line":"                \"BuildReqeust not found to check lock: %s\", build_request.uuid"},{"line_number":368,"context_line":"            )"},{"line_number":369,"context_line":""},{"line_number":370,"context_line":"        return is_locked"},{"line_number":371,"context_line":""}],"source_content_type":"text/x-python","patch_set":35,"id":"606f40bd_825104a1","line":368,"in_reply_to":"74682dcb_066e1112","updated":"2021-06-30 06:22:18.000000000","message":"I think, I copied that logic over from the nodepool implementation https://opendev.org/zuul/nodepool/src/branch/master/nodepool/zk.py#L1898. But that was before we put the lock in a distinct ZK path, so it might not be necessary anymore.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"},{"author":{"_account_id":4146,"name":"Clark Boylan","email":"cboylan@sapwetik.org","username":"cboylan"},"change_message_id":"fc4cd337e67a4a3bd2e2b79c3c15da43d62edcac","unresolved":true,"context_lines":[{"line_number":370,"context_line":"        return is_locked"},{"line_number":371,"context_line":""},{"line_number":372,"context_line":"    def lostBuildRequests(self):"},{"line_number":373,"context_line":"        # Get a list of builds which are running but not locked by any executor"},{"line_number":374,"context_line":"        yield from filter("},{"line_number":375,"context_line":"            lambda b: not self.isLocked(b),"},{"line_number":376,"context_line":"            self.inState(BuildRequestState.RUNNING, BuildRequestState.PAUSED),"}],"source_content_type":"text/x-python","patch_set":35,"id":"2cdbc143_b78dab25","line":373,"updated":"2021-06-29 20:52:21.000000000","message":"See above comment.","commit_id":"8315109d22a6068b91e46dd0b9ba334889a9e570"}]}
