)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":38562,"name":"Richard Cruise","email":"rcruise@redhat.com","username":"rcruise"},"change_message_id":"e0441b3d0263f685701f618b6497e41a6fe41ada","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"e5000de7_eb6581cb","updated":"2026-04-23 09:50:47.000000000","message":"2 ideas for improvement in the comments. Also I\u0027m giving -1 for now because there\u0027s no unit tests added for the new functions. Review assisted by Claude Sonnet 4.6","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"36f84d8a_ef43e4d7","updated":"2026-04-23 20:50:18.000000000","message":"Thanks for the quick turn around on updating this! \n\nI\u0027m wondering whether it is feasible and/or worth while to store some auxiliary information in the Healthmanager itself of a cache between ticks, such as the wait times, but using the updates in the table as a proxy for retries is a neat trick.\n\nReview Assisted by Claude Sonnet 4.6","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":12404,"name":"Rico Lin","email":"ricolin@ricolky.com","username":"rico.lin"},"change_message_id":"9dc9b93d21261f35feac8e06401a084fee326fee","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"b4878b71_12764488","updated":"2026-04-23 04:04:21.000000000","message":"This is the version without db change","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":12404,"name":"Rico Lin","email":"ricolin@ricolky.com","username":"rico.lin"},"change_message_id":"93792019062fa94302a1d745d322bd679083d9b7","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"c5a53f54_b773b6c1","updated":"2026-04-23 07:01:25.000000000","message":"test report\n```\nTest Report — failover-on-error (no-DB) patch\n\nEnvironment\n\n┌───────────────────────┬─────────────────────────────────────────────────────────────────────────────────────────────────────┐\n│ Item                  │ Value                                                                                               │\n├───────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┤\n│ Feature branch        │ rlin-retry-on-error-no-db @ 8ab34cd2c (derived from openstack/octavia Gerrit 934638, DB-migration   │\n│                       │ dropped)                                                                                            │\n├───────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┤\n│ DevStack host         │ Ubuntu 22.04, single-node AIO                                                                       │\n├───────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┤\n│ OpenStack release     │ 2026.2 (master), nova/neutron/octavia from DevStack master                                          │\n├───────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┤\n│ Networking            │ Neutron ML2 + OVS, project_network_types\u003dvxlan                                                      │\n├───────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┤\n│ Octavia source        │ /opt/stack/octavia (patched in-place via git apply)                                                 │\n├───────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┤\n│ Octavia venv          │ /opt/stack/data/venv (python 3.10)                                                                  │\n├───────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┤\n│ DB                    │ MariaDB octavia schema — no alembic migration added                                                 │\n├───────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┤\n│ Test LB               │ SINGLE topology, IPv4 private-subnet                                                                │\n├───────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────┤\n│ Config under          │ failover_on_error\u003dTrue, failover_on_error_max_retries\u003d3, failover_on_error_retry_max_interval\u003d600,  │\n│ [health_manager]      │ failover_on_error_retry_multiplier\u003d60                                                               │\n└───────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────┘\n\nFunctions tested\n\n┌───┬───────────────────────────────────────────────┬──────────────────────┬──────────────────────────────────────────────────┐\n│ # │ Code path                                     │ Scenario             │ Result                                           │\n├───┼───────────────────────────────────────────────┼──────────────────────┼──────────────────────────────────────────────────┤\n│ 1 │ AmphoraHealthRepository.get_stale_amphora()   │ Force amp            │ ✅ HM picked up: Stale amphora\u0027s id is: \u003camp\u003e    │\n│   │ now includes ERROR amps                       │ status\u003dERROR + stale │                                                  │\n│   │                                               │ last_update          │                                                  │\n├───┼───────────────────────────────────────────────┼──────────────────────┼──────────────────────────────────────────────────┤\n│ 2 │ AmphoraRepository.get_error_retry_count()     │ 1 / 3 / 4            │ ✅ Returned 1, 3, 4 matching synthesized history │\n│   │ (history-row derivation)                      │ consecutive          │                                                  │\n│   │                                               │ ERROR-like rows      │                                                  │\n│   │                                               │ within window        │                                                  │\n├───┼───────────────────────────────────────────────┼──────────────────────┼──────────────────────────────────────────────────┤\n│ 3 │ ControllerWorker._error_retry_gate_passed() — │ retry\u003d1 and retry\u003d3  │ ✅ Proceeding with failover of ERROR amphora ... │\n│   │ proceed                                       │ (within limit)       │ (retry N) → new amp ALLOCATED, old amp DELETED   │\n├───┼───────────────────────────────────────────────┼──────────────────────┼──────────────────────────────────────────────────┤\n│ 4 │ _error_retry_gate_passed() — backoff defer    │ retry\u003d3, elapsed \u003c   │ ✅ Deferring failover of ERROR amphora ...:      │\n│   │                                               │ required_wait        │ retry 3, waited 3.2s of required 170.7s (cap     │\n│   │                                               │                      │ 240.0s)                                          │\n├───┼───────────────────────────────────────────────┼──────────────────────┼──────────────────────────────────────────────────┤\n│ 5 │ _error_retry_gate_passed() — retry-limit      │ retry\u003d4 \u003e            │ ✅ Amphora ... has reached the failover_on_error │\n│   │ exceeded                                      │ max_retries\u003d3        │ retry limit (3). Skipping automatic failover; a  │\n│   │                                               │                      │ manual failover is required.                     │\n├───┼───────────────────────────────────────────────┼──────────────────────┼──────────────────────────────────────────────────┤\n│ 6 │ HealthManager.health_check() calls            │ End-to-end HM loop   │ ✅ Feature only activates when called from HM    │\n│   │ failover_amphora(count_error_retries\u003dTrue)    │                      │ (CLI-driven failover unaffected)                 │\n├───┼───────────────────────────────────────────────┼──────────────────────┼──────────────────────────────────────────────────┤\n│ 7 │ _release_failover_prov_status() on defer      │ LB released back to  │ ✅ LB re-picked next tick and eventually failed  │\n│   │                                               │ ACTIVE when backoff  │ over                                             │\n│   │                                               │ defers               │                                                  │\n├───┼───────────────────────────────────────────────┼──────────────────────┼──────────────────────────────────────────────────┤\n│ 8 │ _release_failover_prov_status() on            │ LB transitioned to   │ ✅ load_balancer.provisioning_status \u003d ERROR     │\n│   │ limit-exceeded                                │ ERROR (operator      │ observed                                         │\n│   │                                               │ intervention needed) │                                                  │\n└───┴───────────────────────────────────────────────┴──────────────────────┴──────────────────────────────────────────────────┘\n\nSummary\n\nAll four decision branches of the gate — proceed / proceed-at-limit / defer / refuse — were reproduced live on DevStack with\nmatching log and DB state transitions. One defect was found during testing (LB stuck in PENDING_UPDATE when the gate bailed\nout) and fixed inside the same commit. No schema migration is required; retry counting works purely off existing amphora\nhistory rows.\n```","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"}],"octavia/common/config.py":[{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":312,"context_line":"    cfg.BoolOpt(\u0027failover_on_error\u0027, default\u003dFalse,"},{"line_number":313,"context_line":"                help\u003d_(\u0027Set this to True to allow automatic failover when \u0027"},{"line_number":314,"context_line":"                       \u0027an amphora is in ERROR status. Beware that \u0027"},{"line_number":315,"context_line":"                       \u0027performing failover when in ERROR status might not \u0027"},{"line_number":316,"context_line":"                       \u0027resolve the ERROR for the amphora. \u0027"},{"line_number":317,"context_line":"                       \u0027So use this option with caution.\u0027)),"},{"line_number":318,"context_line":"    cfg.IntOpt(\u0027failover_on_error_max_retries\u0027,"}],"source_content_type":"text/x-python","patch_set":1,"id":"4f57c3d9_a2fd204a","line":315,"range":{"start_line":315,"start_character":44,"end_line":315,"end_character":64},"updated":"2026-04-23 20:50:18.000000000","message":"can drop this clause","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":320,"context_line":"               min\u003d-1,"},{"line_number":321,"context_line":"               help\u003d_(\u0027The maximum number of times to automatically failover \u0027"},{"line_number":322,"context_line":"                      \u0027an amphora in an ERROR state. \u0027"},{"line_number":323,"context_line":"                      \u0027Set to \"-1\" to remove the threshold. The retry count \u0027"},{"line_number":324,"context_line":"                      \u0027is derived from amphora history rows (no database \u0027"},{"line_number":325,"context_line":"                      \u0027column is added).\u0027)),"},{"line_number":326,"context_line":"    cfg.IntOpt(\u0027failover_on_error_retry_max_interval\u0027,"},{"line_number":327,"context_line":"               default\u003d60,"},{"line_number":328,"context_line":"               min\u003d0,"}],"source_content_type":"text/x-python","patch_set":1,"id":"dc208d24_18468eae","line":325,"range":{"start_line":323,"start_character":60,"end_line":325,"end_character":41},"updated":"2026-04-23 20:50:18.000000000","message":"Probably not necessary in the help text.","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":329,"context_line":"               help\u003d_(\u0027Maximum delay in seconds between automatic failover \u0027"},{"line_number":330,"context_line":"                      \u0027attempts for an amphora in ERROR state. Acts as the \u0027"},{"line_number":331,"context_line":"                      \u0027upper bound on the exponentially increasing wait \u0027"},{"line_number":332,"context_line":"                      \u0027between retries, following the \u0027"},{"line_number":333,"context_line":"                      \u0027wait_random_exponential strategy: the actual delay \u0027"},{"line_number":334,"context_line":"                      \u0027before retry N (1-indexed) is a uniform random \u0027"},{"line_number":335,"context_line":"                      \u0027value in [0, min(failover_on_error_retry_max_interval\u0027"},{"line_number":336,"context_line":"                      \u0027, failover_on_error_retry_multiplier * \u0027"},{"line_number":337,"context_line":"                      \u00272 ** (N - 1))]. Set to 0 to disable the delay.\u0027)),"},{"line_number":338,"context_line":"    cfg.FloatOpt(\u0027failover_on_error_retry_multiplier\u0027,"},{"line_number":339,"context_line":"                 default\u003d1.0,"},{"line_number":340,"context_line":"                 min\u003d0.0,"}],"source_content_type":"text/x-python","patch_set":1,"id":"f37ef18d_dec8ca87","line":337,"range":{"start_line":332,"start_character":38,"end_line":337,"end_character":70},"updated":"2026-04-23 20:50:18.000000000","message":"```suggestion\n                      \u0027The actual delay \u0027\n                      \u0027before the retry Nth retry is a uniform random \u0027\n                      \u0027value in [0, min(failover_on_error_retry_max_interval\u0027\n                      \u0027, failover_on_error_retry_multiplier * \u0027\n                      \u00272 ** (N - 1))]. Set to 0 to disable the delay.\u0027)),\n```","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":340,"context_line":"                 min\u003d0.0,"},{"line_number":341,"context_line":"                 help\u003d_(\u0027Multiplier applied to the exponential term in the \u0027"},{"line_number":342,"context_line":"                        \u0027wait_random_exponential strategy used between \u0027"},{"line_number":343,"context_line":"                        \u0027automatic failover retries of an amphora in ERROR \u0027"},{"line_number":344,"context_line":"                        \u0027state. The actual delay before retry N (1-indexed) \u0027"},{"line_number":345,"context_line":"                        \u0027is a uniform random value in \u0027"},{"line_number":346,"context_line":"                        \u0027[0, min(failover_on_error_retry_max_interval, \u0027"},{"line_number":347,"context_line":"                        \u0027failover_on_error_retry_multiplier * \u0027"},{"line_number":348,"context_line":"                        \u00272 ** (N - 1))].\u0027)),"},{"line_number":349,"context_line":"]"},{"line_number":350,"context_line":""},{"line_number":351,"context_line":"oslo_messaging_opts \u003d ["},{"line_number":352,"context_line":"    cfg.StrOpt(\u0027topic\u0027, help\u003d_(\u0027Topic (i.e. Queue) Name\u0027)),"}],"source_content_type":"text/x-python","patch_set":1,"id":"53d4ea32_7c078977","line":349,"range":{"start_line":343,"start_character":25,"end_line":349,"end_character":1},"updated":"2026-04-23 20:50:18.000000000","message":"this can be the same as above","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"}],"octavia/controller/healthmanager/health_manager.py":[{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":138,"context_line":""},{"line_number":139,"context_line":"            LOG.info(\"Stale amphora\u0027s id is: %s\", amp_health.amphora_id)"},{"line_number":140,"context_line":"            fut \u003d self.executor.submit("},{"line_number":141,"context_line":"                self.cw.failover_amphora, amp_health.amphora_id, reraise\u003dTrue,"},{"line_number":142,"context_line":"                count_error_retries\u003dTrue)"},{"line_number":143,"context_line":"            fut.add_done_callback("},{"line_number":144,"context_line":"                functools.partial(update_stats_on_done, stats)"}],"source_content_type":"text/x-python","patch_set":1,"id":"42c096a8_c8315b61","line":141,"updated":"2026-04-23 20:50:18.000000000","message":"See my comment in controller_worker.py about changing this argument.","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"}],"octavia/controller/worker/v2/controller_worker.py":[{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":17,"context_line":"from oslo_config import cfg"},{"line_number":18,"context_line":"from oslo_log import log as logging"},{"line_number":19,"context_line":"from oslo_utils import excutils"},{"line_number":20,"context_line":"import datetime"},{"line_number":21,"context_line":"import random"},{"line_number":22,"context_line":"from sqlalchemy.orm import exc as db_exceptions"},{"line_number":23,"context_line":"from stevedore import driver as stevedore_driver"}],"source_content_type":"text/x-python","patch_set":1,"id":"9137046a_8e6bdd0d","line":20,"in_reply_to":"35da4f0a_8930ce6a","updated":"2026-04-23 20:50:18.000000000","message":"\u003e pep8: H306: imports not in alphabetical order (oslo_utils.excutils, datetime)\n\nPlease fix.","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":997,"context_line":"                  the retry should be deferred or the retry limit was hit."},{"line_number":998,"context_line":"        \"\"\""},{"line_number":999,"context_line":"        limit \u003d CONF.health_manager.failover_on_error_max_retries"},{"line_number":1000,"context_line":"        max_interval \u003d ("},{"line_number":1001,"context_line":"            CONF.health_manager.failover_on_error_retry_max_interval)"},{"line_number":1002,"context_line":"        multiplier \u003d CONF.health_manager.failover_on_error_retry_multiplier"},{"line_number":1003,"context_line":""},{"line_number":1004,"context_line":"        with session.begin():"}],"source_content_type":"text/x-python","patch_set":1,"id":"7ebac886_7b782470","line":1001,"range":{"start_line":1000,"start_character":23,"end_line":1001,"end_character":69},"updated":"2026-04-23 20:50:18.000000000","message":"Don\u0027t need these parentheses","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":1008,"context_line":"        # Max retries check (limit \u003d\u003d -1 disables the ceiling)."},{"line_number":1009,"context_line":"        if limit !\u003d -1 and retry_n \u003e limit:"},{"line_number":1010,"context_line":"            LOG.warning("},{"line_number":1011,"context_line":"                \u0027Amphora %(id)s has reached the failover_on_error retry \u0027"},{"line_number":1012,"context_line":"                \u0027limit (%(limit)s). Skipping automatic failover; a manual \u0027"},{"line_number":1013,"context_line":"                \u0027failover is required.\u0027,"},{"line_number":1014,"context_line":"                {\u0027id\u0027: amphora.id, \u0027limit\u0027: limit})"},{"line_number":1015,"context_line":"            # Keep amp_health.busy set so we stop polling this amp; the"}],"source_content_type":"text/x-python","patch_set":1,"id":"13b6a0d3_760f9b25","line":1012,"range":{"start_line":1011,"start_character":37,"end_line":1012,"end_character":23},"updated":"2026-04-23 20:50:18.000000000","message":"We should mention `CONF.health_manager.failover_on_error_max_retries`","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":1019,"context_line":"        # wait_random_exponential gate: actual wait before retry N is a"},{"line_number":1020,"context_line":"        # uniform random value in"},{"line_number":1021,"context_line":"        # [0, min(max_interval, multiplier * 2 ** (N - 1))]."},{"line_number":1022,"context_line":"        if max_interval \u003e 0 and retry_n \u003e\u003d 1:"},{"line_number":1023,"context_line":"            exp_cap \u003d min(max_interval, multiplier * (2 ** (retry_n - 1)))"},{"line_number":1024,"context_line":"            required_wait \u003d random.uniform(0, exp_cap)"},{"line_number":1025,"context_line":"            last_attempt \u003d amphora.updated_at or amphora.created_at"}],"source_content_type":"text/x-python","patch_set":1,"id":"e68e9675_094914a1","line":1022,"range":{"start_line":1022,"start_character":27,"end_line":1022,"end_character":44},"updated":"2026-04-23 20:50:18.000000000","message":"why are we checking if `retry_n \u003e\u003d 1`?","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38562,"name":"Richard Cruise","email":"rcruise@redhat.com","username":"rcruise"},"change_message_id":"e0441b3d0263f685701f618b6497e41a6fe41ada","unresolved":true,"context_lines":[{"line_number":1020,"context_line":"        # uniform random value in"},{"line_number":1021,"context_line":"        # [0, min(max_interval, multiplier * 2 ** (N - 1))]."},{"line_number":1022,"context_line":"        if max_interval \u003e 0 and retry_n \u003e\u003d 1:"},{"line_number":1023,"context_line":"            exp_cap \u003d min(max_interval, multiplier * (2 ** (retry_n - 1)))"},{"line_number":1024,"context_line":"            required_wait \u003d random.uniform(0, exp_cap)"},{"line_number":1025,"context_line":"            last_attempt \u003d amphora.updated_at or amphora.created_at"},{"line_number":1026,"context_line":"            if last_attempt is not None:"}],"source_content_type":"text/x-python","patch_set":1,"id":"e0e41d96_62a9f137","line":1023,"updated":"2026-04-23 09:50:47.000000000","message":"The first time this runs it\u0027ll trigger a failover after 1s. That\u0027s probably a little bit harsh for some operators\n\nA simple fix would be to add 3 to retry_n instead of subtracting 1, so the initial calculation would be 2 ** 3 \u003d 8s\n\nA more robust fix might be to have a configurable minimum interval with a default of 10s\n\nMaybe something like this \n\n\n```suggestion\n            exp_cap \u003d min(max_interval, max(min_interval, multiplier * (2 ** (retry_n - 1))))\n```\n\nSetting min_interval to 0 would preserve the original behaviour and do the very quick failover on the first cycle","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":1020,"context_line":"        # uniform random value in"},{"line_number":1021,"context_line":"        # [0, min(max_interval, multiplier * 2 ** (N - 1))]."},{"line_number":1022,"context_line":"        if max_interval \u003e 0 and retry_n \u003e\u003d 1:"},{"line_number":1023,"context_line":"            exp_cap \u003d min(max_interval, multiplier * (2 ** (retry_n - 1)))"},{"line_number":1024,"context_line":"            required_wait \u003d random.uniform(0, exp_cap)"},{"line_number":1025,"context_line":"            last_attempt \u003d amphora.updated_at or amphora.created_at"},{"line_number":1026,"context_line":"            if last_attempt is not None:"}],"source_content_type":"text/x-python","patch_set":1,"id":"4f3e17fc_4df5fdf6","line":1023,"range":{"start_line":1023,"start_character":21,"end_line":1023,"end_character":73},"updated":"2026-04-23 20:50:18.000000000","message":"We\u0027re redrawing from the uniform random on every tick which has a different wait distribution than drawing a wait time from uniform random and then waiting that long. What is implemented in the code here will tend to retry much more often than the single draw uniform random case. I\u0027m not sure how important that is but it should be kept in mind.","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":1020,"context_line":"        # uniform random value in"},{"line_number":1021,"context_line":"        # [0, min(max_interval, multiplier * 2 ** (N - 1))]."},{"line_number":1022,"context_line":"        if max_interval \u003e 0 and retry_n \u003e\u003d 1:"},{"line_number":1023,"context_line":"            exp_cap \u003d min(max_interval, multiplier * (2 ** (retry_n - 1)))"},{"line_number":1024,"context_line":"            required_wait \u003d random.uniform(0, exp_cap)"},{"line_number":1025,"context_line":"            last_attempt \u003d amphora.updated_at or amphora.created_at"},{"line_number":1026,"context_line":"            if last_attempt is not None:"}],"source_content_type":"text/x-python","patch_set":1,"id":"bee4bcb3_756851b6","line":1023,"in_reply_to":"e0e41d96_62a9f137","updated":"2026-04-23 20:50:18.000000000","message":"Having a configurable minimum also helps with the fact that *any* retry could be 0s since we\u0027re drawing from a uniform random.","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":1023,"context_line":"            exp_cap \u003d min(max_interval, multiplier * (2 ** (retry_n - 1)))"},{"line_number":1024,"context_line":"            required_wait \u003d random.uniform(0, exp_cap)"},{"line_number":1025,"context_line":"            last_attempt \u003d amphora.updated_at or amphora.created_at"},{"line_number":1026,"context_line":"            if last_attempt is not None:"},{"line_number":1027,"context_line":"                elapsed \u003d (datetime.datetime.utcnow() -"},{"line_number":1028,"context_line":"                           last_attempt).total_seconds()"},{"line_number":1029,"context_line":"                if elapsed \u003c required_wait:"}],"source_content_type":"text/x-python","patch_set":1,"id":"0ffa23f4_5d72c81b","line":1026,"range":{"start_line":1026,"start_character":11,"end_line":1026,"end_character":40},"updated":"2026-04-23 20:50:18.000000000","message":"if last_attempt is None, it means amphora.created_at is None, but created_at is set to utcnow when the entry is created.","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":1024,"context_line":"            required_wait \u003d random.uniform(0, exp_cap)"},{"line_number":1025,"context_line":"            last_attempt \u003d amphora.updated_at or amphora.created_at"},{"line_number":1026,"context_line":"            if last_attempt is not None:"},{"line_number":1027,"context_line":"                elapsed \u003d (datetime.datetime.utcnow() -"},{"line_number":1028,"context_line":"                           last_attempt).total_seconds()"},{"line_number":1029,"context_line":"                if elapsed \u003c required_wait:"},{"line_number":1030,"context_line":"                    LOG.debug("}],"source_content_type":"text/x-python","patch_set":1,"id":"7e15eb0d_003ae6a1","line":1027,"range":{"start_line":1027,"start_character":45,"end_line":1027,"end_character":53},"updated":"2026-04-23 20:50:18.000000000","message":"Should use `datetime.datetime.now(datetime.timezone.utc)` or `oslo_utils.timeutils.utcnow` since `utcnow` is deprecated.","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":1034,"context_line":"                        {\u0027id\u0027: amphora.id, \u0027n\u0027: retry_n,"},{"line_number":1035,"context_line":"                         \u0027el\u0027: elapsed, \u0027req\u0027: required_wait,"},{"line_number":1036,"context_line":"                         \u0027cap\u0027: exp_cap})"},{"line_number":1037,"context_line":"                    # Release the health-manager lock so another tick can"},{"line_number":1038,"context_line":"                    # re-pick this amphora once the backoff elapses."},{"line_number":1039,"context_line":"                    with session.begin():"},{"line_number":1040,"context_line":"                        amp_health \u003d self._amphora_health_repo.get("},{"line_number":1041,"context_line":"                            session, amphora_id\u003damphora.id)"},{"line_number":1042,"context_line":"                        if amp_health is not None:"},{"line_number":1043,"context_line":"                            self._amphora_health_repo.update("},{"line_number":1044,"context_line":"                                session, amphora.id, busy\u003dFalse)"},{"line_number":1045,"context_line":"                    return False"},{"line_number":1046,"context_line":""},{"line_number":1047,"context_line":"        LOG.info(\u0027Proceeding with failover of ERROR amphora %(id)s \u0027"}],"source_content_type":"text/x-python","patch_set":1,"id":"80259efb_fd7e5697","line":1044,"range":{"start_line":1037,"start_character":14,"end_line":1044,"end_character":64},"updated":"2026-04-23 20:50:18.000000000","message":"Logically this might make more sense in the if statement after `_error_retry_gate_passed` returns, since it separates the predicate and the action","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":1057,"context_line":""},{"line_number":1058,"context_line":"        :param amphora_id: ID for amphora to failover"},{"line_number":1059,"context_line":"        :param reraise: If enabled reraise any caught exception"},{"line_number":1060,"context_line":"        :param count_error_retries: When True and the amphora is in ERROR"},{"line_number":1061,"context_line":"            state, consult ``[health_manager]/failover_on_error`` and the"},{"line_number":1062,"context_line":"            associated max-retries / exponential-backoff options. The"},{"line_number":1063,"context_line":"            retry count is derived from amphora history rows (see"},{"line_number":1064,"context_line":"            ``AmphoraRepository.get_error_retry_count``) so no dedicated"},{"line_number":1065,"context_line":"            database column is needed."},{"line_number":1066,"context_line":"        :returns: None"},{"line_number":1067,"context_line":"        :raises octavia.common.exceptions.NotFound: The referenced amphora was"},{"line_number":1068,"context_line":"                                                    not found"}],"source_content_type":"text/x-python","patch_set":1,"id":"0f635b47_d10e3bca","line":1065,"range":{"start_line":1060,"start_character":5,"end_line":1065,"end_character":38},"updated":"2026-04-23 20:50:18.000000000","message":"This serves the same function as `CONF.health_manager.failover_on_error`. Since nothing else uses `CONF` in this function I\u0027d suggest renaming the variable to `failover_on_error` and passing `CONF.health_manager.failover_on_error` into it.","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"}],"octavia/db/repositories.py":[{"author":{"_account_id":38562,"name":"Richard Cruise","email":"rcruise@redhat.com","username":"rcruise"},"change_message_id":"e0441b3d0263f685701f618b6497e41a6fe41ada","unresolved":true,"context_lines":[{"line_number":1333,"context_line":"                     datetime.timedelta(seconds\u003dwindow_seconds))"},{"line_number":1334,"context_line":""},{"line_number":1335,"context_line":"        query \u003d session.query(models.Amphora).filter("},{"line_number":1336,"context_line":"            models.Amphora.load_balancer_id \u003d\u003d amphora.load_balancer_id)"},{"line_number":1337,"context_line":"        if getattr(amphora, \u0027role\u0027, None):"},{"line_number":1338,"context_line":"            query \u003d query.filter(models.Amphora.role \u003d\u003d amphora.role)"},{"line_number":1339,"context_line":"        rows \u003d query.order_by(models.Amphora.created_at.desc()).all()"}],"source_content_type":"text/x-python","patch_set":1,"id":"cfb3e83c_660f4c64","line":1336,"updated":"2026-04-23 09:50:47.000000000","message":"It\u0027s probably worth adding more filters here since the dataset might be quite large and walking the list in Python isn\u0027t the most efficient. I\u0027d suggest adding the terminal states and threshold time as filters to limit the result to what we\u0027re interested in","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"},{"author":{"_account_id":38360,"name":"Zachary Mark Raines","display_name":"Zachary Raines","email":"zachary.raines@canonical.com","username":"raineszm","status":"Sustaining Engineer @ Canonical"},"change_message_id":"2cc07754a654e5591cd2ab70a61870f8e89bb636","unresolved":true,"context_lines":[{"line_number":1333,"context_line":"                     datetime.timedelta(seconds\u003dwindow_seconds))"},{"line_number":1334,"context_line":""},{"line_number":1335,"context_line":"        query \u003d session.query(models.Amphora).filter("},{"line_number":1336,"context_line":"            models.Amphora.load_balancer_id \u003d\u003d amphora.load_balancer_id)"},{"line_number":1337,"context_line":"        if getattr(amphora, \u0027role\u0027, None):"},{"line_number":1338,"context_line":"            query \u003d query.filter(models.Amphora.role \u003d\u003d amphora.role)"},{"line_number":1339,"context_line":"        rows \u003d query.order_by(models.Amphora.created_at.desc()).all()"}],"source_content_type":"text/x-python","patch_set":1,"id":"40cbdec8_0b1b8dec","line":1336,"in_reply_to":"cfb3e83c_660f4c64","updated":"2026-04-23 20:50:18.000000000","message":"Can the whole thing be done as a COUNT query?","commit_id":"82ee78d863ee214e0c1dafb886247a320540eaa0"}]}
