)]}'
{"/PATCHSET_LEVEL":[{"author":{"_account_id":38059,"name":"Anoop Kumar Shukla","display_name":"Anoop Shukla","email":"anoop.shukla@netapp.com","username":"anoop2","status":"NetApp"},"change_message_id":"dc6e89e733b2bad3809270a5d195d5b37efa9d98","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"0fd81038_ac1b38b3","updated":"2026-05-08 14:27:21.000000000","message":"Looks very detailed and insightful. Have some questions below.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":1,"id":"ca2c850b_c9e7567f","updated":"2026-05-20 22:13:46.000000000","message":"Thank you very much for writing up this detailed spec, Francesco.. Please see comments inline","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"}],"specs/hibiscus/manila-per-process-healthchecks.rst":[{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":8,"context_line":"Per-Process Healthcheck Endpoints"},{"line_number":9,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":10,"context_line":""},{"line_number":11,"context_line":"TBD (blueprint not yet registered)"},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"This spec proposes adding per-process HTTP healthcheck endpoints to Manila"},{"line_number":14,"context_line":"services, enabling operators and orchestration frameworks to determine whether"}],"source_content_type":"text/x-rst","patch_set":1,"id":"77ae7a5d_2338044a","line":11,"range":{"start_line":11,"start_character":0,"end_line":11,"end_character":34},"updated":"2026-05-20 22:13:46.000000000","message":"I reported this: \n\nhttps://blueprints.launchpad.net/manila/+spec/per-process-healthchecks","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":8,"context_line":"Per-Process Healthcheck Endpoints"},{"line_number":9,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":10,"context_line":""},{"line_number":11,"context_line":"TBD (blueprint not yet registered)"},{"line_number":12,"context_line":""},{"line_number":13,"context_line":"This spec proposes adding per-process HTTP healthcheck endpoints to Manila"},{"line_number":14,"context_line":"services, enabling operators and orchestration frameworks to determine whether"}],"source_content_type":"text/x-rst","patch_set":1,"id":"5f9ea71d_6abcbd44","line":11,"range":{"start_line":11,"start_character":0,"end_line":11,"end_character":34},"in_reply_to":"77ae7a5d_2338044a","updated":"2026-05-21 09:28:48.000000000","message":"Done","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":14,"context_line":"services, enabling operators and orchestration frameworks to determine whether"},{"line_number":15,"context_line":"a service is healthy and not simply alive. The design follows the"},{"line_number":16,"context_line":"approach approved for Nova in the `per-process-healthchecks`_ specification"},{"line_number":17,"context_line":"and aligns with the IETF Health Check Response draft [#ietf-healthcheck]_."},{"line_number":18,"context_line":""},{"line_number":19,"context_line":".. _per-process-healthchecks: https://specs.openstack.org/openstack/nova-specs/specs/2025.1/approved/per-process-healthchecks.html"},{"line_number":20,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"ec695d31_ba83d83c","line":17,"range":{"start_line":17,"start_character":55,"end_line":17,"end_character":71},"updated":"2026-05-20 22:13:46.000000000","message":"We should note that this IETF draft has expired and never made it to the RFC stage, but, doesn\u0027t stop us from using the pattern it established which apparently is being adopted widely.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":43,"context_line":""},{"line_number":44,"context_line":"This problem is more evident in podified deployments, where Kubernetes probes"},{"line_number":45,"context_line":"are the primary mechanism for managing service lifecycle. In this context, the"},{"line_number":46,"context_line":"``manila-operator`` provides a health probe script (``healthcheck.py``) that"},{"line_number":47,"context_line":"runs an HTTP server on port 8080 inside ``manila-share`` and"},{"line_number":48,"context_line":"``manila-scheduler`` pod. On every HTTP GET it executes"},{"line_number":49,"context_line":"``manila-manage service list``, queries the ``services`` table, and returns"}],"source_content_type":"text/x-rst","patch_set":1,"id":"229e712a_e27dcd94","line":46,"range":{"start_line":46,"start_character":0,"end_line":46,"end_character":20},"updated":"2026-05-20 22:13:46.000000000","message":"You may want to link this; thsi isn\u0027t a community owned project.. but, it\u0027s fine to link it here because there are lots of tools doing very similar things.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":43,"context_line":""},{"line_number":44,"context_line":"This problem is more evident in podified deployments, where Kubernetes probes"},{"line_number":45,"context_line":"are the primary mechanism for managing service lifecycle. In this context, the"},{"line_number":46,"context_line":"``manila-operator`` provides a health probe script (``healthcheck.py``) that"},{"line_number":47,"context_line":"runs an HTTP server on port 8080 inside ``manila-share`` and"},{"line_number":48,"context_line":"``manila-scheduler`` pod. On every HTTP GET it executes"},{"line_number":49,"context_line":"``manila-manage service list``, queries the ``services`` table, and returns"}],"source_content_type":"text/x-rst","patch_set":1,"id":"7178d5d6_df9234f1","line":46,"range":{"start_line":46,"start_character":0,"end_line":46,"end_character":20},"in_reply_to":"229e712a_e27dcd94","updated":"2026-05-21 09:28:48.000000000","message":"Done","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":38059,"name":"Anoop Kumar Shukla","display_name":"Anoop Shukla","email":"anoop.shukla@netapp.com","username":"anoop2","status":"NetApp"},"change_message_id":"dc6e89e733b2bad3809270a5d195d5b37efa9d98","unresolved":true,"context_lines":[{"line_number":88,"context_line":"Proposed change"},{"line_number":89,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":"Add a lightweight, in-process HTTP endpoint (``/health``) to each Manila"},{"line_number":92,"context_line":"binary, exposing cached health state as IETF-aligned JSON. The design"},{"line_number":93,"context_line":"inherits its core principles from the Nova specification:"},{"line_number":94,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"89fc39ec_7e140643","line":91,"range":{"start_line":91,"start_character":47,"end_line":91,"end_character":54},"updated":"2026-05-08 14:27:21.000000000","message":"is the expectation for drivers to also check backend and respond if the driver/backend is in pass state or warn/error state?","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":38059,"name":"Anoop Kumar Shukla","display_name":"Anoop Shukla","email":"anoop.shukla@netapp.com","username":"anoop2","status":"NetApp"},"change_message_id":"f4a66257fa449b99178f6445d6f0be3f6c6dadd4","unresolved":true,"context_lines":[{"line_number":88,"context_line":"Proposed change"},{"line_number":89,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":"Add a lightweight, in-process HTTP endpoint (``/health``) to each Manila"},{"line_number":92,"context_line":"binary, exposing cached health state as IETF-aligned JSON. The design"},{"line_number":93,"context_line":"inherits its core principles from the Nova specification:"},{"line_number":94,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"b30e32f9_1046455f","line":91,"range":{"start_line":91,"start_character":47,"end_line":91,"end_character":54},"in_reply_to":"790c0f6f_a87f06f3","updated":"2026-05-26 14:21:12.000000000","message":"Acknowledged.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"8b3dbc9276a837e56bb6f0f64849ae7f9db98d87","unresolved":true,"context_lines":[{"line_number":88,"context_line":"Proposed change"},{"line_number":89,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":"Add a lightweight, in-process HTTP endpoint (``/health``) to each Manila"},{"line_number":92,"context_line":"binary, exposing cached health state as IETF-aligned JSON. The design"},{"line_number":93,"context_line":"inherits its core principles from the Nova specification:"},{"line_number":94,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"c97a6dd6_ba17d789","line":91,"range":{"start_line":91,"start_character":47,"end_line":91,"end_character":54},"in_reply_to":"89fc39ec_7e140643","updated":"2026-05-20 20:25:14.000000000","message":"I don\u0027t think so. In general drivers are not expected to implement health check logic themselves. \nI imagine the @healthcheck decorator being applied at the share manager layer and observes the outcome of calls that flow through drivers: if a driver raises an exception, the decorator captures it and updates the health state accordingly.\nI tried to capture this in the \"Driver impact\" section.\nIf useful we can think about optional extension point for drivers that have domain-specific health signals (e.g., CephFS checking cluster health via ceph status), but\nthis is not required.\nIs that section sufficient to cover these bits?","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":88,"context_line":"Proposed change"},{"line_number":89,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":"Add a lightweight, in-process HTTP endpoint (``/health``) to each Manila"},{"line_number":92,"context_line":"binary, exposing cached health state as IETF-aligned JSON. The design"},{"line_number":93,"context_line":"inherits its core principles from the Nova specification:"},{"line_number":94,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"790c0f6f_a87f06f3","line":91,"range":{"start_line":91,"start_character":47,"end_line":91,"end_character":54},"in_reply_to":"c97a6dd6_ba17d789","updated":"2026-05-20 22:13:46.000000000","message":"I like that extensibility. There\u0027re already hooks to allow drivers to tell manila if they\u0027re ready for provisioning/management at all. So we could have a generic implementation based on that (which i think is where Francesco is heading).. but i can imagine needing to extend that to add more info without leaking the abstraction.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":38059,"name":"Anoop Kumar Shukla","display_name":"Anoop Shukla","email":"anoop.shukla@netapp.com","username":"anoop2","status":"NetApp"},"change_message_id":"dc6e89e733b2bad3809270a5d195d5b37efa9d98","unresolved":true,"context_lines":[{"line_number":96,"context_line":"   normal operations (database queries, RPC calls, driver calls), not by"},{"line_number":97,"context_line":"   active probes that add load."},{"line_number":98,"context_line":""},{"line_number":99,"context_line":"2. **Three-state model**: ``pass``, ``warn``, ``fail`` per check, following"},{"line_number":100,"context_line":"   the IETF Health Check Response draft."},{"line_number":101,"context_line":""},{"line_number":102,"context_line":"3. **Disabled by default**: opt-in via a ``[healthcheck]`` configuration"}],"source_content_type":"text/x-rst","patch_set":1,"id":"9733bb90_36b9cd26","line":99,"range":{"start_line":99,"start_character":46,"end_line":99,"end_character":54},"updated":"2026-05-08 14:27:21.000000000","message":"Does fail/warn state also provide a reason to why the failure/warning is seen? That can be used in providing a message on the error thrown by the services.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":96,"context_line":"   normal operations (database queries, RPC calls, driver calls), not by"},{"line_number":97,"context_line":"   active probes that add load."},{"line_number":98,"context_line":""},{"line_number":99,"context_line":"2. **Three-state model**: ``pass``, ``warn``, ``fail`` per check, following"},{"line_number":100,"context_line":"   the IETF Health Check Response draft."},{"line_number":101,"context_line":""},{"line_number":102,"context_line":"3. **Disabled by default**: opt-in via a ``[healthcheck]`` configuration"}],"source_content_type":"text/x-rst","patch_set":1,"id":"95a86459_ad9f6194","line":99,"range":{"start_line":99,"start_character":46,"end_line":99,"end_character":54},"in_reply_to":"156e989d_26073dc4","updated":"2026-05-20 22:13:46.000000000","message":"I would hope we bake in logic to filter out any sensitive material here. We have had problems where the context data, or credentials can be leaked through mechanisms like this. \n\nThere\u0027s a feature called Asynchronous User Messages in manila which surfaces backend exceptions to end users; there, we specifically invented an abstraction layer to read exceptions and convert them into strings that have enough hints to troubleshoot.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":true,"context_lines":[{"line_number":96,"context_line":"   normal operations (database queries, RPC calls, driver calls), not by"},{"line_number":97,"context_line":"   active probes that add load."},{"line_number":98,"context_line":""},{"line_number":99,"context_line":"2. **Three-state model**: ``pass``, ``warn``, ``fail`` per check, following"},{"line_number":100,"context_line":"   the IETF Health Check Response draft."},{"line_number":101,"context_line":""},{"line_number":102,"context_line":"3. **Disabled by default**: opt-in via a ``[healthcheck]`` configuration"}],"source_content_type":"text/x-rst","patch_set":1,"id":"ec963580_c84c72b6","line":99,"range":{"start_line":99,"start_character":46,"end_line":99,"end_character":54},"in_reply_to":"95a86459_ad9f6194","updated":"2026-05-21 09:28:48.000000000","message":"+1 and I called this out in my last PS","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"8b3dbc9276a837e56bb6f0f64849ae7f9db98d87","unresolved":true,"context_lines":[{"line_number":96,"context_line":"   normal operations (database queries, RPC calls, driver calls), not by"},{"line_number":97,"context_line":"   active probes that add load."},{"line_number":98,"context_line":""},{"line_number":99,"context_line":"2. **Three-state model**: ``pass``, ``warn``, ``fail`` per check, following"},{"line_number":100,"context_line":"   the IETF Health Check Response draft."},{"line_number":101,"context_line":""},{"line_number":102,"context_line":"3. **Disabled by default**: opt-in via a ``[healthcheck]`` configuration"}],"source_content_type":"text/x-rst","patch_set":1,"id":"156e989d_26073dc4","line":99,"range":{"start_line":99,"start_character":46,"end_line":99,"end_character":54},"in_reply_to":"9733bb90_36b9cd26","updated":"2026-05-20 20:25:14.000000000","message":"Yes, each health check entry can carry an \"output\" field with the reason associated with the failure or warning.\nThe decorator captures the exception message on failure and stores it as the output value. e.g., for CephFS the fail response might have an output field like the following:\n```\n  ...\n  \"output\": \"CephFS: mon_command failed: [errno 110] Connection timed out\"\n```","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":102,"context_line":"3. **Disabled by default**: opt-in via a ``[healthcheck]`` configuration"},{"line_number":103,"context_line":"   section. The endpoint binds to ``localhost`` or a Unix socket by default."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"4. **Unauthenticated**: designed for local consumption by Kubernetes probes,"},{"line_number":106,"context_line":"   systemd watchdogs, or human debugging via ``curl``."},{"line_number":107,"context_line":""},{"line_number":108,"context_line":""},{"line_number":109,"context_line":"Main Health check definitions"}],"source_content_type":"text/x-rst","patch_set":1,"id":"fddccfdb_d21a0bd6","line":106,"range":{"start_line":105,"start_character":0,"end_line":106,"end_character":54},"updated":"2026-05-20 22:13:46.000000000","message":"This is even more of a reason to screen the exceptions that the handlers encounter.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":102,"context_line":"3. **Disabled by default**: opt-in via a ``[healthcheck]`` configuration"},{"line_number":103,"context_line":"   section. The endpoint binds to ``localhost`` or a Unix socket by default."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"4. **Unauthenticated**: designed for local consumption by Kubernetes probes,"},{"line_number":106,"context_line":"   systemd watchdogs, or human debugging via ``curl``."},{"line_number":107,"context_line":""},{"line_number":108,"context_line":""},{"line_number":109,"context_line":"Main Health check definitions"}],"source_content_type":"text/x-rst","patch_set":1,"id":"cef71d90_8fc272e7","line":106,"range":{"start_line":105,"start_character":0,"end_line":106,"end_character":54},"in_reply_to":"fddccfdb_d21a0bd6","updated":"2026-05-21 09:28:48.000000000","message":"Acknowledged","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":141,"context_line":"  (``create_share``, ``delete_share``, ``_update_share_stats``). The periodic"},{"line_number":142,"context_line":"  stats reporting task (``_report_driver_status``, which runs periodically"},{"line_number":143,"context_line":"  per backend) provides a natural heartbeat for this check. It can be"},{"line_number":144,"context_line":"  extended depending on the driver implementation to report more events."},{"line_number":145,"context_line":""},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"Status definitions"}],"source_content_type":"text/x-rst","patch_set":1,"id":"65dd4ac1_4887b486","line":144,"range":{"start_line":144,"start_character":71,"end_line":144,"end_character":72},"updated":"2026-05-20 22:13:46.000000000","message":"for startup specifically, we have the share manager\u0027s init method, which invokes a driver\u0027s _check_for_setup_error() method to understand recoverable vs irrecoverable failures.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":141,"context_line":"  (``create_share``, ``delete_share``, ``_update_share_stats``). The periodic"},{"line_number":142,"context_line":"  stats reporting task (``_report_driver_status``, which runs periodically"},{"line_number":143,"context_line":"  per backend) provides a natural heartbeat for this check. It can be"},{"line_number":144,"context_line":"  extended depending on the driver implementation to report more events."},{"line_number":145,"context_line":""},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"Status definitions"}],"source_content_type":"text/x-rst","patch_set":1,"id":"9f413e64_d6b1e68b","line":144,"range":{"start_line":144,"start_character":71,"end_line":144,"end_character":72},"in_reply_to":"65dd4ac1_4887b486","updated":"2026-05-21 09:28:48.000000000","message":"Done","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":38059,"name":"Anoop Kumar Shukla","display_name":"Anoop Shukla","email":"anoop.shukla@netapp.com","username":"anoop2","status":"NetApp"},"change_message_id":"dc6e89e733b2bad3809270a5d195d5b37efa9d98","unresolved":true,"context_lines":[{"line_number":231,"context_line":"           \"time\": \"2025-06-15T10:29:45Z\""},{"line_number":232,"context_line":"         },"},{"line_number":233,"context_line":"         {"},{"line_number":234,"context_line":"           \"componentId\": \"netapp@netapp\","},{"line_number":235,"context_line":"           \"componentType\": \"datastore\","},{"line_number":236,"context_line":"           \"status\": \"pass\","},{"line_number":237,"context_line":"           \"time\": \"2025-06-15T10:29:50Z\""}],"source_content_type":"text/x-rst","patch_set":1,"id":"42da5dac_c69b5608","line":234,"range":{"start_line":234,"start_character":0,"end_line":234,"end_character":41},"updated":"2026-05-08 14:27:21.000000000","message":"How does this help understand which driver backend is successful/failed? Will the componentId be enough to provide that readability?","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"8b3dbc9276a837e56bb6f0f64849ae7f9db98d87","unresolved":true,"context_lines":[{"line_number":231,"context_line":"           \"time\": \"2025-06-15T10:29:45Z\""},{"line_number":232,"context_line":"         },"},{"line_number":233,"context_line":"         {"},{"line_number":234,"context_line":"           \"componentId\": \"netapp@netapp\","},{"line_number":235,"context_line":"           \"componentType\": \"datastore\","},{"line_number":236,"context_line":"           \"status\": \"pass\","},{"line_number":237,"context_line":"           \"time\": \"2025-06-15T10:29:50Z\""}],"source_content_type":"text/x-rst","patch_set":1,"id":"78e44328_451980a0","line":234,"range":{"start_line":234,"start_character":0,"end_line":234,"end_character":41},"in_reply_to":"42da5dac_c69b5608","updated":"2026-05-20 20:25:14.000000000","message":"The `componentId` maps directly to the `backend` configuration stanza name in\nmanila.conf (the `host@backend` identifier, e.g., \"storage01@cephfs\").\nThis is the same identifier operators already see via `manila-manage`, logs,\nand the services table, so it should be immediately recognizable.\nI\u0027ll make it clear because I understand that `cephfs@cephfs` doesn\u0027t make the idea.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":231,"context_line":"           \"time\": \"2025-06-15T10:29:45Z\""},{"line_number":232,"context_line":"         },"},{"line_number":233,"context_line":"         {"},{"line_number":234,"context_line":"           \"componentId\": \"netapp@netapp\","},{"line_number":235,"context_line":"           \"componentType\": \"datastore\","},{"line_number":236,"context_line":"           \"status\": \"pass\","},{"line_number":237,"context_line":"           \"time\": \"2025-06-15T10:29:50Z\""}],"source_content_type":"text/x-rst","patch_set":1,"id":"de5b1b33_41880445","line":234,"range":{"start_line":234,"start_character":0,"end_line":234,"end_character":41},"in_reply_to":"78e44328_451980a0","updated":"2026-05-21 09:28:48.000000000","message":"Done","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":38059,"name":"Anoop Kumar Shukla","display_name":"Anoop Shukla","email":"anoop.shukla@netapp.com","username":"anoop2","status":"NetApp"},"change_message_id":"f4a66257fa449b99178f6445d6f0be3f6c6dadd4","unresolved":false,"context_lines":[{"line_number":231,"context_line":"           \"time\": \"2025-06-15T10:29:45Z\""},{"line_number":232,"context_line":"         },"},{"line_number":233,"context_line":"         {"},{"line_number":234,"context_line":"           \"componentId\": \"netapp@netapp\","},{"line_number":235,"context_line":"           \"componentType\": \"datastore\","},{"line_number":236,"context_line":"           \"status\": \"pass\","},{"line_number":237,"context_line":"           \"time\": \"2025-06-15T10:29:50Z\""}],"source_content_type":"text/x-rst","patch_set":1,"id":"9a685a04_b20db8c4","line":234,"range":{"start_line":234,"start_character":0,"end_line":234,"end_character":41},"in_reply_to":"de5b1b33_41880445","updated":"2026-05-26 14:21:12.000000000","message":"Thanks! makes sense","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":38059,"name":"Anoop Kumar Shukla","display_name":"Anoop Shukla","email":"anoop.shukla@netapp.com","username":"anoop2","status":"NetApp"},"change_message_id":"dc6e89e733b2bad3809270a5d195d5b37efa9d98","unresolved":true,"context_lines":[{"line_number":241,"context_line":"         {"},{"line_number":242,"context_line":"           \"status\": \"warn\","},{"line_number":243,"context_line":"           \"time\": \"2025-06-15T10:28:00Z\","},{"line_number":244,"context_line":"           \"output\": \"last share server creation took 45s (threshold: 30s)\""},{"line_number":245,"context_line":"         }"},{"line_number":246,"context_line":"       ]"},{"line_number":247,"context_line":"     }"}],"source_content_type":"text/x-rst","patch_set":1,"id":"92bf5882_626a21dd","line":244,"updated":"2026-05-08 14:27:21.000000000","message":"What will be the guidelines to the output field? Is it the last error/warning or up to the driver to report any issue as warn/error? Also how are drivers supposed to manage error propogation? As-is vs making them more readable and not stack traces may be?","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":241,"context_line":"         {"},{"line_number":242,"context_line":"           \"status\": \"warn\","},{"line_number":243,"context_line":"           \"time\": \"2025-06-15T10:28:00Z\","},{"line_number":244,"context_line":"           \"output\": \"last share server creation took 45s (threshold: 30s)\""},{"line_number":245,"context_line":"         }"},{"line_number":246,"context_line":"       ]"},{"line_number":247,"context_line":"     }"}],"source_content_type":"text/x-rst","patch_set":1,"id":"3c58316b_5a1ce987","line":244,"in_reply_to":"2ea9e9d1_3d752f5d","updated":"2026-05-20 22:13:46.000000000","message":"i commented elsewhere regarding santization of the errors. but\nunfortunately for me this share server example deviates from the message that this feature concerns the health of a service :( \n\nshare servers are created by the drivers on backends and their health is already tracked within the manila database.. a share server being unhealthy isn\u0027t a backend/service failure... and they\u0027re a resource like shares/snapshots/replicas/backups/groups/etc.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":true,"context_lines":[{"line_number":241,"context_line":"         {"},{"line_number":242,"context_line":"           \"status\": \"warn\","},{"line_number":243,"context_line":"           \"time\": \"2025-06-15T10:28:00Z\","},{"line_number":244,"context_line":"           \"output\": \"last share server creation took 45s (threshold: 30s)\""},{"line_number":245,"context_line":"         }"},{"line_number":246,"context_line":"       ]"},{"line_number":247,"context_line":"     }"}],"source_content_type":"text/x-rst","patch_set":1,"id":"38a51d7e_88fa8a7e","line":244,"in_reply_to":"3c58316b_5a1ce987","updated":"2026-05-21 09:28:48.000000000","message":"I agree this is confusing. I removed the example that might deviate for the original proposal. Indeed resources are not the target here.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"8b3dbc9276a837e56bb6f0f64849ae7f9db98d87","unresolved":true,"context_lines":[{"line_number":241,"context_line":"         {"},{"line_number":242,"context_line":"           \"status\": \"warn\","},{"line_number":243,"context_line":"           \"time\": \"2025-06-15T10:28:00Z\","},{"line_number":244,"context_line":"           \"output\": \"last share server creation took 45s (threshold: 30s)\""},{"line_number":245,"context_line":"         }"},{"line_number":246,"context_line":"       ]"},{"line_number":247,"context_line":"     }"}],"source_content_type":"text/x-rst","patch_set":1,"id":"2ea9e9d1_3d752f5d","line":244,"in_reply_to":"92bf5882_626a21dd","updated":"2026-05-20 20:25:14.000000000","message":"I didn\u0027t think about accumulating the history, but the `last` error (current) should be enough for the purpose. I agree that a stack trace is not good.\nDrivers can register custom checks via the optional extension point and control their own output format, but the we should apply a generic rendering of the error that makes it short and readable.\nI have to think more about this part and I might need to provide more details under the Implementation section. Perhaps @gouthampravi@gmail.com has ideas for this.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":279,"context_line":"           \"componentType\": \"datastore\","},{"line_number":280,"context_line":"           \"status\": \"fail\","},{"line_number":281,"context_line":"           \"time\": \"2025-06-15T10:28:00Z\","},{"line_number":282,"context_line":"           \"output\": \"CephFS: mon_command failed: [errno 110] Connection timed out\""},{"line_number":283,"context_line":"         },"},{"line_number":284,"context_line":"         {"},{"line_number":285,"context_line":"           \"componentId\": \"netapp@netapp\","}],"source_content_type":"text/x-rst","patch_set":1,"id":"b928c608_89bc86e2","line":282,"range":{"start_line":282,"start_character":0,"end_line":282,"end_character":83},"updated":"2026-05-20 22:13:46.000000000","message":"let\u0027s mandate error message sanitization; maybe we end up categorizing this.. e.g., \"connection_timeout\", \"authentication_failed\", \"service_unavailable\", etc..","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":true,"context_lines":[{"line_number":279,"context_line":"           \"componentType\": \"datastore\","},{"line_number":280,"context_line":"           \"status\": \"fail\","},{"line_number":281,"context_line":"           \"time\": \"2025-06-15T10:28:00Z\","},{"line_number":282,"context_line":"           \"output\": \"CephFS: mon_command failed: [errno 110] Connection timed out\""},{"line_number":283,"context_line":"         },"},{"line_number":284,"context_line":"         {"},{"line_number":285,"context_line":"           \"componentId\": \"netapp@netapp\","}],"source_content_type":"text/x-rst","patch_set":1,"id":"e393bdd9_437a3493","line":282,"range":{"start_line":282,"start_character":0,"end_line":282,"end_character":83},"in_reply_to":"b928c608_89bc86e2","updated":"2026-05-21 09:28:48.000000000","message":"+1. I\u0027ll defer the categorization in a next PS I can push once we agree on the rest of the writing.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":32919,"name":"kiran pawar","display_name":"Kiran Pawar","email":"kinpaa@gmail.com","username":"kpdev"},"change_message_id":"48ac1ff928cf2fc3b5b7d6aac0791434f3bda735","unresolved":true,"context_lines":[{"line_number":349,"context_line":"listener) that serves the ``/health`` endpoint. It reads the in-memory state"},{"line_number":350,"context_line":"from the ``HealthcheckManager`` and serializes the response. No"},{"line_number":351,"context_line":"authentication is performed. The server runs in a dedicated thread within"},{"line_number":352,"context_line":"the service process."},{"line_number":353,"context_line":"Health state is stored in-memory only and is reset on process restart."},{"line_number":354,"context_line":""},{"line_number":355,"context_line":"Configuration"}],"source_content_type":"text/x-rst","patch_set":1,"id":"3f1ec4b5_f17f03ee","line":352,"updated":"2026-05-13 14:58:37.000000000","message":"If the service is gracefully stopped, does the healthcheck thread get torn down cleanly?","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":349,"context_line":"listener) that serves the ``/health`` endpoint. It reads the in-memory state"},{"line_number":350,"context_line":"from the ``HealthcheckManager`` and serializes the response. No"},{"line_number":351,"context_line":"authentication is performed. The server runs in a dedicated thread within"},{"line_number":352,"context_line":"the service process."},{"line_number":353,"context_line":"Health state is stored in-memory only and is reset on process restart."},{"line_number":354,"context_line":""},{"line_number":355,"context_line":"Configuration"}],"source_content_type":"text/x-rst","patch_set":1,"id":"26a511aa_0d6a4e9e","line":352,"in_reply_to":"2efeac41_0f7605ef","updated":"2026-05-20 22:13:46.000000000","message":"+1","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"8b3dbc9276a837e56bb6f0f64849ae7f9db98d87","unresolved":true,"context_lines":[{"line_number":349,"context_line":"listener) that serves the ``/health`` endpoint. It reads the in-memory state"},{"line_number":350,"context_line":"from the ``HealthcheckManager`` and serializes the response. No"},{"line_number":351,"context_line":"authentication is performed. The server runs in a dedicated thread within"},{"line_number":352,"context_line":"the service process."},{"line_number":353,"context_line":"Health state is stored in-memory only and is reset on process restart."},{"line_number":354,"context_line":""},{"line_number":355,"context_line":"Configuration"}],"source_content_type":"text/x-rst","patch_set":1,"id":"2efeac41_0f7605ef","line":352,"in_reply_to":"3f1ec4b5_f17f03ee","updated":"2026-05-20 20:25:14.000000000","message":"Yes, I might need to clarify this.\nI assume the healthcheck HTTP server thread is torn down cleanly on graceful shutdown and when it receives the related event it exits without blocking process termination.\nI will call this out in my next update.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":350,"context_line":"from the ``HealthcheckManager`` and serializes the response. No"},{"line_number":351,"context_line":"authentication is performed. The server runs in a dedicated thread within"},{"line_number":352,"context_line":"the service process."},{"line_number":353,"context_line":"Health state is stored in-memory only and is reset on process restart."},{"line_number":354,"context_line":""},{"line_number":355,"context_line":"Configuration"},{"line_number":356,"context_line":"-------------"}],"source_content_type":"text/x-rst","patch_set":1,"id":"c28978bf_5db0e726","line":353,"updated":"2026-05-20 22:13:46.000000000","message":"a concern for a single threaded HTTP server is its attack surface.. we should have a timeout for requests perhaps (this should be straightforward to implement if we\u0027re using socketserver).. \n\nelse we\u0027ll be opening this to a \"slowloris\"/DOS attack.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":350,"context_line":"from the ``HealthcheckManager`` and serializes the response. No"},{"line_number":351,"context_line":"authentication is performed. The server runs in a dedicated thread within"},{"line_number":352,"context_line":"the service process."},{"line_number":353,"context_line":"Health state is stored in-memory only and is reset on process restart."},{"line_number":354,"context_line":""},{"line_number":355,"context_line":"Configuration"},{"line_number":356,"context_line":"-------------"}],"source_content_type":"text/x-rst","patch_set":1,"id":"ed9ec9c1_8c3661f5","line":353,"in_reply_to":"c28978bf_5db0e726","updated":"2026-05-21 09:28:48.000000000","message":"Done","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":362,"context_line":"   [healthcheck]"},{"line_number":363,"context_line":"   # Comma-separated listen URIs. Supports tcp:// and unix:// schemes."},{"line_number":364,"context_line":"   # Disabled when empty (default)."},{"line_number":365,"context_line":"   # uri \u003d tcp://localhost:9399"},{"line_number":366,"context_line":"   # uri \u003d unix:///run/manila/manila-share.sock"},{"line_number":367,"context_line":""},{"line_number":368,"context_line":"   # How long (seconds) a health indicator remains valid before going stale."}],"source_content_type":"text/x-rst","patch_set":1,"id":"84957c60_5dfaf6d7","line":365,"range":{"start_line":365,"start_character":27,"end_line":365,"end_character":31},"updated":"2026-05-20 22:13:46.000000000","message":"this, unlike the unix socket approach might result in port conflicts if someone runs multiple services on the same host; so maybe we need a line to prefer unix sockets instead? or tell them to customize the port per service..","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":true,"context_lines":[{"line_number":362,"context_line":"   [healthcheck]"},{"line_number":363,"context_line":"   # Comma-separated listen URIs. Supports tcp:// and unix:// schemes."},{"line_number":364,"context_line":"   # Disabled when empty (default)."},{"line_number":365,"context_line":"   # uri \u003d tcp://localhost:9399"},{"line_number":366,"context_line":"   # uri \u003d unix:///run/manila/manila-share.sock"},{"line_number":367,"context_line":""},{"line_number":368,"context_line":"   # How long (seconds) a health indicator remains valid before going stale."}],"source_content_type":"text/x-rst","patch_set":1,"id":"5efe3de7_684d0534","line":365,"range":{"start_line":365,"start_character":27,"end_line":365,"end_character":31},"in_reply_to":"84957c60_5dfaf6d7","updated":"2026-05-21 09:28:48.000000000","message":"+1, let\u0027s remove the ip address customization. I did it in my last iteration. k8s can access it via localhost I guess and unix sockets are good for systemd integration.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":32919,"name":"kiran pawar","display_name":"Kiran Pawar","email":"kinpaa@gmail.com","username":"kpdev"},"change_message_id":"48ac1ff928cf2fc3b5b7d6aac0791434f3bda735","unresolved":true,"context_lines":[{"line_number":399,"context_line":"       path: /health"},{"line_number":400,"context_line":"       port: 9399"},{"line_number":401,"context_line":"     initialDelaySeconds: 10"},{"line_number":402,"context_line":"     periodSeconds: 5"},{"line_number":403,"context_line":""},{"line_number":404,"context_line":"This replaces the current approach described in the first section of the spec,"},{"line_number":405,"context_line":"and instead of forking a ``manila-manage`` subprocess on every probe invocation"}],"source_content_type":"text/x-rst","patch_set":1,"id":"2a3e2871_91fe1d64","line":402,"updated":"2026-05-13 14:58:37.000000000","message":"both liveness and readiness probe points to same path ?","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"8b3dbc9276a837e56bb6f0f64849ae7f9db98d87","unresolved":true,"context_lines":[{"line_number":399,"context_line":"       path: /health"},{"line_number":400,"context_line":"       port: 9399"},{"line_number":401,"context_line":"     initialDelaySeconds: 10"},{"line_number":402,"context_line":"     periodSeconds: 5"},{"line_number":403,"context_line":""},{"line_number":404,"context_line":"This replaces the current approach described in the first section of the spec,"},{"line_number":405,"context_line":"and instead of forking a ``manila-manage`` subprocess on every probe invocation"}],"source_content_type":"text/x-rst","patch_set":1,"id":"bfb24b27_c4116b46","line":402,"in_reply_to":"2a3e2871_91fe1d64","updated":"2026-05-20 20:25:14.000000000","message":"yes, the example shows how to configure probes in k8s but I agree it does not explain why the same path is ok.\n\nIn general, when the `status` is \"fail\", the endpoint returns HTTP 503.\nThe readiness probe fails very soon, and the pod is removed from the Service endpoints (it stops receiving traffic).\nThe liveness probe is configured to tolerate a few failures (failureThreshold: 3) before restarting the pod. This means that readiness reacts quickly to degradation (tight thresholds) while liveness needs more time. For this reason a single endpoint is sufficient because Kubernetes probes are based on the `HTTP` status codes and they react differently based on the configuration.\n\nI will add a paragraph if these bits are of interest here, but the idea is to not complicate much the solution and show that a k8s based environment can leverage this mechanism.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":true,"context_lines":[{"line_number":399,"context_line":"       path: /health"},{"line_number":400,"context_line":"       port: 9399"},{"line_number":401,"context_line":"     initialDelaySeconds: 10"},{"line_number":402,"context_line":"     periodSeconds: 5"},{"line_number":403,"context_line":""},{"line_number":404,"context_line":"This replaces the current approach described in the first section of the spec,"},{"line_number":405,"context_line":"and instead of forking a ``manila-manage`` subprocess on every probe invocation"}],"source_content_type":"text/x-rst","patch_set":1,"id":"f3532d29_ab75b32f","line":402,"in_reply_to":"b018ac13_dd275f28","updated":"2026-05-21 09:28:48.000000000","message":"I\u0027ve updated this section with the short term plan. We can improve this feature over the time but I would come back to the endpoint separation with a follow up.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":399,"context_line":"       path: /health"},{"line_number":400,"context_line":"       port: 9399"},{"line_number":401,"context_line":"     initialDelaySeconds: 10"},{"line_number":402,"context_line":"     periodSeconds: 5"},{"line_number":403,"context_line":""},{"line_number":404,"context_line":"This replaces the current approach described in the first section of the spec,"},{"line_number":405,"context_line":"and instead of forking a ``manila-manage`` subprocess on every probe invocation"}],"source_content_type":"text/x-rst","patch_set":1,"id":"b018ac13_dd275f28","line":402,"in_reply_to":"bfb24b27_c4116b46","updated":"2026-05-20 22:13:46.000000000","message":"I think, \"readiness\" looks different than \"liveness\". I can live with the unification - it is lots better than status quo..  but, in reality the pattern in many of these services (scheduler, share, data) is that there\u0027re startup (+reconciliation) activities that are different from a service going down after successfully starting up.. \n\nLiveness tests ask if we should restart a service (somewhere else); readiness asks if we can begin to use the service.. i feel like this split can be taken advantage of by the tooling we build around this. \n\nHow complicated would distinguishing these be?\n\nif we added a `/health` (startup and readiness checks) and a `/livez` (alive ping tests) would it help?\n\nI am okay with us trying to start with /health.. but, maybe note that we anticipate a differentiation later down the road; doing this may prevent someone from using /health in liveness probes entirely, or set appropriate failureThreshold and periodSeconds as your example shows.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":false,"context_lines":[{"line_number":414,"context_line":"RPC-based services (``manila-share``, ``manila-scheduler``, ``manila-data``),"},{"line_number":415,"context_line":"which are the services with the most critical health monitoring gaps."},{"line_number":416,"context_line":"Additionally, the existing middleware leaks system information and does not"},{"line_number":417,"context_line":"support the three-state model."},{"line_number":418,"context_line":""},{"line_number":419,"context_line":"**Active health probing (synthetic operations)**: instead of passively"},{"line_number":420,"context_line":"observing real operations, the service could periodically execute synthetic"}],"source_content_type":"text/x-rst","patch_set":1,"id":"96a175c3_1c5c8ca3","line":417,"updated":"2026-05-20 22:13:46.000000000","message":"nein :P not so easy to do this. The middleware isn\u0027t packaged/shipped with these components; and exposing this would be the same can of worms opened by someone else","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":481,"context_line":""},{"line_number":482,"context_line":"* The endpoint can optionally be bound to ``0.0.0.0`` or a specific IP address"},{"line_number":483,"context_line":"  for external consumers (Prometheus, operator reconciler). This is an explicit"},{"line_number":484,"context_line":"  opt-in configuration and should be properly documented."},{"line_number":485,"context_line":""},{"line_number":486,"context_line":"* The response body might contain service-level metadata (hostname, backend name,"},{"line_number":487,"context_line":"  error messages). While this information is not privileged in a typical"}],"source_content_type":"text/x-rst","patch_set":1,"id":"c4689d6f_95d50929","line":484,"range":{"start_line":484,"start_character":56,"end_line":484,"end_character":57},"updated":"2026-05-20 22:13:46.000000000","message":"i doubt this would ever be needed; it\u0027s dangerous to do.. external consumers must have a way to plug into an internal mechanism safely.. \n\nunauthenticated endpoints like this on known ports are prime SSRF targets..","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":481,"context_line":""},{"line_number":482,"context_line":"* The endpoint can optionally be bound to ``0.0.0.0`` or a specific IP address"},{"line_number":483,"context_line":"  for external consumers (Prometheus, operator reconciler). This is an explicit"},{"line_number":484,"context_line":"  opt-in configuration and should be properly documented."},{"line_number":485,"context_line":""},{"line_number":486,"context_line":"* The response body might contain service-level metadata (hostname, backend name,"},{"line_number":487,"context_line":"  error messages). While this information is not privileged in a typical"}],"source_content_type":"text/x-rst","patch_set":1,"id":"7c8e4cfb_bcc53744","line":484,"range":{"start_line":484,"start_character":56,"end_line":484,"end_character":57},"in_reply_to":"c4689d6f_95d50929","updated":"2026-05-21 09:28:48.000000000","message":"Acknowledged","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":489,"context_line":"  avoided. Documentation will recommend the best policies to restrict access"},{"line_number":490,"context_line":"  when exposing the endpoint beyond localhost."},{"line_number":491,"context_line":""},{"line_number":492,"context_line":"* No credentials, tokens, or user data are included in health responses."},{"line_number":493,"context_line":""},{"line_number":494,"context_line":""},{"line_number":495,"context_line":"Notifications impact"}],"source_content_type":"text/x-rst","patch_set":1,"id":"f205df4c_ea3bc06f","line":492,"range":{"start_line":492,"start_character":2,"end_line":492,"end_character":72},"updated":"2026-05-20 22:13:46.000000000","message":"++ We need the santization logic to ensure this.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":489,"context_line":"  avoided. Documentation will recommend the best policies to restrict access"},{"line_number":490,"context_line":"  when exposing the endpoint beyond localhost."},{"line_number":491,"context_line":""},{"line_number":492,"context_line":"* No credentials, tokens, or user data are included in health responses."},{"line_number":493,"context_line":""},{"line_number":494,"context_line":""},{"line_number":495,"context_line":"Notifications impact"}],"source_content_type":"text/x-rst","patch_set":1,"id":"6e32bd6a_9668f842","line":492,"range":{"start_line":492,"start_character":2,"end_line":492,"end_character":72},"in_reply_to":"f205df4c_ea3bc06f","updated":"2026-05-21 09:28:48.000000000","message":"Done","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":535,"context_line":"  that do not opt in."},{"line_number":536,"context_line":"* Podified deployments using ``openstack-k8s-operators`` will benefit from"},{"line_number":537,"context_line":"  operator-level integration once the ``manila-operator`` is updated to"},{"line_number":538,"context_line":"  configure healthcheck URIs and Kubernetes probes."},{"line_number":539,"context_line":""},{"line_number":540,"context_line":""},{"line_number":541,"context_line":"Developer impact"}],"source_content_type":"text/x-rst","patch_set":1,"id":"9a33860f_42490e5c","line":538,"updated":"2026-05-20 22:13:46.000000000","message":"Maybe add some language on how systemd based deployments can take advantage of this:\n\nusing the healthcheck endpoint with systemd\u0027s WatchdogSec or Type\u003dnotify, or a simple systemd timer that curls the endpoint..","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":535,"context_line":"  that do not opt in."},{"line_number":536,"context_line":"* Podified deployments using ``openstack-k8s-operators`` will benefit from"},{"line_number":537,"context_line":"  operator-level integration once the ``manila-operator`` is updated to"},{"line_number":538,"context_line":"  configure healthcheck URIs and Kubernetes probes."},{"line_number":539,"context_line":""},{"line_number":540,"context_line":""},{"line_number":541,"context_line":"Developer impact"}],"source_content_type":"text/x-rst","patch_set":1,"id":"ce0f6660_9cfa586b","line":538,"in_reply_to":"9a33860f_42490e5c","updated":"2026-05-21 09:28:48.000000000","message":"Done","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":561,"context_line":"-----------"},{"line_number":562,"context_line":""},{"line_number":563,"context_line":"Primary assignee:"},{"line_number":564,"context_line":"  \u003cTBD\u003e"},{"line_number":565,"context_line":""},{"line_number":566,"context_line":"Other contributors:"},{"line_number":567,"context_line":"  \u003cTBD\u003e"}],"source_content_type":"text/x-rst","patch_set":1,"id":"3db950fc_76a059b3","line":564,"range":{"start_line":564,"start_character":2,"end_line":564,"end_character":7},"updated":"2026-05-20 22:13:46.000000000","message":"i sure hope this will be you; but happy to assist! :)","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"3a15e136c1a1e3794314818c5da76bdca80b3056","unresolved":false,"context_lines":[{"line_number":561,"context_line":"-----------"},{"line_number":562,"context_line":""},{"line_number":563,"context_line":"Primary assignee:"},{"line_number":564,"context_line":"  \u003cTBD\u003e"},{"line_number":565,"context_line":""},{"line_number":566,"context_line":"Other contributors:"},{"line_number":567,"context_line":"  \u003cTBD\u003e"}],"source_content_type":"text/x-rst","patch_set":1,"id":"a245cc89_f685f861","line":564,"range":{"start_line":564,"start_character":2,"end_line":564,"end_character":7},"in_reply_to":"3db950fc_76a059b3","updated":"2026-05-21 09:28:48.000000000","message":"Acknowledged","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":38059,"name":"Anoop Kumar Shukla","display_name":"Anoop Shukla","email":"anoop.shukla@netapp.com","username":"anoop2","status":"NetApp"},"change_message_id":"dc6e89e733b2bad3809270a5d195d5b37efa9d98","unresolved":true,"context_lines":[{"line_number":581,"context_line":"   * Apply ``@healthcheck(\u0027database\u0027, ...)`` to key share manager methods"},{"line_number":582,"context_line":"   * Apply ``@healthcheck(\u0027backend_driver\u0027, ...)`` to driver-calling methods"},{"line_number":583,"context_line":"     and ``_report_driver_status``"},{"line_number":584,"context_line":"   * Conditionally register ``share_server`` check based on driver config"},{"line_number":585,"context_line":""},{"line_number":586,"context_line":"3. **Other service integration**:"},{"line_number":587,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"4e12e4ec_de08ab5b","line":584,"updated":"2026-05-08 14:27:21.000000000","message":"So is it required for drivers to register share_server check? Or is it optional?","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":25402,"name":"Francesco Pantano","email":"fpantano@redhat.com","username":"fmount"},"change_message_id":"8b3dbc9276a837e56bb6f0f64849ae7f9db98d87","unresolved":true,"context_lines":[{"line_number":581,"context_line":"   * Apply ``@healthcheck(\u0027database\u0027, ...)`` to key share manager methods"},{"line_number":582,"context_line":"   * Apply ``@healthcheck(\u0027backend_driver\u0027, ...)`` to driver-calling methods"},{"line_number":583,"context_line":"     and ``_report_driver_status``"},{"line_number":584,"context_line":"   * Conditionally register ``share_server`` check based on driver config"},{"line_number":585,"context_line":""},{"line_number":586,"context_line":"3. **Other service integration**:"},{"line_number":587,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"edfe5900_99951ede","line":584,"in_reply_to":"4e12e4ec_de08ab5b","updated":"2026-05-20 20:25:14.000000000","message":"I would keep this as an optional item. It could be automatically registered by the share manager when the backend is configured.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":38059,"name":"Anoop Kumar Shukla","display_name":"Anoop Shukla","email":"anoop.shukla@netapp.com","username":"anoop2","status":"NetApp"},"change_message_id":"f4a66257fa449b99178f6445d6f0be3f6c6dadd4","unresolved":true,"context_lines":[{"line_number":581,"context_line":"   * Apply ``@healthcheck(\u0027database\u0027, ...)`` to key share manager methods"},{"line_number":582,"context_line":"   * Apply ``@healthcheck(\u0027backend_driver\u0027, ...)`` to driver-calling methods"},{"line_number":583,"context_line":"     and ``_report_driver_status``"},{"line_number":584,"context_line":"   * Conditionally register ``share_server`` check based on driver config"},{"line_number":585,"context_line":""},{"line_number":586,"context_line":"3. **Other service integration**:"},{"line_number":587,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"cd256900_7bc97b8c","line":584,"in_reply_to":"e2065e71_1dbbd619","updated":"2026-05-26 14:21:12.000000000","message":"Okay got it.","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"},{"author":{"_account_id":16643,"name":"Goutham Pacha Ravi","email":"gouthampravi@gmail.com","username":"gouthamr"},"change_message_id":"c3928840b57f416cbb496795566aec613c242e33","unresolved":true,"context_lines":[{"line_number":581,"context_line":"   * Apply ``@healthcheck(\u0027database\u0027, ...)`` to key share manager methods"},{"line_number":582,"context_line":"   * Apply ``@healthcheck(\u0027backend_driver\u0027, ...)`` to driver-calling methods"},{"line_number":583,"context_line":"     and ``_report_driver_status``"},{"line_number":584,"context_line":"   * Conditionally register ``share_server`` check based on driver config"},{"line_number":585,"context_line":""},{"line_number":586,"context_line":"3. **Other service integration**:"},{"line_number":587,"context_line":""}],"source_content_type":"text/x-rst","patch_set":1,"id":"e2065e71_1dbbd619","line":584,"in_reply_to":"edfe5900_99951ede","updated":"2026-05-20 22:13:46.000000000","message":"see comment above regarding this","commit_id":"d36830476c7e99824396d7e3b0df87e3ffc8b9ee"}]}
