{"/COMMIT_MSG":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":4,"context_line":"Commit:     Jianjian Huo \u003cjhuo@nvidia.com\u003e"},{"line_number":5,"context_line":"CommitDate: 2023-08-01 23:11:21 -0700"},{"line_number":6,"context_line":""},{"line_number":7,"context_line":"proxy-server: add a global memcache lock to reduce backend requests"},{"line_number":8,"context_line":""},{"line_number":9,"context_line":"The cost of memcache misses could be deadly. For example, when"},{"line_number":10,"context_line":"updating shard range cache query miss, PUT requests would have to"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"fff3525e_bbbde854","line":7,"updated":"2023-08-25 00:02:59.000000000","message":"let\u0027s not call it a lock, yeah?","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":4,"context_line":"Commit:     Jianjian Huo \u003cjhuo@nvidia.com\u003e"},{"line_number":5,"context_line":"CommitDate: 2023-08-01 23:11:21 -0700"},{"line_number":6,"context_line":""},{"line_number":7,"context_line":"proxy-server: add a global memcache lock to reduce backend requests"},{"line_number":8,"context_line":""},{"line_number":9,"context_line":"The cost of memcache misses could be deadly. For example, when"},{"line_number":10,"context_line":"updating shard range cache query miss, PUT requests would have to"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"1783a508_3a5960e9","line":7,"in_reply_to":"fff3525e_bbbde854","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":10,"context_line":"updating shard range cache query miss, PUT requests would have to"},{"line_number":11,"context_line":"query the backend to figure out which shard to upload the objects."},{"line_number":12,"context_line":"And when a lot of requests are sending to the backend at the same"},{"line_number":13,"context_line":"time, this could easily overload the root container and cause a"},{"line_number":14,"context_line":"lot of 500/503 errors."},{"line_number":15,"context_line":""},{"line_number":16,"context_line":"Memcache cluster can\u0027t eliminate cache misses totally, and container"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"758f0da1_ebc31cf0","line":13,"updated":"2023-08-25 00:02:59.000000000","message":"maybe easily is bit gloomy, the point is we\u0027ll allow 10K\u0027s of object PUTs per second but probably only 100s of req per second to a single root.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":10,"context_line":"updating shard range cache query miss, PUT requests would have to"},{"line_number":11,"context_line":"query the backend to figure 
out which shard to upload the objects."},{"line_number":12,"context_line":"And when a lot of requests are sending to the backend at the same"},{"line_number":13,"context_line":"time, this could easily overload the root container and cause a"},{"line_number":14,"context_line":"lot of 500/503 errors."},{"line_number":15,"context_line":""},{"line_number":16,"context_line":"Memcache cluster can\u0027t eliminate cache misses totally, and container"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"8b407d50_c9a5b9f0","line":13,"in_reply_to":"758f0da1_ebc31cf0","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":14,"context_line":"lot of 500/503 errors."},{"line_number":15,"context_line":""},{"line_number":16,"context_line":"Memcache cluster can\u0027t eliminate cache misses totally, and container"},{"line_number":17,"context_line":"server can\u0027t scale its performance infinitely, in order to prepare"},{"line_number":18,"context_line":"proxy-server/container-server ready for those events, a global"},{"line_number":19,"context_line":"memcache lock can be added into proxy-server to reduce backend"},{"line_number":20,"context_line":"requests significanly: when updating shard range cache misses, the"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"355e7e31_77afe46f","line":17,"updated":"2023-08-25 00:02:59.000000000","message":"it\u0027s hard to argue with sound logic - kudos!","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":14,"context_line":"lot of 500/503 errors."},{"line_number":15,"context_line":""},{"line_number":16,"context_line":"Memcache cluster can\u0027t eliminate cache misses totally, and container"},{"line_number":17,"context_line":"server can\u0027t scale its performance infinitely, in order to prepare"},{"line_number":18,"context_line":"proxy-server/container-server ready for those events, a global"},{"line_number":19,"context_line":"memcache lock can be added into proxy-server to reduce backend"},{"line_number":20,"context_line":"requests significanly: when updating shard range cache misses, the"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"2981b874_02e2c97f","line":17,"in_reply_to":"355e7e31_77afe46f","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":18,"context_line":"proxy-server/container-server ready for those events, a global"},{"line_number":19,"context_line":"memcache lock can be added into proxy-server to reduce backend"},{"line_number":20,"context_line":"requests significanly: when updating shard range cache misses, the"},{"line_number":21,"context_line":"first one (or a few) request will set a global memcache lock before"},{"line_number":22,"context_line":"it send backend request to fetch 
updating shard ranges from"},{"line_number":23,"context_line":"container servers. And the following cache miss requests will wait"},{"line_number":24,"context_line":"for cache filling to finish, instead of all querying backend"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"73bbf17e_83343d4c","line":21,"updated":"2023-08-25 00:02:59.000000000","message":"having worked on some largish distributed systems; \"global\" and \"lock\" are kind of triggering for me to hear together 😊","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":18,"context_line":"proxy-server/container-server ready for those events, a global"},{"line_number":19,"context_line":"memcache lock can be added into proxy-server to reduce backend"},{"line_number":20,"context_line":"requests significanly: when updating shard range cache misses, the"},{"line_number":21,"context_line":"first one (or a few) request will set a global memcache lock before"},{"line_number":22,"context_line":"it send backend request to fetch updating shard ranges from"},{"line_number":23,"context_line":"container servers. And the following cache miss requests will wait"},{"line_number":24,"context_line":"for cache filling to finish, instead of all querying backend"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"11633efd_88970f02","line":21,"in_reply_to":"73bbf17e_83343d4c","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":30,"context_line":"In a production environment like this, we have seen cache misses"},{"line_number":31,"context_line":"caused by cache evictions of memcached replicas, and new added"},{"line_number":32,"context_line":"retries will greatly improve the chances to retrieve the cached"},{"line_number":33,"context_line":"shard ranges from the correctly cached replicas."},{"line_number":34,"context_line":""},{"line_number":35,"context_line":"Change-Id: I50ff92441c2f2c49b3034644aba59930e8a99589"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"c9bd0457_d3dfc988","line":33,"updated":"2023-08-25 00:02:59.000000000","message":"yes, I think the conclusion is \"this might seem complicated, but believe me you don\u0027t want to deal with 1K\u0027s of async created per second everytime you have a memcache blip\" - Al has said something along the lines of \"stale cache shard range is always better than no cache shard range\", I think you\u0027re saying something similar \"a bit of waiting in the proxy will save you pain on the backend\"","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":30,"context_line":"In a production environment like this, we have seen cache misses"},{"line_number":31,"context_line":"caused by cache evictions of memcached replicas, and new added"},{"line_number":32,"context_line":"retries will greatly improve the chances to retrieve 
the cached"},{"line_number":33,"context_line":"shard ranges from the correctly cached replicas."},{"line_number":34,"context_line":""},{"line_number":35,"context_line":"Change-Id: I50ff92441c2f2c49b3034644aba59930e8a99589"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":2,"id":"b29e36e3_98192d24","line":33,"in_reply_to":"c9bd0457_d3dfc988","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":12,"context_line":"users face: given a cache item that is popular and difficult to"},{"line_number":13,"context_line":"recreate, in the event of cache misses, users could end up with"},{"line_number":14,"context_line":"hundreds (or thousands) of processes slamming the backend database"},{"line_number":15,"context_line":"at the same time in an attempt to refill the same cache content."},{"line_number":16,"context_line":""},{"line_number":17,"context_line":"Here is the way how cooperative token works. When lots of in-flight"},{"line_number":18,"context_line":"callers try to get the cached item specified by key from memcache"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"49809fcb_97da9e4f","line":15,"updated":"2024-03-15 15:00:01.000000000","message":"I think it would also be relevant to expand that OUR problem driving this change isn\u0027t actually the \"overwhelm the backend\" (although I guess we do still sometimes see miss.503) but generally I understand the problem as mostly trying to protect *memcache* from having a bunch of writes cause premature cache eviction under memory pressure.  Unless I\u0027m still misunderstanding?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":12,"context_line":"users face: given a cache item that is popular and difficult to"},{"line_number":13,"context_line":"recreate, in the event of cache misses, users could end up with"},{"line_number":14,"context_line":"hundreds (or thousands) of processes slamming the backend database"},{"line_number":15,"context_line":"at the same time in an attempt to refill the same cache content."},{"line_number":16,"context_line":""},{"line_number":17,"context_line":"Here is the way how cooperative token works. When lots of in-flight"},{"line_number":18,"context_line":"callers try to get the cached item specified by key from memcache"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"dfffe1cc_f40141bf","line":15,"in_reply_to":"49809fcb_97da9e4f","updated":"2024-03-20 04:07:49.000000000","message":"The original ghetto lock actually is designed mostly to avoid the \"overwhelm the backend\" situation. but in our case, yes, you are right, we are killing two birds with one stone. 
I have added the description of \"protect memcache from having a bunch of writes cause premature cache eviction under memory pressure\" in the commit message.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":16,"context_line":""},{"line_number":17,"context_line":"Here is the way how cooperative token works. When lots of in-flight"},{"line_number":18,"context_line":"callers try to get the cached item specified by key from memcache"},{"line_number":19,"context_line":"and get cache misses, only the first a few number (defined by"},{"line_number":20,"context_line":"``num_tokens``) of query requests will be able to get the cooperative"},{"line_number":21,"context_line":"tokens by creating or incrementing an internal memcache key, and then"},{"line_number":22,"context_line":"those callers with tokens can send backend requests to fetch data"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"26742011_3e641eaf","line":19,"updated":"2024-03-15 15:00:01.000000000","message":"s/defined/somewhat limited by/","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":16,"context_line":""},{"line_number":17,"context_line":"Here is the way how cooperative token works. When lots of in-flight"},{"line_number":18,"context_line":"callers try to get the cached item specified by key from memcache"},{"line_number":19,"context_line":"and get cache misses, only the first a few number (defined by"},{"line_number":20,"context_line":"``num_tokens``) of query requests will be able to get the cooperative"},{"line_number":21,"context_line":"tokens by creating or incrementing an internal memcache key, and then"},{"line_number":22,"context_line":"those callers with tokens can send backend requests to fetch data"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"d0199b78_978d3158","line":19,"in_reply_to":"26742011_3e641eaf","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":21,"context_line":"tokens by creating or incrementing an internal memcache key, and then"},{"line_number":22,"context_line":"those callers with tokens can send backend requests to fetch data"},{"line_number":23,"context_line":"from backend servers and be able set data into memcache; all other"},{"line_number":24,"context_line":"cache miss requests without a token will have to wait for cache"},{"line_number":25,"context_line":"filling to finish, instead of all querying the backend servers at the"},{"line_number":26,"context_line":"same time. 
After those requests with token are done, they will release"},{"line_number":27,"context_line":"the token by deleting the internal cache key and finish this usage"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"b70dff21_462354c0","line":24,"updated":"2024-03-15 15:00:01.000000000","message":"s/will have/should/","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":21,"context_line":"tokens by creating or incrementing an internal memcache key, and then"},{"line_number":22,"context_line":"those callers with tokens can send backend requests to fetch data"},{"line_number":23,"context_line":"from backend servers and be able set data into memcache; all other"},{"line_number":24,"context_line":"cache miss requests without a token will have to wait for cache"},{"line_number":25,"context_line":"filling to finish, instead of all querying the backend servers at the"},{"line_number":26,"context_line":"same time. After those requests with token are done, they will release"},{"line_number":27,"context_line":"the token by deleting the internal cache key and finish this usage"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"3ea3dabc_6541e95c","line":24,"in_reply_to":"b70dff21_462354c0","updated":"2024-03-20 04:07:49.000000000","message":"Done","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":24,"context_line":"cache miss requests without a token will have to wait for cache"},{"line_number":25,"context_line":"filling to finish, instead of all querying the backend servers at the"},{"line_number":26,"context_line":"same time. After those requests with token are done, they will release"},{"line_number":27,"context_line":"the token by deleting the internal cache key and finish this usage"},{"line_number":28,"context_line":"session."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"The original ghetto lock only defines one token for usage, while this"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"e050e61b_13014b4f","line":27,"updated":"2024-03-15 15:00:01.000000000","message":"s/deleting/decrementing/ ???","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":24,"context_line":"cache miss requests without a token will have to wait for cache"},{"line_number":25,"context_line":"filling to finish, instead of all querying the backend servers at the"},{"line_number":26,"context_line":"same time. 
After those requests with token are done, they will release"},{"line_number":27,"context_line":"the token by deleting the internal cache key and finish this usage"},{"line_number":28,"context_line":"session."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"The original ghetto lock only defines one token for usage, while this"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"d905343f_118c4888","line":27,"in_reply_to":"e050e61b_13014b4f","updated":"2024-03-20 04:07:49.000000000","message":"Currently the implementation uses delete instead of decrement for avoid some edge cases. For any single request with cooperative token acquired, it can just safely delete the token key after its operations finish. so for the default value 3 of \"num_tokens\", there will be two more duplicate memcache delete operations. But if we use memcache decrement, we need to worry about cases like, what if one request with cooperative token takes too long, even longer than \"token_ttl\"; and what if one request dies and will never do decrement on the token key.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":28,"context_line":"session."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"The original ghetto lock only defines one token for usage, while this"},{"line_number":31,"context_line":"cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":32,"context_line":"number of tokens during one usage session, default to be 3. This is"},{"line_number":33,"context_line":"used to increase fault tolerance in the distributed environment, when"},{"line_number":34,"context_line":"one caller process with token hangs or exits, any other requests with"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"a357d752_ba2653b8","line":31,"updated":"2024-03-15 15:00:01.000000000","message":"s/maximum number/minimum limit/","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":28,"context_line":"session."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"The original ghetto lock only defines one token for usage, while this"},{"line_number":31,"context_line":"cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":32,"context_line":"number of tokens during one usage session, default to be 3. 
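[Editor's note: to make the mechanism under review concrete, here is a
minimal sketch of the acquire/release flow the commit message and the thread
above describe: tokens are handed out by atomically incrementing an internal
memcache key, and released by deleting that key outright. All names here
(``try_acquire_token``, the token key scheme) are illustrative assumptions,
not code from the patch; it assumes a client in the style of Swift's
MemcacheRing, whose ``incr(key, delta, time)`` creates the key if missing.]

```
def try_acquire_token(memcache, key, num_tokens=3, token_ttl=10):
    token_key = 'cooperative-token/%s' % key  # hypothetical key scheme
    try:
        # incr creates the key at 1 if missing; token_ttl bounds the session
        count = memcache.incr(token_key, delta=1, time=token_ttl)
    except Exception:
        # memcache failure: behave as if no token; the caller falls back
        return False
    return count <= num_tokens  # only the first few callers win a token

def release_token(memcache, key):
    # delete (not decrement) ends the session even if peers hang or die;
    # duplicate deletes from the other winners are harmless no-ops
    memcache.delete('cooperative-token/%s' % key)
```

Deleting rather than decrementing means the first finisher ends the session
for everyone, which is the fault-tolerance trade-off described above: a hung
or dead token holder can never strand the count, because the key simply
expires after ``token_ttl``.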
This is"},{"line_number":33,"context_line":"used to increase fault tolerance in the distributed environment, when"},{"line_number":34,"context_line":"one caller process with token hangs or exits, any other requests with"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"62d8425d_e9ced243","line":31,"in_reply_to":"a357d752_ba2653b8","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":30,"context_line":"The original ghetto lock only defines one token for usage, while this"},{"line_number":31,"context_line":"cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":32,"context_line":"number of tokens during one usage session, default to be 3. This is"},{"line_number":33,"context_line":"used to increase fault tolerance in the distributed environment, when"},{"line_number":34,"context_line":"one caller process with token hangs or exits, any other requests with"},{"line_number":35,"context_line":"token still can set new fetched data into memcache and finish the"},{"line_number":36,"context_line":"whole usage session. In very rare case, when all 3 callers with tokens"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"5c0d8606_9dbd1319","line":33,"updated":"2024-03-15 15:00:01.000000000","message":"\"increase fault tolerance in the distributed environment\" 👍","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":30,"context_line":"The original ghetto lock only defines one token for usage, while this"},{"line_number":31,"context_line":"cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":32,"context_line":"number of tokens during one usage session, default to be 3. This is"},{"line_number":33,"context_line":"used to increase fault tolerance in the distributed environment, when"},{"line_number":34,"context_line":"one caller process with token hangs or exits, any other requests with"},{"line_number":35,"context_line":"token still can set new fetched data into memcache and finish the"},{"line_number":36,"context_line":"whole usage session. 
In very rare case, when all 3 callers with tokens"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"d1ce3199_9c17cbcb","line":33,"in_reply_to":"5c0d8606_9dbd1319","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":37,"context_line":"fails, the existing usage session ends after ``token_ttl`` period is"},{"line_number":38,"context_line":"reached and the internal key is expired, then all pending requests"},{"line_number":39,"context_line":"which have no token will exit waiting and fall back to query the"},{"line_number":40,"context_line":"backend (the situation without cooperative token); the new requests"},{"line_number":41,"context_line":"which cache misses and need querying backend after ``token_ttl`` will"},{"line_number":42,"context_line":"start a new round of cooperation session."},{"line_number":43,"context_line":""}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"13180b16_78ffea28","line":40,"updated":"2024-03-15 15:00:01.000000000","message":"\"the situation without cooperative token\" 👍","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":37,"context_line":"fails, the existing usage session ends after ``token_ttl`` period is"},{"line_number":38,"context_line":"reached and the internal key is expired, then all pending requests"},{"line_number":39,"context_line":"which have no token will exit waiting and fall back to query the"},{"line_number":40,"context_line":"backend (the situation without cooperative token); the new requests"},{"line_number":41,"context_line":"which cache misses and need querying backend after ``token_ttl`` will"},{"line_number":42,"context_line":"start a new round of cooperation session."},{"line_number":43,"context_line":""}],"source_content_type":"text/x-gerrit-commit-message","patch_set":10,"id":"9bc4ef80_facc086f","line":40,"in_reply_to":"13180b16_78ffea28","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":26,"context_line":"backend requests to fetch data from backend servers and be able set"},{"line_number":27,"context_line":"data into memcache; all other cache miss requests without a token should"},{"line_number":28,"context_line":"wait for cache filling to finish, instead of all querying the backend"},{"line_number":29,"context_line":"servers at the same time."},{"line_number":30,"context_line":""},{"line_number":31,"context_line":"Change-Id: I50ff92441c2f2c49b3034644aba59930e8a99589"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":15,"id":"9c12a443_e9fcc32e","line":29,"updated":"2024-03-26 15:06:38.000000000","message":"👍 reads great!","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian 
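[Editor's note: the waiting side, continuing the same illustrative sketch
and reusing ``try_acquire_token``/``release_token`` from above; again these
are assumed names, not the patch's actual code. Losers poll memcache while
the winners fill it, and fall back to querying the backend themselves once
``token_ttl`` elapses, i.e. the degenerate "situation without cooperative
token" the commit message mentions.]

```
import time

def get_with_cooperation(memcache, key, fetch_from_backend,
                         num_tokens=3, token_ttl=10, poll_interval=0.1):
    data = memcache.get(key)
    if data is not None:
        return data                        # plain cache hit
    if try_acquire_token(memcache, key, num_tokens, token_ttl):
        data = fetch_from_backend()        # winner refills the cache
        memcache.set(key, data)
        release_token(memcache, key)       # on failure the token just expires
        return data
    deadline = time.time() + token_ttl
    while time.time() < deadline:          # loser: wait for cache filling
        time.sleep(poll_interval)
        data = memcache.get(key)
        if data is not None:
            return data
    return fetch_from_backend()            # all winners failed: fall back
```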
Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":false,"context_lines":[{"line_number":26,"context_line":"backend requests to fetch data from backend servers and be able set"},{"line_number":27,"context_line":"data into memcache; all other cache miss requests without a token should"},{"line_number":28,"context_line":"wait for cache filling to finish, instead of all querying the backend"},{"line_number":29,"context_line":"servers at the same time."},{"line_number":30,"context_line":""},{"line_number":31,"context_line":"Change-Id: I50ff92441c2f2c49b3034644aba59930e8a99589"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":15,"id":"2cb9e92e_c0bfdde7","line":29,"in_reply_to":"9c12a443_e9fcc32e","updated":"2024-04-02 03:21:47.000000000","message":"Acknowledged","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4a3409f2c7557fb623f094089756f527fcf64b4b","unresolved":true,"context_lines":[{"line_number":16,"context_line":"time in an attempt to refill the same cache content. And this thundering"},{"line_number":17,"context_line":"herd problem not only often leads to unresponsive backend, and also"},{"line_number":18,"context_line":"those writes into memcached cause premature cache eviction under memory"},{"line_number":19,"context_line":"pressure."},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"With cooperative token, When lots of in-flight callers try to get the"},{"line_number":22,"context_line":"cached item specified by key from memcache and get cache misses, only"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":22,"id":"e84c8bdf_8bc1c79f","line":19,"updated":"2024-04-19 19:45:42.000000000","message":"thank you for expanding on this!","commit_id":"869da30764a12814b42bcbd41a6a4c5bd326c082"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49205ebacd44070d9afb148c56b81fc9c5274f1b","unresolved":false,"context_lines":[{"line_number":16,"context_line":"time in an attempt to refill the same cache content. 
And this thundering"},{"line_number":17,"context_line":"herd problem not only often leads to unresponsive backend, and also"},{"line_number":18,"context_line":"those writes into memcached cause premature cache eviction under memory"},{"line_number":19,"context_line":"pressure."},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"With cooperative token, When lots of in-flight callers try to get the"},{"line_number":22,"context_line":"cached item specified by key from memcache and get cache misses, only"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":22,"id":"1494f484_33b7719f","line":19,"in_reply_to":"e84c8bdf_8bc1c79f","updated":"2024-04-19 23:05:14.000000000","message":"Acknowledged","commit_id":"869da30764a12814b42bcbd41a6a4c5bd326c082"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4a3409f2c7557fb623f094089756f527fcf64b4b","unresolved":true,"context_lines":[{"line_number":18,"context_line":"those writes into memcached cause premature cache eviction under memory"},{"line_number":19,"context_line":"pressure."},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"With cooperative token, When lots of in-flight callers try to get the"},{"line_number":22,"context_line":"cached item specified by key from memcache and get cache misses, only"},{"line_number":23,"context_line":"the first a few number (limited by by ``num_tokens``) of query requests"},{"line_number":24,"context_line":"will be able to get the cooperative tokens by creating or incrementing"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":22,"id":"cc569647_6e1b650e","line":21,"updated":"2024-04-19 19:45:42.000000000","message":"s/When/when/","commit_id":"869da30764a12814b42bcbd41a6a4c5bd326c082"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49205ebacd44070d9afb148c56b81fc9c5274f1b","unresolved":false,"context_lines":[{"line_number":18,"context_line":"those writes into memcached cause premature cache eviction under memory"},{"line_number":19,"context_line":"pressure."},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"With cooperative token, When lots of in-flight callers try to get the"},{"line_number":22,"context_line":"cached item specified by key from memcache and get cache misses, only"},{"line_number":23,"context_line":"the first a few number (limited by by ``num_tokens``) of query requests"},{"line_number":24,"context_line":"will be able to get the cooperative tokens by creating or incrementing"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":22,"id":"c9dcbb44_2e0f4afd","line":21,"in_reply_to":"cc569647_6e1b650e","updated":"2024-04-19 23:05:14.000000000","message":"Done","commit_id":"869da30764a12814b42bcbd41a6a4c5bd326c082"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":7,"context_line":"common: add memcached based cooperative token mechanism."},{"line_number":8,"context_line":""},{"line_number":9,"context_line":"Memcached based cooperative token is a improved version of ghetto lock,"},{"line_number":10,"context_line":"see the descriptioin of ghetto lock at 
here:"},{"line_number":11,"context_line":"https://github.com/memcached/memcached/wiki/ProgrammingTricks"},{"line_number":12,"context_line":"It\u0027s used to avoid the thundering herd situation which many caching"},{"line_number":13,"context_line":"users face: given a cache item that is popular and difficult to"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":58,"id":"1ced0519_130f81a0","line":10,"range":{"start_line":10,"start_character":8,"end_line":10,"end_character":20},"updated":"2025-09-05 17:20:22.000000000","message":"s/descriptioin/description/","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[{"line_number":7,"context_line":"common: add memcached based cooperative token mechanism."},{"line_number":8,"context_line":""},{"line_number":9,"context_line":"Memcached based cooperative token is a improved version of ghetto lock,"},{"line_number":10,"context_line":"see the descriptioin of ghetto lock at here:"},{"line_number":11,"context_line":"https://github.com/memcached/memcached/wiki/ProgrammingTricks"},{"line_number":12,"context_line":"It\u0027s used to avoid the thundering herd situation which many caching"},{"line_number":13,"context_line":"users face: given a cache item that is popular and difficult to"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":58,"id":"5caa2c8a_9f22cf77","line":10,"range":{"start_line":10,"start_character":8,"end_line":10,"end_character":20},"in_reply_to":"1ced0519_130f81a0","updated":"2025-09-18 04:45:01.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":13,"context_line":"users face: given a cache item that is popular and difficult to"},{"line_number":14,"context_line":"recreate, in the event of cache misses, users could end up with hundreds"},{"line_number":15,"context_line":"(or thousands) of processes slamming the backend database at the same"},{"line_number":16,"context_line":"time in an attempt to refill the same cache content. And this thundering"},{"line_number":17,"context_line":"herd problem not only often leads to unresponsive backend, and also"},{"line_number":18,"context_line":"those writes into memcached cause premature cache eviction under memory"},{"line_number":19,"context_line":"pressure."},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"With cooperative token, when lots of in-flight callers try to get the"},{"line_number":22,"context_line":"cached item specified by key from memcache and get cache misses, only"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":58,"id":"20e2b565_12c3380f","line":19,"range":{"start_line":16,"start_character":53,"end_line":19,"end_character":9},"updated":"2025-09-05 17:20:22.000000000","message":"this isn\u0027t a valid sentence as it is written. 
PS58, lines 16-19 | Alistair Coles, 2025-09-05
  re: "And this thundering herd problem not only often leads to unresponsive
  backend, and also those writes into memcached cause premature cache
  eviction under memory pressure."
  this isn't a valid sentence as it is written. Maybe:

  ```
  This thundering herd problem not only often leads to unresponsive backend;
  those writes into memcached cause premature cache eviction under memory
  pressure.
  ```
  ↳ Jianjian Huo, 2025-09-18: Done

PS58, line 23 | Alistair Coles, 2025-09-05
  re: "...only the first a few number (limited by by ``num_tokens``) of
  query requests will be able to get the cooperative tokens..."
  ```
  ..., only the first few query requests (limited by ``num_tokens``) will be able...
  ```
  ↳ Jianjian Huo, 2025-09-18: Done

PS58, line 26 | Alistair Coles, 2025-09-05
  re: "...those callers with tokens can send backend requests to fetch data
  from backend servers and be able set data into memcache..."
  s/be able set/be able to set/
  ↳ Jianjian Huo, 2025-09-18: Done

File: /PATCHSET_LEVEL

PS1 | Jianjian Huo, 2023-08-01
  No tests yet; trying to collect feedback on the overall approach.
PS1 | Clay Gerrard, 2023-08-01
  I'd like to see this new global-memcache-lock implemented as a new
  testable primitive in utils, then applied for the shard range fetching
  case.

  An ideal implementation might allow some small "N" requestors - see the
  lock_path context manager.

  I think we want to ensure that when there's a memcache failure (or
  several) leading to a lock-timeout, we fall through to the default
  request-to-root behavior; we'll already be significantly slowing down
  requests to the backend.

  Neat idea! Probably worth pursuing - KUDOS!

PS1 | Jianjian Huo, 2023-08-02
  Thanks for the review! Moved the lock set/clear functions to utils; will
  add tests later.

PS2 | Tim Burke, 2023-08-02
  FWIW, memcache locking seems to have been something of an idea at some
  point: https://github.com/openstack/swift/commit/69b18e3c
  I don't think we ever had an actual implementation, though.

PS2 | Clay Gerrard, 2023-08-25
  I have this all torn up on my local branch, but I did NOT get anywhere
  close to a working solution. I'll keep hacking at it.

  I'm sure my overall sentiment is:

  1) I love this idea
  2) I hate the name "global|cache" *lock*
  3) I think regardless of what we call it ...

  it will be sufficiently complex that we'll want to test the *mechanism* of
  throttling cache filling *independently* of the specific behavior - I
  really really want it to be an *abstraction*:

  get_from_info_cache_or_memcache_or_fill_from_backend_with_backoff_but_always_make_sure_its_in_cache_in_the_end_and_dont_return_until_it_is_or_timeout()

  ^ that seems like a useful concept (needs a shorter name)

  I think having a couple of functions in utils is a good start, but in
  pseudo code the direction I'm thinking is actually going to use some
  closures to pass in partials:

      import random

      def get_from_cache_like_skip_or_fallback(get_f, set_f, fallback_f,
                                               skip_chance=0):
          if skip_chance and random.random() < skip_chance:
              value, state = None, 'skip'   # a skipped read counts as a miss
          else:
              value, state = get_f()
          if not value:
              value, state = fallback_f()
              if value:
                  set_f(value)
          return value, state

  ... but it's probably not even remotely clear that something like that
  would be better, much less obvious - and working code wins - so you can
  ignore my intuition until I have something to show for it (just ran out of
  time today, and the existing implementation has some fairly sophisticated
  behaviors!)
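[Editor's note: to make the closures-and-partials idea concrete, this is one
hypothetical way the sketch above could be wired up for the shard-range
case. Every name below (``lookup_shard_ranges``, the cache key, the backend
client method) is invented for illustration and does not appear in the
patch.]

```
def lookup_shard_ranges(memcache, backend_client, container_path):
    cache_key = 'shard-ranges/%s' % container_path   # hypothetical key
    return get_from_cache_like_skip_or_fallback(
        get_f=lambda: (memcache.get(cache_key), 'hit'),
        set_f=lambda value: memcache.set(cache_key, value),
        fallback_f=lambda: (backend_client.get_shard_ranges(container_path),
                            'miss'),
        skip_chance=0.001,   # occasionally skip cache to force a refresh
    )
```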
PS3 | Clay Gerrard, 2023-08-28
  I messed this all up 😭 I'll do (at least) one more respin.

PS4 | Jianjian Huo, 2024-01-12
  Will need to break this patch up into two: one for the base class and one
  for the actual use in the updating shard range path; it also needs test
  cases.

PS4 | Clay Gerrard, 2024-01-12
  I think moving the business logic out into its own method and making
  fetch_from_backend generic was a win. I could get on board with
  "fetch_from_backend should set backend_response and return the normalized
  form", but I think there are probably other ways you could do that as
  well - e.g. rather than have ONE method that's NotImplemented and does TWO
  things, have TWO methods that are NotImplemented and each do ONE thing.

  I think some logging/stats in the base class would be very useful to us as
  we roll this out and try to learn how it's behaving.

  How do you plan to test this?

PS4 | Jianjian Huo, 2024-01-16
  Thanks so much for the reviews!

PS5 | Jianjian Huo, 2024-01-16
  Still no tests yet. After reviewers agree on the interface, I will add
  test cases for the base class and the shard range cache token. I also plan
  to test it on a test cluster by evicting the memcached cache.

PS6 | Alistair Coles, 2024-01-22
  This is a really nice idea and conceptually I look forward to having the
  capability. I'm not keen on the current interface and mixing of concerns -
  I'd like the cooperative token to not need to know what it is governing,
  just be a yes/no/wait gating function. I left some ideas inline.

  Someone once said "Given N reviewers you can expect to have N+1 differing
  opinions". I hope my opinion doesn't cause too much trouble ;-)

PS6 | Jianjian Huo, 2024-01-25
  recheck

PS6 | Clay Gerrard, 2024-02-07
  Saw this in email and wanted to encourage you to try it out! Don't be
  afraid to write code we decide to throw away - it helps us learn all the
  ways we don't want to do it.

PS6 | Jianjian Huo, 2024-02-14
  thanks a lot for the reviews and suggestions.

PS8 | Alistair Coles, 2024-02-15
  I like the context-manager ;) I'm not quite convinced that the trade-offs
  in making the CooperativeCachePopulator generic are working. But I think
  with some more refining this will be great.

PS8 | Jianjian Huo, 2024-02-20
  thanks for the reviews!

PS10 | Matthew Oliver, 2024-03-13
  Obviously some tests that test this out would be awesome, especially as
  that'll help demonstrate how this generic coop token tooling works.
  I assume testing is the reason you have the fake memcached patch up in
  gerrit 😊

PS10 | Alistair Coles, 2024-03-15
  I think I may have more comments in the follow-on patch where this is
  used.

PS10 | Clay Gerrard, 2024-03-15
  I think this change adds a new utils helper with no callers and no tests -
  that's not a very interesting (or mergeable) change on its own, so I'm
  going to go further down the patch chain and see what I can learn.

  I think the need and the use-case are well described in the commit
  message - kudos! We should definitely be working on this.

  I think the implementation/interface may require some iteration to get all
  the maintainers comfortable with it; there's a chance this new primitive
  could have a significant impact across a large swath of critical
  code-paths - it's worth the investment to make it the best we can.

  Please don't worry too much about my comments; I'm just ramping up and
  hitting a few surprises is expected:
  https://medium.com/@paulotaylor/code-quality-measurement-wtfs-minute-774225fb7394

  What I *would* like you to worry about (very seriously) is HOW we are
  going to test this thing's race conditions. We *need* tests that start up
  multiple consumers and inject pauses at controlled sync points, so that we
  can easily instrument a variety of race conditions and understand the
  failure modes we want to accept. I expect the test infra is going to be
  2-4 times more code than the implementation (if the implementation is any
  good). And honestly I don't care that much about the implementation - if
  the tests can demonstrate it works the way we want under all the failure
  modes and races we care about, then it gets to be as ugly as it needs to
  be in order to achieve our definition of correctness. Please start playing
  around with eventlet.Event() and a "pausable fake memcache" so we can
  eventlet.spawn a couple of populate_cache_with_cooperative_token threads
  and see where the dragons are at.
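[Editor's note: in the spirit of what this comment asks for, a bare-bones
sketch of a "pausable fake memcache" race test. The eventlet primitives
(``spawn``, ``Event``, ``sleep``) are real; ``PausableFakeMemcache`` and the
scenario are hypothetical stand-ins for whatever the patch's tests end up
exercising.]

```
import eventlet
from eventlet.event import Event

class PausableFakeMemcache(object):
    def __init__(self):
        self.store = {}
        self.gate = Event()       # the test controls when reads complete

    def get(self, key):
        value = self.store.get(key)   # read happens before the pause...
        self.gate.wait()              # ...then park at the sync point
        return value

    def set(self, key, value, time=0):
        self.store[key] = value

def test_two_racing_fillers():
    fake = PausableFakeMemcache()
    backend_calls = []

    def fill_cache():
        if fake.get('key') is None:   # both racers observe a miss
            backend_calls.append(1)   # ...so both hit the "backend"
            fake.set('key', 'data')

    racers = [eventlet.spawn(fill_cache) for _ in range(2)]
    eventlet.sleep(0)                 # let both racers park at the gate
    fake.gate.send()                  # release them at the same moment
    for gt in racers:
        gt.wait()
    # without cooperation both refill; with tokens we'd assert == 1
    assert len(backend_calls) == 2
```

The gate lets the test decide exactly when both greenthreads observe the
miss, which is the kind of controlled sync point that would let the real
tests enumerate the races they care about.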
{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"288c0cdfc2d2834c8643a477cd994323358d11e6","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":10,"id":"a75d5143_78ccef22","updated":"2024-03-13 21:59:30.000000000","message":"Obviously some tests that test this out would be awesome. Especially as that\u0027ll help demonstrate how this generic coop token tooling works.\n\nI assume testing is the reason you have the fake memcached patch up in gerrit 😊","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"c783da2689193d11c60a978c512ea3f46be8dc61","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":14,"id":"d7a5fc22_c3a8d715","updated":"2024-03-22 18:04:02.000000000","message":"I think the context manager approach helped emphasise separation of concerns, but I can see that the interface might not have been intuitive, so I\u0027m fine with working harder on a class based solution. At the end of the day it\u0027s about how execution flows between the \"helper\" coop token thing and the caller\u0027s concrete behaviours, and getting an appropriate separation of concerns.\n\nI still feel that there is a simpler primitive: \"try to get a coop token and do either A or B based on the outcome\". That primitive should not have to care what A and B do, just that they are callables. Then we can reason about and test the coop token primitive separately from the business of fetching and caching data.\n\nMaybe the primitive is a superclass of what you have here?\n\nI\u0027d also like to see if we can just pass in two callables A() and B() - I think the for and else clauses I had here https://review.opendev.org/c/openstack/swift/+/913425/comment/f581a1a6_26f6102b/ *might* be a basis for what A() and B() would do.\n\nThese are just my thoughts/ideas and I\u0027m sorry I have not had time to flesh them out in an editor! Thanks for your efforts @JianJian!","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},
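A minimal sketch of the "simpler primitive" described in that comment - grab a token, then run one of two callables - assuming swift's ``MemcacheRing.incr(key, delta=1, time=...)`` semantics. ``run_with_coop_token`` and its fail-open behaviour on memcache errors are assumptions for illustration, not the interface that was merged.

```
def run_with_coop_token(memcache, token_key, on_token, on_no_token,
                        num_tokens=3, token_ttl=10):
    """Run on_token() if we grab one of num_tokens, else on_no_token().

    The primitive does not care what the callables do; a winner is
    expected to delete the token itself once it has set fresh data.
    """
    try:
        token = memcache.incr(token_key, delta=1, time=token_ttl)
    except Exception:
        # memcache trouble: fail open and behave like a winner
        return on_token()
    if token <= num_tokens:
        return on_token()    # e.g. fetch backend, set cache, delete token
    return on_no_token()     # e.g. poll memcache until data appears or ttl
```

Because A() and B() are plain callables, the token logic can be reasoned about and tested with fakes, separately from the fetching-and-caching business.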
{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":15,"id":"ed4f491a_3f9c4c7c","updated":"2024-03-26 15:06:38.000000000","message":"I have to admit I find this a little easier to read than the sleepy iterable context manager!  I don\u0027t think that proves there\u0027s not room for a lower order primitive that\u0027s less opinionated than this class which would still be useful for testing... but I\u0027m definitely thinking about the whole domain at the level of \"make backend requests and populate memcache\" and w/o a use-case for the cooperative-token outside of that context I\u0027m not sure I can invent what the interface for the lower order primitive should be.\n\nThis \"ghetto lock\" interface is just NOT the same interface as a lock that blocks until you win or hit a timeout, it\u0027s WEIRDER than that, right?\n\nwith explicit get_token/delete_token:\n\n    try:\n        am_winner \u003d get_token()  # raises CooperationError on incr fail\n        if am_winner:\n            try:\n                fetch_from_backend_and_set_in_memcache()\n            except:\n                pass\n            else:\n                delete_token()\n        else:\n            sleep_and_wait_on_memcache()  # raises CooperationError on memcache ttl\n    except CooperationError:\n        fetch_from_backend_and_set_in_memcache()\n\nwith a context manager:\n\n    try:\n        try:\n            with cooperative_token() as ctx:\n                # am winner, token will be deleted when I\u0027m done\n                try:\n                    fetch_from_backend_and_set_in_memcache()\n                except:\n                    # oops!?\n                    ctx.should_delete \u003d False\n        except CooperativeLoser:\n            sleep_and_wait_on_memcache()\n    except CooperationError:\n        fetch_from_backend_and_set_in_memcache()\n\nThe sleep_iter does seem like it\u0027s less code; but it\u0027s so hard for me to reason about I might be missing something:\n\n    with sleepy_iter() as ctx:\n        for i in ctx:\n            data \u003d check_memcache()\n            if data:\n                return data\n        try:\n            return fetch_from_backend_and_set_in_memcache()\n        except:\n            ctx.should_delete \u003d False\n\n... and there\u0027s certainly a question about *when* we should allow more requests/token-winners after we hit a `fetch_from_backend_and_set_in_memcache` error.  I don\u0027t think we want a whole fresh session; so at most maybe one additional token winner (do we want to \"interrupt\" losers that had been waiting on memcache or just wait for the next request).  I think it *might* be close to ideal to just let everyone stall up to the full memcache_wait_ttl_timeout and then start a fresh batch - but I guess at *some point* we need a degenerate degradation into the existing thundering herd case (maybe after three guys timeout/die, that\u0027s enough?)\n\nSince we\u0027re using a class please consider dropping all the \"pass in functions\"/dependency-injection pattern in favor of a boring old OO abstract/concrete subclass based pattern.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},
{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":true,"context_lines":[],"source_content_type":"","patch_set":15,"id":"f34cebc7_c68caf75","in_reply_to":"ed4f491a_3f9c4c7c","updated":"2024-04-02 03:21:47.000000000","message":"thanks for the ideas! I have adopted the first idea, to explicitly delete the token in the main function.\n\nOn \"a lower order primitive\" for not only the cooperative token but also for other situations like cooperation between object expirers: after spending some time thinking on it, I feel this primitive is simple enough (in our case, just memcache incr and delete); maybe different categories of usage situations can just call those basic memcache operations, and each will act differently based on the results of those memcache operations.\n\nIt\u0027s hard to implement a basic cooperative_token() context manager, mainly because the cooperative token is only removed when a request with a token finishes both backend fetching and memcache set successfully. Since we have the class to encapsulate the token get and release, I feel a lower level context manager is not necessary.\n\n\u003e Since we\u0027re using a class please consider dropping all the \"pass in functions\"/dependency-injection pattern in favor of a boring old OO abstract/concrete subclass based pattern.\n\nAgreed. I will drop all injection functions and use subclasses after I finish all the unit tests and testing.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"50192330b0eaa55928bcecef326b623b9faae22f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":15,"id":"bb4492d1_0a710498","in_reply_to":"f34cebc7_c68caf75","updated":"2024-09-25 16:09:59.000000000","message":"Done","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":18,"id":"360cfe1e_12c0bd2f","updated":"2024-04-02 03:21:47.000000000","message":"thanks very much for the reviews!","commit_id":"f75df28ff4b418427bcd7de6a2696998253ec56d"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"781689035c4c047edcd3df8f621a2067e678f68c","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":22,"id":"1e0dbbc7_49aa0275","updated":"2024-04-04 18:15:38.000000000","message":"recheck\nunrelated container probe test failures.","commit_id":"869da30764a12814b42bcbd41a6a4c5bd326c082"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"90cc4c9845589c3cd20fbd3b6e065b82b17b0348","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":25,"id":"d9760d3b_66dcbc44","updated":"2024-04-18 11:47:10.000000000","message":"I have started an etherpad to discuss ideas\n\n https://etherpad.opendev.org/p/swift-cooperative-memcache-token\n \nI think that may be more efficient than exchanging gerrit comments.","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4a3409f2c7557fb623f094089756f527fcf64b4b","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":25,"id":"8e811e42_e93492b2","updated":"2024-04-19 19:45:42.000000000","message":"Is there a way to test this under a more realistic load w/o a consumer or do we need to carry this patch AND the next patch to get to see it in action?","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e6f89ad9d2f1f4afd8604a2ae65f6ee0d7fb494","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":25,"id":"a1fceded_d4012d6e","updated":"2024-04-19 03:36:39.000000000","message":"thanks for the reviews!","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e6f89ad9d2f1f4afd8604a2ae65f6ee0d7fb494","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":25,"id":"80791380_552de893","in_reply_to":"d9760d3b_66dcbc44","updated":"2024-04-19 03:36:39.000000000","message":"Thanks for starting the etherpad. I added more comments there.","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8046b1825440bdbfc3bc34d7b36ea2cfcc487e38","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":26,"id":"2dde56da_d8b10c58","updated":"2024-04-22 03:31:25.000000000","message":"recheck\n\nssh and rsync failures were gone, but there was an unrelated probe test failure: test_reconciler_move_object_twice","commit_id":"4dd49346f9f2e84b7f83f84cbf9db231c08997ae"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cd4e8481ed4722ab9a5f80e36cf7768055e4ea26","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":26,"id":"6bc9db9b_aeb1ab7a","updated":"2024-04-21 21:11:06.000000000","message":"recheck\n\nunrelated rsync error: \"ssh: connect to host 23.253.56.19 port 22: Connection timed out\nrsync: connection unexpectedly closed (0 bytes received so far) [Receiver]\"","commit_id":"4dd49346f9f2e84b7f83f84cbf9db231c08997ae"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"b4217f40894554248d7a7ede197350387c5a2099","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":26,"id":"9f4840fe_39c3136c","updated":"2024-04-22 01:00:04.000000000","message":"recheck\nstill the unrelated ssh and rsync failures.\n\"ssh: connect to host 23.253.56.19 port 22: Connection timed out\nrsync: connection unexpectedly closed (0 bytes received so far) [Receiver]\"","commit_id":"4dd49346f9f2e84b7f83f84cbf9db231c08997ae"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49205ebacd44070d9afb148c56b81fc9c5274f1b","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":26,"id":"224d916e_7d8d5cbd","updated":"2024-04-19 23:05:14.000000000","message":"thanks for the reviews!","commit_id":"4dd49346f9f2e84b7f83f84cbf9db231c08997ae"},{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"7b315613f27a881219393fdb50c21e051627af79","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":32,"id":"2b5720d6_00cf67ff","updated":"2024-07-08 07:46:50.000000000","message":"just pushing up initial comments. No show stoppers 😊","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f3064c1509afef499bf100fa5ef7b516368dcf6c","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":32,"id":"9126e925_24ec1e03","updated":"2024-07-09 14:28:54.000000000","message":"thanks for the reviews.","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"50192330b0eaa55928bcecef326b623b9faae22f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":35,"id":"6b6e9c05_01c072dd","updated":"2024-09-25 16:09:59.000000000","message":"recheck","commit_id":"972579556d4137efee254497cc131d52db781c03"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"54439fa07fb5d989285db550cd4758e372c916e7","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":37,"id":"b9c7456e_0aa40689","updated":"2024-10-25 02:37:13.000000000","message":"recheck\n\npy38 should be happy now, too.","commit_id":"a4c4d393c3059cf22f5c995df5c080b5811d61c0"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"f5bec38371925518f53ca34e1cfcc7d358f0ce77","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":37,"id":"5b07a1d5_35949f6f","updated":"2024-10-24 18:54:15.000000000","message":"recheck\n\npyeclib issues should (hopefully!) be resolved now.","commit_id":"a4c4d393c3059cf22f5c995df5c080b5811d61c0"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"0e92a04b108667ce615385557e82b7fb4b37418c","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":37,"id":"91024c91_fac6c017","updated":"2024-10-24 03:11:40.000000000","message":"recheck\n\nunrelated TIMED_OUT probe test.","commit_id":"a4c4d393c3059cf22f5c995df5c080b5811d61c0"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"da96d051980e0361a2227b0400f6fa6e3750980e","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":37,"id":"c07a4ede_67b04e2e","updated":"2024-10-25 15:30:16.000000000","message":"recheck\nswift-probetests-centos-9-stream passed but timed out","commit_id":"a4c4d393c3059cf22f5c995df5c080b5811d61c0"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49dd3537db5741f8d4531ffa1110fcd73b6f2811","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":37,"id":"f360b6a2_e787a4e0","updated":"2024-10-24 01:03:53.000000000","message":"thanks for the reviews!","commit_id":"a4c4d393c3059cf22f5c995df5c080b5811d61c0"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"df0123ddc6a5118de24bcab42bf97548be03393e","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":38,"id":"e468c5ec_06e43600","updated":"2025-02-05 19:43:04.000000000","message":"Abstractions are ultimately impossible; this may very well be our best-effort.  IMHO its use in the follow-on seems reasonable enough that we can probably maintain it.  My gut suggests a more OOP design would be more flexible to maintain in the future but 1) my hypothetical code isn\u0027t written yet and 2) we only have one concrete use-case today.\n\nHopefully if we ever grow a second use-case it won\u0027t be too hard to update these tests and still keep cooperative-namespace-caching working.  We should probably wait to +A this until we\u0027re ready to merge its one-and-only use-case in the follow-up.","commit_id":"4249d3819d24c6e4ba0c440301a6ba935846f0a1"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":43,"id":"b1883ce3_4974dcea","updated":"2025-05-01 21:34:53.000000000","message":"Thanks for the review and help!","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"fd187de69854f2137151bab18c2f131b23d38789","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":43,"id":"b88f225c_4a3e6298","updated":"2025-04-30 20:19:47.000000000","message":"Threw some ideas in https://review.opendev.org/c/openstack/swift/+/948585","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"a89ce01fc06197ae7d361499c8aefb52f52772c0","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":44,"id":"dc1326d5_cf328ec0","updated":"2025-05-02 17:56:01.000000000","message":"Holding off on +A until we\u0027ve got https://review.opendev.org/c/openstack/swift/+/908969 ready to merge, too.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":44,"id":"8c077129_b5103d96","updated":"2025-05-05 21:32:09.000000000","message":"I think the problem with merging this as-is, the one that will make it *harder* to maintain swift going forward, will come from someone explicitly setting:\n\n```\nnamespace_cache_use_token \u003d False\nnamespace_cache_tokens_per_session \u003d 3\n```\n\nThen when we *fix* `num_token \u003c 1` to mean the only possible reasonable thing it *could* mean (i.e. don\u0027t lookup tokens and go directly to backend)\n\n948833: num_token \u003d 0 is go slow button | https://review.opendev.org/c/openstack/swift/+/948833\n\n... we have to maintain the redundant `namespace_cache_use_token` *forever* (i.e. with the above config they get direct to backend, so if we just ignore use_token we\u0027ve changed the semantic meaning; even tho it\u0027d be easier to spell it like `num_token \u003d 0` b/c what else could that mean?)\n\nI think we could add an independent `token_ttl` and fix the weird `retry_interval * 1.5` in a follow-up, but if someone managed to tune this implementation the way they wanted it may be difficult to get an equivalent default without checking for `if token_ttl not in config: token_ttl \u003d interval * 10` - and I still don\u0027t see why that would be what anyone wants; presumably this thing could be used for all kinds of backend requests that may be quick or slow and totally unrelated to how you want your memcache polling to behave.\n\nI haven\u0027t attempted to code it up, but I expect some of the external use of the `populator.set_cache_state` would simplify if we moved the `record_cache_op` for the set into the `CooperativeCachePopulator`.\n\nI don\u0027t really understand the reasoning for:\n\n```\n        ns_bound_list, get_cache_state \u003d get_namespaces_from_cache(\n            req, cache_key, skip_chance)\n        response \u003d None\n        if not ns_bound_list:\n            # namespaces not found in memcache or cache was skipped, so pull\n            # the full set of updating shard ranges from the backend and set in\n            # the memcache.\n            cache_populator_cls \u003d (\n                CooperativeNamespaceCachePopulator\n                if self.app.namespace_cache_use_token\n                else DirectNamespaceCachePopulator\n            )\n            cache_populator \u003d cache_populator_cls(\n                self, self.logger, account, container, req, cache_key)\n            ns_bound_list \u003d cache_populator.fetch_data()\n```\n\nas opposed to *just*\n\n```\n            cache_populator \u003d CooperativeNamespaceCachePopulator(\n                self, req, account, container)\n            ns_bound_list \u003d cache_populator.fetch_data()\n```\n\nThe `CooperativeNamespaceCachePopulator` should know how to lookup everything it needs from the `ctrl` and `req` - we should make the args list smaller.  It already HAS to know how to `get_namespaces_from_cache` for the polling.  It would be a nice abstraction if it handled the `skip_cache` stuff automatically since that\u0027s also always what you want.  And it could also form an opinion on how to increment the cache_op for the reads when you miss|skip, but then lose|wait and hit (i.e. 
no set)","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":44,"id":"d3ef1c65_f0f01f3b","updated":"2025-05-07 05:08:33.000000000","message":"thanks for the reviews!","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":44,"id":"8e85ff6e_d0051587","in_reply_to":"8c077129_b5103d96","updated":"2025-05-07 05:08:33.000000000","message":"Good idea to further simplify the code path by combining ``DirectNamespaceCachePopulator`` and ``CooperativeNamespaceCachePopulator`` together, thanks!","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"6cb4c00b5d866f0adfc367d233fc084dadc6126f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":51,"id":"d17a2259_dd657b56","updated":"2025-05-13 22:09:40.000000000","message":"I confused myself trying to understand the lack_retries tests in the follow-on change\n\nturns out I do sort of understand how lack_retries works but misunderstood the proxy tests using a variable \"retries\" to mean \"how many times we get from memcache\" (which includes one extra get that happens *outside* of the `_sleep_and_retry_memcache` loop).","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":51,"id":"2335e882_848766b9","updated":"2025-05-13 14:20:39.000000000","message":"much improved, don\u0027t love the stats.","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":58,"id":"0a7865f0_da31c030","updated":"2025-09-08 15:33:27.000000000","message":"The CooperativeCachePopulator is stateful and as such cannot be re-used without the state getting muddled. I\u0027d like to see re-use of fetch_data prevented to avoid accidental bugs in the future.\n\nI think the stats labels could be initialised more clearly (see my diff), and there are a couple of cases where they seem to be wrong/lacking:\n\n* the inc_error doesn\u0027t actually get reported\n* memcache errors while getting data are not distinguished from memcache misses.\n\nI was surprised that the class is *not* responsible for an *initial* attempt to fetch data from memcache, but *is* responsible for setting data in memcache. Perhaps that will become clear when I get to the next patch.\n\nI wonder about the attributes of the CooperativeCachePopulator that are not \u0027_\u0027 prefixed and are repeatedly asserted in tests - do they need to be part of the class interface? they seem to be implementation details. Having them tested so much makes the implementation brittle (I am guilty of that mistake myself :( )\n\nI haven\u0027t reviewed the concurrent tests yet, but wanted to push the comments I have so far.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},
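One possible shape for the fetch_data re-use guard requested in that review; a sketch only, with made-up names rather than the patch's actual solution:

```
class OneShotPopulator(object):
    """Sketch: a stateful populator whose fetch_data() can run only once."""

    def __init__(self):
        self._used = False

    def fetch_data(self):
        if self._used:
            raise RuntimeError('populator instances cannot be re-used; '
                               'construct a fresh one per request')
        self._used = True
        return self._do_fetch()

    def _do_fetch(self):
        # the real class would do token acquisition, the backend fetch and
        # the memcache set here; this stub just keeps the sketch runnable
        return 'data'
```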
{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":58,"id":"14b70e05_662c8b96","updated":"2025-09-05 17:20:22.000000000","message":"only a partial review - I\u0027m going to keep going next week, I didn\u0027t actually get to the meaty stuff yet!","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":58,"id":"92e0aa97_ebcf1c2a","updated":"2025-09-18 04:45:01.000000000","message":"thanks for the review and refactoring! have addressed comments in the main class, still working on comments on the memcache helper and tests.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":59,"id":"8ce7a98d_0b7d2062","updated":"2025-09-22 05:47:05.000000000","message":"thanks a lot for the thorough review and help!","commit_id":"c8884fc0e33fa30c6646b94aa694312060b7d609"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"f3c176a0b2b9e61da8a19a7513ee2e08b18fc30b","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":60,"id":"7e5b1027_5949b9f4","updated":"2025-09-25 22:35:07.000000000","message":"I honestly spent more time thinking about this patch as a \"means to an end\"\n\n908969: proxy: use cooperative tokens to coalesce updating shard range requests into backend | https://review.opendev.org/c/openstack/swift/+/908969\n\n... than thinking about this patch standing on its own.\n\nHonestly it\u0027s hard to get a \"general use tool\" correct when you only have one concrete example actually implemented.  I think if I re-reviewed this patch I\u0027d *mostly* decide I can live with it in order to get us cooperative-updating-shard-cache population.\n\nSome stuff I remember sticking out:\n\n1) the memcache retry interval is tightly coupled to the token_ttl with fixed width retries and inferred from avg_backend_request - which is a lot of orthogonal concerns coupled together for no good reason I can think of?\n2) the state leaking out of this object to support legacy statsd metrics is annoying and ugly\n3) the weird behavior where all 97 of 100 requests retry memcache for 10x longer than avg_backend_req before all dog-piling on the backend is both a) inefficient and b) self-defeating - luckily you can increase num_tokens until you\u0027re happy you have enough redundancy that *at least one with_token* WILL set memcache (or you have bigger problems anyway).\n\n... I think my only reservation at this point is that `swift_token` is a really REALLY bad/too-generic name for the new labeled metric!?\n\nI\u0027d suggest we try to get consensus on maybe `swift_coop_cache{resource\u003d\u0027shard_updating\u0027}` and get this (and more importantly the follow-up) MERGED so we can iterate on adapting/improving this primitive to other use-cases.","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"10f897d87faa2a035331daf6011424bc796d7cfc","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":60,"id":"b441858b_54a717a2","updated":"2025-09-26 18:29:20.000000000","message":"Thanks a lot for the review!","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"612655d3eb285f86ae1aa4e296980f7f239fa23f","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":61,"id":"4fbdf1d3_be8bc3a6","updated":"2025-09-29 21:14:23.000000000","message":"I think the design for this is too heavily influenced by the requirements of the existing sharding cache metrics.  In an imaginary clean slate implementation I think we\u0027d want the initial fetch/miss from memcache to also be embedded into the utils class (it obviously already knows how to fetch the key from memcache and deserialize it!  so as-is we\u0027re begging the consumer to write a class that calls code that\u0027s defined/used outside of the class).  But this is a trade-off you have to make in brownfield development: the follow-up diff looks pretty tight, so bleeding set_cache_state is the price we pay! (for now)\n\nI\u0027ve been pretty critical of the \"fail open\" design and the coupling of the knobs this change provides - but it\u0027s clear to me that after 2 years it\u0027s not going to get much better until *after* we merge it - and IMHO *this* is good enough to merge!\n\nI want the CooperativeNamespaceCachePopulator behavior on master and running in prod for everyone else; I can maintain this code.  
Nice work balancing these tradeoffs Jian.","commit_id":"707a65ab3c2150fad093a904c3a3d099d74fa236"}],"swift/common/memcached.py":[{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"288c0cdfc2d2834c8643a477cd994323358d11e6","unresolved":true,"context_lines":[{"line_number":128,"context_line":""},{"line_number":129,"context_line":""},{"line_number":130,"context_line":"class MemcachePoolTimeout(Timeout):"},{"line_number":131,"context_line":"    pass"},{"line_number":132,"context_line":""},{"line_number":133,"context_line":""},{"line_number":134,"context_line":"class MemcacheConnPool(Pool):"}],"source_content_type":"text/x-python","patch_set":10,"id":"93826e33_3053adad","side":"PARENT","line":131,"updated":"2024-03-13 21:59:30.000000000","message":"Yup nice move to where these should really live!","commit_id":"51ae9b00c917d963d0477120b4a5a20373ea79e8"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"a9bca8d92d206bd59d7923b6a00fc394e6adbac8","unresolved":false,"context_lines":[{"line_number":128,"context_line":""},{"line_number":129,"context_line":""},{"line_number":130,"context_line":"class MemcachePoolTimeout(Timeout):"},{"line_number":131,"context_line":"    pass"},{"line_number":132,"context_line":""},{"line_number":133,"context_line":""},{"line_number":134,"context_line":"class MemcacheConnPool(Pool):"}],"source_content_type":"text/x-python","patch_set":10,"id":"0a274b1e_b5776c44","side":"PARENT","line":131,"in_reply_to":"93826e33_3053adad","updated":"2024-03-20 05:06:51.000000000","message":"Acknowledged","commit_id":"51ae9b00c917d963d0477120b4a5a20373ea79e8"}],"swift/common/utils/__init__.py":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":2414,"context_line":"    cache_lock_key \u003d \u0027cache_lock/%s\u0027 % cache_key"},{"line_number":2415,"context_line":"    cache_lock \u003d None"},{"line_number":2416,"context_line":"    while retries_time_window \u003e 0:"},{"line_number":2417,"context_line":"        cache_value \u003d memcache.get(cache_key, raise_on_error\u003dTrue)"},{"line_number":2418,"context_line":"        if cache_value:"},{"line_number":2419,"context_line":"            # cache hit."},{"line_number":2420,"context_line":"            return cache_value"}],"source_content_type":"text/x-python","patch_set":2,"id":"661db553_5f128a2a","line":2417,"updated":"2023-08-25 00:02:59.000000000","message":"should we allow retry on cache error or raise the memcache connection error to the caller?","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":2414,"context_line":"    cache_lock_key \u003d \u0027cache_lock/%s\u0027 % cache_key"},{"line_number":2415,"context_line":"    cache_lock \u003d None"},{"line_number":2416,"context_line":"    while retries_time_window \u003e 0:"},{"line_number":2417,"context_line":"        cache_value \u003d memcache.get(cache_key, raise_on_error\u003dTrue)"},{"line_number":2418,"context_line":"        if cache_value:"},{"line_number":2419,"context_line":"            # cache hit."},{"line_number":2420,"context_line":"            return cache_value"}],"source_content_type":"text/x-python","patch_set":2,"id":"781f08d4_58eda35e","line":2417,"in_reply_to":"661db553_5f128a2a","updated":"2024-01-12 06:04:07.000000000","message":"new implementation handles memcache connection errors.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":2420,"context_line":"            return cache_value"},{"line_number":2421,"context_line":"        # cache miss."},{"line_number":2422,"context_line":"        if not cache_lock:"},{"line_number":2423,"context_line":"            cache_lock \u003d memcache.get(cache_lock_key, raise_on_error\u003dTrue)"},{"line_number":2424,"context_line":"        if not cache_lock:"},{"line_number":2425,"context_line":"            # first cache miss request, set cache_lock, return None"},{"line_number":2426,"context_line":"            # and caller will fetch data from backend."}],"source_content_type":"text/x-python","patch_set":2,"id":"0b47e57a_03601304","line":2423,"updated":"2023-08-25 00:02:59.000000000","message":"proxy workers share a memcache connection pool, but these calls can trampoline and we allow multiple connections - perhaps more obvious, multiple proxies can call multiple mcrouter backends and talk to different memcache servers - easy to imagine many servers getting a miss here simultaneously.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2420,"context_line":"            return cache_value"},{"line_number":2421,"context_line":"        # cache miss."},{"line_number":2422,"context_line":"        if not cache_lock:"},{"line_number":2423,"context_line":"            cache_lock \u003d memcache.get(cache_lock_key, raise_on_error\u003dTrue)"},{"line_number":2424,"context_line":"        if not cache_lock:"},{"line_number":2425,"context_line":"            # first cache miss request, set cache_lock, return None"},{"line_number":2426,"context_line":"            # and caller will fetch data from backend."}],"source_content_type":"text/x-python","patch_set":2,"id":"89912952_b6b80502","line":2423,"in_reply_to":"0b47e57a_03601304","updated":"2024-01-16 19:52:53.000000000","message":"yes, the previous implementation would allow a few requests to acquire the cooperative token, but had no precise control; the new way of using ``memcache::incr`` will give us what we want.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},
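To make the ``incr`` point concrete: the increment is atomic on the memcached server, so exactly the first callers in a session observe a small value, which a get-then-set scheme can never promise. A sketch assuming swift's ``MemcacheRing.incr(key, delta=1, time=...)`` semantics; ``try_get_token`` is an illustrative name:

```
def try_get_token(memcache, token_key, num_tokens=3, token_ttl=10):
    # incr creates the key if missing and is atomic server-side, so across
    # all proxies exactly the first num_tokens callers in a session see a
    # value <= num_tokens and win a token
    return memcache.incr(token_key, delta=1, time=token_ttl) <= num_tokens
```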
{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":2421,"context_line":"        # cache miss."},{"line_number":2422,"context_line":"        if not cache_lock:"},{"line_number":2423,"context_line":"            cache_lock \u003d memcache.get(cache_lock_key, raise_on_error\u003dTrue)"},{"line_number":2424,"context_line":"        if not cache_lock:"},{"line_number":2425,"context_line":"            # first cache miss request, set cache_lock, return None"},{"line_number":2426,"context_line":"            # and caller will fetch data from backend."},{"line_number":2427,"context_line":"            memcache.set(cache_lock_key, value\u003d0, time\u003dlock_ttl)"}],"source_content_type":"text/x-python","patch_set":2,"id":"9fce8123_0b274955","line":2424,"updated":"2023-08-25 00:02:59.000000000","message":"I think cache_lock will be the value returned from memcache, i.e. 0\n\nso this might evaluate true even on cache hit?","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":2421,"context_line":"        # cache miss."},{"line_number":2422,"context_line":"        if not cache_lock:"},{"line_number":2423,"context_line":"            cache_lock \u003d memcache.get(cache_lock_key, raise_on_error\u003dTrue)"},{"line_number":2424,"context_line":"        if not cache_lock:"},{"line_number":2425,"context_line":"            # first cache miss request, set cache_lock, return None"},{"line_number":2426,"context_line":"            # and caller will fetch data from backend."},{"line_number":2427,"context_line":"            memcache.set(cache_lock_key, value\u003d0, time\u003dlock_ttl)"}],"source_content_type":"text/x-python","patch_set":2,"id":"10f15244_0de6f027","line":2424,"in_reply_to":"9fce8123_0b274955","updated":"2024-01-12 06:04:07.000000000","message":"not related to new implementation anymore.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":2424,"context_line":"        if not cache_lock:"},{"line_number":2425,"context_line":"            # first cache miss request, set cache_lock, return None"},{"line_number":2426,"context_line":"            # and caller will fetch data from backend."},{"line_number":2427,"context_line":"            memcache.set(cache_lock_key, value\u003d0, time\u003dlock_ttl)"},{"line_number":2428,"context_line":"            return None"},{"line_number":2429,"context_line":"        else:"},{"line_number":2430,"context_line":"            # there are requests in-flight which will fetch data form backend"}],"source_content_type":"text/x-python","patch_set":2,"id":"27c84036_d9d95cfc","line":2427,"updated":"2023-08-25 00:02:59.000000000","message":"so the cache value for the \"lock\" is always 0, but we set a ttl\n\nit seems like two simultaneous callers could easily both set this value.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":2424,"context_line":"        if not cache_lock:"},{"line_number":2425,"context_line":"            # first cache miss request, set cache_lock, return None"},{"line_number":2426,"context_line":"            # and caller will fetch data from backend."},{"line_number":2427,"context_line":"            memcache.set(cache_lock_key, value\u003d0, time\u003dlock_ttl)"},{"line_number":2428,"context_line":"            return None"},{"line_number":2429,"context_line":"        else:"},{"line_number":2430,"context_line":"            # there are requests in-flight which will fetch data form backend"}],"source_content_type":"text/x-python","patch_set":2,"id":"b7b97b2e_d580b72a","line":2427,"in_reply_to":"27c84036_d9d95cfc","updated":"2024-01-12 06:04:07.000000000","message":"yes, the previous implementation had no good control over the number of tokens. since the time to set memcache is ~1ms, the max number of tokens should stay within 3 or so.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":2436,"context_line":"    return None"},{"line_number":2437,"context_line":""},{"line_number":2438,"context_line":""},{"line_number":2439,"context_line":"def clear_memcached_get_global_lock(memcache, cache_key):"},{"line_number":2440,"context_line":"    \"\"\""},{"line_number":2441,"context_line":"    Clear the global lock if it exists."},{"line_number":2442,"context_line":""}],"source_content_type":"text/x-python","patch_set":2,"id":"fb7cfa53_d2248c73","line":2439,"updated":"2023-08-25 00:02:59.000000000","message":"heh, there it is again \"global_lock\" - if I was grepping a new code base and found a string like this I\u0027d know *exactly* where to go looking for bugs 😜","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":2436,"context_line":"    return None"},{"line_number":2437,"context_line":""},{"line_number":2438,"context_line":""},{"line_number":2439,"context_line":"def clear_memcached_get_global_lock(memcache, cache_key):"},{"line_number":2440,"context_line":"    \"\"\""},{"line_number":2441,"context_line":"    Clear the global lock if it exists."},{"line_number":2442,"context_line":""}],"source_content_type":"text/x-python","patch_set":2,"id":"6268fe37_8dea756a","line":2439,"in_reply_to":"fb7cfa53_d2248c73","updated":"2024-01-12 06:04:07.000000000","message":"changed name from lock to token. 
:-)","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":2443,"context_line":"    :param memcache: the memcache instance"},{"line_number":2444,"context_line":"    :param cache_key: the cache key"},{"line_number":2445,"context_line":"    \"\"\""},{"line_number":2446,"context_line":"    cache_lock_key \u003d \u0027cache_lock/%s\u0027 % cache_key"},{"line_number":2447,"context_line":"    memcache.delete(cache_lock_key)"},{"line_number":2448,"context_line":""},{"line_number":2449,"context_line":""}],"source_content_type":"text/x-python","patch_set":2,"id":"4df26a81_e1051bee","line":2446,"updated":"2023-08-25 00:02:59.000000000","message":"ha, \"cache_lock\" even better!  I\u0027d be thinking \"either this person doesn\u0027t know how a cache works or they don\u0027t know how a lock works\" but maybe they just don\u0027t think names matter...","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":2443,"context_line":"    :param memcache: the memcache instance"},{"line_number":2444,"context_line":"    :param cache_key: the cache key"},{"line_number":2445,"context_line":"    \"\"\""},{"line_number":2446,"context_line":"    cache_lock_key \u003d \u0027cache_lock/%s\u0027 % cache_key"},{"line_number":2447,"context_line":"    memcache.delete(cache_lock_key)"},{"line_number":2448,"context_line":""},{"line_number":2449,"context_line":""}],"source_content_type":"text/x-python","patch_set":2,"id":"6b840342_437314af","line":2446,"in_reply_to":"4df26a81_e1051bee","updated":"2024-01-12 06:04:07.000000000","message":"changed name from lock to token. 
:-)","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":2444,"context_line":"    :param cache_key: the cache key"},{"line_number":2445,"context_line":"    \"\"\""},{"line_number":2446,"context_line":"    cache_lock_key \u003d \u0027cache_lock/%s\u0027 % cache_key"},{"line_number":2447,"context_line":"    memcache.delete(cache_lock_key)"},{"line_number":2448,"context_line":""},{"line_number":2449,"context_line":""},{"line_number":2450,"context_line":"def read_conf_dir(parser, conf_dir):"}],"source_content_type":"text/x-python","patch_set":2,"id":"c84b6034_4928da5e","line":2447,"updated":"2023-08-25 00:02:59.000000000","message":"I think this interface is not as strong as it could be - it\u0027s relying on the caller to understand these methods must be used together with some kind of try/finally\n\nI think if we had an existing primitive like:\n\nget_or_freshen_cache(cache_key, refresh_func)\n\nit\u0027d be easier to see a path towards:\n\nget_or_freshen_cache_with_backoff(cache_key, refresh_func, ...)\n\nwith maybe some variables to tweak the behavior of the cache limiter and backoff.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":2444,"context_line":"    :param cache_key: the cache key"},{"line_number":2445,"context_line":"    \"\"\""},{"line_number":2446,"context_line":"    cache_lock_key \u003d \u0027cache_lock/%s\u0027 % cache_key"},{"line_number":2447,"context_line":"    memcache.delete(cache_lock_key)"},{"line_number":2448,"context_line":""},{"line_number":2449,"context_line":""},{"line_number":2450,"context_line":"def read_conf_dir(parser, conf_dir):"}],"source_content_type":"text/x-python","patch_set":2,"id":"cdd1798c_e9868af8","line":2447,"in_reply_to":"c84b6034_4928da5e","updated":"2024-01-12 06:04:07.000000000","message":"not related to new implementation anymore.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},
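A sketch of the stronger interface floated in that comment, so callers cannot forget to pair the get/delete calls themselves; ``get_or_freshen_cache`` is the hypothetical helper named there, not an existing swift function:

```
def get_or_freshen_cache(memcache, cache_key, refresh_func, cache_ttl=300):
    value = memcache.get(cache_key)
    if value is None:
        # one round trip to the backend, then repopulate the cache
        value = refresh_func()
        memcache.set(cache_key, value, time=cache_ttl)
    return value
```

Wrapping the refresh callable this way is what would let a later ``get_or_freshen_cache_with_backoff`` add the limiter/backoff knobs without changing any caller's try/finally discipline.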
{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2561,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2562,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2563,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"},{"line_number":2564,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2565,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2566,"context_line":"        cooperative token."},{"line_number":2567,"context_line":"    :param num_tokens: number of in-flight requests allowed to fetch data from"}],"source_content_type":"text/x-python","patch_set":4,"id":"6d258b9a_83527af4","line":2564,"updated":"2024-01-12 20:27:10.000000000","message":"at this point do all pending requests trigger the same thundering herd; or are they subject to a second round of cooperation?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2561,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2562,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2563,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"},{"line_number":2564,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2565,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2566,"context_line":"        cooperative token."},{"line_number":2567,"context_line":"    :param num_tokens: number of in-flight requests allowed to fetch data from"}],"source_content_type":"text/x-python","patch_set":4,"id":"4a5d0809_6d02faf0","line":2564,"in_reply_to":"6d258b9a_83527af4","updated":"2024-01-16 19:52:53.000000000","message":"Great questions, I added the below paragraph of comments to answer them.\n\n``` The original ghetto lock only defines one token for usage, while this\n    cooperative token mechanism uses ``num_tokens`` to define the maximum\n    number of tokens during one usage session, defaulting to 3. This is used\n    to increase fault tolerance in the distributed environment: when one\n    caller process with a token hangs or exits, any other request with a\n    token can still set newly fetched data into memcache and finish the whole\n    usage session. In the very rare case when all 3 callers with tokens fail,\n    the existing usage session ends after the ``token_ttl`` period is reached\n    and the internal key is expired; then all pending requests which have no\n    token will exit waiting and fall back to querying the backend (the\n    situation without the cooperative token), and new requests which miss the\n    cache and need to query the backend after ``token_ttl`` will start a new\n    round of cooperation.```","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},
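To make the ``token_ttl`` session semantics above concrete, a sketch of what a request without a token might do: poll memcache on an interval and, once the session window closes, give up so the caller falls back to the backend (the pre-token behaviour). Names and defaults are illustrative, not the patch's code:

```
import time
import eventlet

def wait_for_cache(memcache, cache_key, sleep_interval=0.05, token_ttl=10):
    deadline = time.time() + token_ttl
    while time.time() < deadline:
        value = memcache.get(cache_key)
        if value is not None:
            return value  # a token holder populated the cache
        eventlet.sleep(sleep_interval)
    # all token holders failed: the session expired, caller hits the backend
    return None
```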
{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2563,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"},{"line_number":2564,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2565,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2566,"context_line":"        cooperative token."},{"line_number":2567,"context_line":"    :param num_tokens: number of in-flight requests allowed to fetch data from"},{"line_number":2568,"context_line":"        the backend; default to be 3, which give enough redundancy when any"},{"line_number":2569,"context_line":"        request with token fails to fetch data from the backend or fails"}],"source_content_type":"text/x-python","patch_set":4,"id":"712a4b17_1c5e4481","line":2566,"updated":"2024-01-12 20:27:10.000000000","message":"how will this be configured/determined?  Will it mainly be driven by the number of simultaneous outstanding requestors or just a property of how fast we want the proxy request greenlet to busy loop?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2563,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"},{"line_number":2564,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2565,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2566,"context_line":"        cooperative token."},{"line_number":2567,"context_line":"    :param num_tokens: number of in-flight requests allowed to fetch data from"},{"line_number":2568,"context_line":"        the backend; default to be 3, which give enough redundancy when any"},{"line_number":2569,"context_line":"        request with token fails to fetch data from the backend or fails"}],"source_content_type":"text/x-python","patch_set":4,"id":"b0da1dfa_42b8f951","line":2566,"in_reply_to":"712a4b17_1c5e4481","updated":"2024-01-16 19:52:53.000000000","message":"It should be configured to be a single backend query time, so the greenlet won\u0027t spend too many cycles on the busy loop.\n\nfor shard range caching, it\u0027s defined here: https://review.opendev.org/c/openstack/swift/+/890174/4/swift/proxy/server.py#256\n\nthe default value is defined as 50ms here: \nhttps://review.opendev.org/c/openstack/swift/+/890174/4/swift/proxy/controllers/base.py#75\n\nthe new namespace query from the largest container on prod is about 50ms.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2564,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2565,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2566,"context_line":"        cooperative token."},{"line_number":2567,"context_line":"    :param num_tokens: number of in-flight requests allowed to fetch data from"},{"line_number":2568,"context_line":"        the backend; default to be 3, which give enough redundancy when any"},{"line_number":2569,"context_line":"        request with token fails to fetch data from the backend or fails"},{"line_number":2570,"context_line":"        to set new data into memcached."}],"source_content_type":"text/x-python","patch_set":4,"id":"776ebddd_75a017e7","line":2567,"range":{"start_line":2567,"start_character":23,"end_line":2567,"end_character":59},"updated":"2024-01-12 20:27:10.000000000","message":"I don\u0027t like this phrasing; we want it to sound as wishy-washy as it really is - something like \"soft-cap\" or \"minimum number of allowed requests\"","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2564,"context_line":"        make sure the new requests after ``token_ttl`` can continue to 
proceed."},{"line_number":2565,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2566,"context_line":"        cooperative token."},{"line_number":2567,"context_line":"    :param num_tokens: number of in-flight requests allowed to fetch data from"},{"line_number":2568,"context_line":"        the backend; default to be 3, which give enough redundancy when any"},{"line_number":2569,"context_line":"        request with token fails to fetch data from the backend or fails"},{"line_number":2570,"context_line":"        to set new data into memcached."}],"source_content_type":"text/x-python","patch_set":4,"id":"7c1ea7ae_b9803e9f","line":2567,"range":{"start_line":2567,"start_character":23,"end_line":2567,"end_character":59},"in_reply_to":"776ebddd_75a017e7","updated":"2024-01-16 19:52:53.000000000","message":"it\u0027s \"the maximum number of tokens\", I will modify this sentence.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2565,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2566,"context_line":"        cooperative token."},{"line_number":2567,"context_line":"    :param num_tokens: number of in-flight requests allowed to fetch data from"},{"line_number":2568,"context_line":"        the backend; default to be 3, which give enough redundancy when any"},{"line_number":2569,"context_line":"        request with token fails to fetch data from the backend or fails"},{"line_number":2570,"context_line":"        to set new data into memcached."},{"line_number":2571,"context_line":"    \"\"\""}],"source_content_type":"text/x-python","patch_set":4,"id":"03047b0a_491ffcc6","line":2568,"range":{"start_line":2568,"start_character":21,"end_line":2568,"end_character":36},"updated":"2024-01-12 20:27:10.000000000","message":"sounds reasonable!","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2565,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2566,"context_line":"        cooperative token."},{"line_number":2567,"context_line":"    :param num_tokens: number of in-flight requests allowed to fetch data from"},{"line_number":2568,"context_line":"        the backend; default to be 3, which give enough redundancy when any"},{"line_number":2569,"context_line":"        request with token fails to fetch data from the backend or fails"},{"line_number":2570,"context_line":"        to set new data into memcached."},{"line_number":2571,"context_line":"    \"\"\""}],"source_content_type":"text/x-python","patch_set":4,"id":"32d6769f_a78b6311","line":2568,"range":{"start_line":2568,"start_character":21,"end_line":2568,"end_character":36},"in_reply_to":"03047b0a_491ffcc6","updated":"2024-01-16 19:52:53.000000000","message":"Acknowledged","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay 
Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2577,"context_line":"        self._memcache \u003d memcache"},{"line_number":2578,"context_line":"        self._cache_key \u003d cache_key"},{"line_number":2579,"context_line":"        self._cache_ttl \u003d cache_ttl"},{"line_number":2580,"context_line":"        self._token_key \u003d \u0027cache_token/%s\u0027 % cache_key"},{"line_number":2581,"context_line":"        self._token_ttl \u003d token_ttl"},{"line_number":2582,"context_line":"        self._token_sleep_interval \u003d sleep_interval"},{"line_number":2583,"context_line":"        self._num_tokens \u003d num_tokens"}],"source_content_type":"text/x-python","patch_set":4,"id":"c207cf98_cc1d902c","line":2580,"updated":"2024-01-12 20:27:10.000000000","message":"maybe we should \"reserve\" some of the memcache namespace to avoid accidentel collision with a consumer cache key...\n\nIn the wsgi environ since any mw can set w/e environ key they want we prefix all of ours keys with a `swift.key_name`\n\nIn the account namespace since AUTH_ prefix can be almost any string for different for any reseller; we prefix all of our accounts with \".\" (e.g. .expiring_objects, .misplaced_objects, etc)\n\nMore recently; since swift supports any utf-8 encoded character we use selected the \"null-byte\" to prefix systems containers \u0027\\x00\u0027\n\nI think a \".swift.cache_token/\" or even \"_cache_token\" would look a little better.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2577,"context_line":"        self._memcache \u003d memcache"},{"line_number":2578,"context_line":"        self._cache_key \u003d cache_key"},{"line_number":2579,"context_line":"        self._cache_ttl \u003d cache_ttl"},{"line_number":2580,"context_line":"        self._token_key \u003d \u0027cache_token/%s\u0027 % cache_key"},{"line_number":2581,"context_line":"        self._token_ttl \u003d token_ttl"},{"line_number":2582,"context_line":"        self._token_sleep_interval \u003d sleep_interval"},{"line_number":2583,"context_line":"        self._num_tokens \u003d num_tokens"}],"source_content_type":"text/x-python","patch_set":4,"id":"03a3483b_a2d2de3c","line":2580,"in_reply_to":"c207cf98_cc1d902c","updated":"2024-01-16 19:52:53.000000000","message":"okay, I will use the latter ``_cache_token``, to save a few characters.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2583,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":2584,"context_line":"        self._set_cache_state \u003d None"},{"line_number":2585,"context_line":"        self._req_served_from_cache \u003d False"},{"line_number":2586,"context_line":"        # To be set by ``do_fetch_backend`` of the derived classes."},{"line_number":2587,"context_line":"        self._backend_response \u003d None"},{"line_number":2588,"context_line":""},{"line_number":2589,"context_line":"    def do_fetch_backend(self, *args, 
**kwargs):"}],"source_content_type":"text/x-python","patch_set":4,"id":"e08c49f8_31a13501","line":2586,"updated":"2024-01-12 20:27:10.000000000","message":"oic; maybe it sets this to the http response but returns some normalized value?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2583,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":2584,"context_line":"        self._set_cache_state \u003d None"},{"line_number":2585,"context_line":"        self._req_served_from_cache \u003d False"},{"line_number":2586,"context_line":"        # To be set by ``do_fetch_backend`` of the derived classes."},{"line_number":2587,"context_line":"        self._backend_response \u003d None"},{"line_number":2588,"context_line":""},{"line_number":2589,"context_line":"    def do_fetch_backend(self, *args, **kwargs):"}],"source_content_type":"text/x-python","patch_set":4,"id":"718df02f_9e2f0e78","line":2586,"in_reply_to":"e08c49f8_31a13501","updated":"2024-01-16 19:52:53.000000000","message":"also discussed in other comment thread.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2593,"context_line":""},{"line_number":2594,"context_line":"        :param args: positional args"},{"line_number":2595,"context_line":"        :param kwargs: Keyword args"},{"line_number":2596,"context_line":"        :returns: data fetched from backend"},{"line_number":2597,"context_line":"        \"\"\""},{"line_number":2598,"context_line":"        raise NotImplementedError"},{"line_number":2599,"context_line":""}],"source_content_type":"text/x-python","patch_set":4,"id":"8116235a_31784e31","line":2596,"updated":"2024-01-12 20:27:10.000000000","message":"the comment on the _backend_response attr is not sufficient; if there is a requirement this function set _backend_response it should be in the docstring.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2593,"context_line":""},{"line_number":2594,"context_line":"        :param args: positional args"},{"line_number":2595,"context_line":"        :param kwargs: Keyword args"},{"line_number":2596,"context_line":"        :returns: data fetched from backend"},{"line_number":2597,"context_line":"        \"\"\""},{"line_number":2598,"context_line":"        raise NotImplementedError"},{"line_number":2599,"context_line":""}],"source_content_type":"text/x-python","patch_set":4,"id":"f48739ef_28c4cf81","line":2596,"in_reply_to":"8116235a_31784e31","updated":"2024-01-16 19:52:53.000000000","message":"Acknowledged","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2605,"context_line":"        will be able to query the 
backend."},{"line_number":2606,"context_line":""},{"line_number":2607,"context_line":"        :param args: positional args to pass to ``do_fetch_backend``"},{"line_number":2608,"context_line":"        :param kwargs: Keyword args to pass to ``do_fetch_backend``"},{"line_number":2609,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2610,"context_line":"        \"\"\""},{"line_number":2611,"context_line":"        if not self._memcache:"}],"source_content_type":"text/x-python","patch_set":4,"id":"4fbf2381_9b6ac263","line":2608,"updated":"2024-01-12 20:27:10.000000000","message":"oic, the signature is agnostic - the caller has to pass in everything they need for a backend request even if the response ultimately comes from memcache.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2605,"context_line":"        will be able to query the backend."},{"line_number":2606,"context_line":""},{"line_number":2607,"context_line":"        :param args: positional args to pass to ``do_fetch_backend``"},{"line_number":2608,"context_line":"        :param kwargs: Keyword args to pass to ``do_fetch_backend``"},{"line_number":2609,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2610,"context_line":"        \"\"\""},{"line_number":2611,"context_line":"        if not self._memcache:"}],"source_content_type":"text/x-python","patch_set":4,"id":"91f3c8b2_b69841d6","line":2608,"in_reply_to":"4fbf2381_9b6ac263","updated":"2024-01-16 19:52:53.000000000","message":"Acknowledged","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2609,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2610,"context_line":"        \"\"\""},{"line_number":2611,"context_line":"        if not self._memcache:"},{"line_number":2612,"context_line":"            return self.do_fetch_backend(*args, **kwargs)"},{"line_number":2613,"context_line":""},{"line_number":2614,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2615,"context_line":"        try:"}],"source_content_type":"text/x-python","patch_set":4,"id":"ffbc55ba_4cd0cc34","line":2612,"updated":"2024-01-12 20:27:10.000000000","message":"what about infocache?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2609,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2610,"context_line":"        \"\"\""},{"line_number":2611,"context_line":"        if not self._memcache:"},{"line_number":2612,"context_line":"            return self.do_fetch_backend(*args, **kwargs)"},{"line_number":2613,"context_line":""},{"line_number":2614,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2615,"context_line":"        
try:"}],"source_content_type":"text/x-python","patch_set":4,"id":"252ab7df_a0306482","line":2612,"in_reply_to":"2366a0aa_03079bef","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2609,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2610,"context_line":"        \"\"\""},{"line_number":2611,"context_line":"        if not self._memcache:"},{"line_number":2612,"context_line":"            return self.do_fetch_backend(*args, **kwargs)"},{"line_number":2613,"context_line":""},{"line_number":2614,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2615,"context_line":"        try:"}],"source_content_type":"text/x-python","patch_set":4,"id":"2366a0aa_03079bef","line":2612,"in_reply_to":"eca9b0a2_4cf2a9d7","updated":"2024-03-15 15:00:01.000000000","message":"agree, we should always have infocache","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":true,"context_lines":[{"line_number":2609,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2610,"context_line":"        \"\"\""},{"line_number":2611,"context_line":"        if not self._memcache:"},{"line_number":2612,"context_line":"            return self.do_fetch_backend(*args, **kwargs)"},{"line_number":2613,"context_line":""},{"line_number":2614,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2615,"context_line":"        try:"}],"source_content_type":"text/x-python","patch_set":4,"id":"eca9b0a2_4cf2a9d7","line":2612,"in_reply_to":"ffbc55ba_4cd0cc34","updated":"2024-01-16 19:52:53.000000000","message":"this is guarded return when deployment doesn\u0027t have memcached at all. 
but we should always have infocache.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2614,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2615,"context_line":"        try:"},{"line_number":2616,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":2617,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2618,"context_line":"        except MemcacheConnectionError:"},{"line_number":2619,"context_line":"            self._set_cache_state \u003d \u0027error\u0027"},{"line_number":2620,"context_line":"            return self.do_fetch_backend(*args, **kwargs)"}],"source_content_type":"text/x-python","patch_set":4,"id":"c88291f1_81f4b460","line":2617,"updated":"2024-01-12 20:27:10.000000000","message":"IIRC this is actually going to *try* to incr, and if that fails NOKEY it will set it to 1 - so in a race you can have N requests all set to 1 before someone actually manages to increment to 2 and then 3.\n\nDoes that match your understanding as well?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2614,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2615,"context_line":"        try:"},{"line_number":2616,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":2617,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2618,"context_line":"        except MemcacheConnectionError:"},{"line_number":2619,"context_line":"            self._set_cache_state \u003d \u0027error\u0027"},{"line_number":2620,"context_line":"            return self.do_fetch_backend(*args, **kwargs)"}],"source_content_type":"text/x-python","patch_set":4,"id":"1b95c990_ad267a3b","line":2617,"in_reply_to":"c88291f1_81f4b460","updated":"2024-01-16 19:52:53.000000000","message":"Actually, Swift\u0027s implementation of ``memcache.incr`` uses ``memcache add`` to set the initial value to 1 when incr fails NOKEY. See\nhttps://github.com/NVIDIA/swift/blob/ss-release-2.32.0.9/swift/common/memcached.py#L531\n\nAnd ``memcache add`` is atomic and will only succeed when NOKEY, so 2nd and 3rd requests will see ``incr NOKEY --\u003e add fail --\u003e incr 1 succeed``. 
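A runnable toy model of the incr-with-add fallback described in this exchange; ``FakeMemcache`` and ``incr_with_add_fallback`` are invented for illustration and are not Swift's memcached.py, but the add() step shows why racing callers converge on one exact counter:

```python
# Toy model of the incr-with-add fallback described above. Not Swift's
# memcached.py; it only illustrates why the add() step keeps the count
# exact: add() is atomic in real memcached and succeeds only when the key
# is absent, so racing callers converge on a single counter.
class FakeMemcache(object):
    def __init__(self):
        self.store = {}

    def add(self, key, value):
        # Atomic in real memcached: only succeeds if the key does not exist.
        if key in self.store:
            return False
        self.store[key] = value
        return True

    def incr(self, key, delta=1):
        # Models NOKEY (here: KeyError) when the key does not exist.
        if key not in self.store:
            raise KeyError(key)
        self.store[key] += delta
        return self.store[key]


def incr_with_add_fallback(memcache, key):
    try:
        return memcache.incr(key)
    except KeyError:
        # NOKEY: try to create the counter; if another caller won the
        # add() race, fall back to incrementing their key.
        if memcache.add(key, 1):
            return 1
        return memcache.incr(key)


# Sequential demo; with real memcached the same convergence holds under
# concurrency because add() is atomic across racing clients.
mc = FakeMemcache()
tokens = [incr_with_add_fallback(mc, 'token') for _ in range(5)]
assert tokens == [1, 2, 3, 4, 5]  # exactly one caller per count
```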
So there may be some extra memcache operations, but CacheCooperativeToken will only grant exactly ``num_tokens`` tokens.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2617,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2618,"context_line":"        except MemcacheConnectionError:"},{"line_number":2619,"context_line":"            self._set_cache_state \u003d \u0027error\u0027"},{"line_number":2620,"context_line":"            return self.do_fetch_backend(*args, **kwargs)"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if total_requests \u003c\u003d self._num_tokens:"},{"line_number":2623,"context_line":"            # Acquired a cooperative token, go fetching data from backend"}],"source_content_type":"text/x-python","patch_set":4,"id":"fc89ba3a_4967b6c7","line":2620,"updated":"2024-01-12 20:27:10.000000000","message":"is this class not responsible for trying to get the value from memcache?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":false,"context_lines":[{"line_number":2617,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2618,"context_line":"        except MemcacheConnectionError:"},{"line_number":2619,"context_line":"            self._set_cache_state \u003d \u0027error\u0027"},{"line_number":2620,"context_line":"            return self.do_fetch_backend(*args, **kwargs)"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if total_requests \u003c\u003d self._num_tokens:"},{"line_number":2623,"context_line":"            # Acquired a cooperative token, go fetching data from backend"}],"source_content_type":"text/x-python","patch_set":4,"id":"57e7ce35_05e25e73","line":2620,"in_reply_to":"88eecc61_686d2434","updated":"2024-03-15 15:00:01.000000000","message":"Acknowledged","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":true,"context_lines":[{"line_number":2617,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2618,"context_line":"        except MemcacheConnectionError:"},{"line_number":2619,"context_line":"            self._set_cache_state \u003d \u0027error\u0027"},{"line_number":2620,"context_line":"            return self.do_fetch_backend(*args, **kwargs)"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if total_requests \u003c\u003d self._num_tokens:"},{"line_number":2623,"context_line":"            # Acquired a cooperative token, go fetching data from backend"}],"source_content_type":"text/x-python","patch_set":4,"id":"88eecc61_686d2434","line":2620,"in_reply_to":"fc89ba3a_4967b6c7","updated":"2024-01-16 19:52:53.000000000","message":"this happens only when a memcache operation raises ``MemcacheConnectionError``; we will switch back to querying the 
backend.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2625,"context_line":"            if not data:"},{"line_number":2626,"context_line":"                return None"},{"line_number":2627,"context_line":"            if self._infocache:"},{"line_number":2628,"context_line":"                self._infocache[self._cache_key] \u003d data"},{"line_number":2629,"context_line":"            try:"},{"line_number":2630,"context_line":"                self._memcache.set("},{"line_number":2631,"context_line":"                    self._cache_key, data,"}],"source_content_type":"text/x-python","patch_set":4,"id":"5219f9a0_487135f8","line":2628,"updated":"2024-01-12 20:27:10.000000000","message":"when I was playing with this it seemed like the shape of the datastructure returned form the backend (an http response) was often not appropriate to set in the infocache directly - and also perhaps different than the value we\u0027d want to set in memcache.\n\ni.e.\n\nresp \u003c\u003d backend full shard listing\nNamespaceBoundList python object \u003d\u003e infocache\nlist_of_bisectable_tuples \u003d\u003e memcache","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2625,"context_line":"            if not data:"},{"line_number":2626,"context_line":"                return None"},{"line_number":2627,"context_line":"            if self._infocache:"},{"line_number":2628,"context_line":"                self._infocache[self._cache_key] \u003d data"},{"line_number":2629,"context_line":"            try:"},{"line_number":2630,"context_line":"                self._memcache.set("},{"line_number":2631,"context_line":"                    self._cache_key, data,"}],"source_content_type":"text/x-python","patch_set":4,"id":"c379f300_b6263811","line":2628,"in_reply_to":"5219f9a0_487135f8","updated":"2024-01-16 19:52:53.000000000","message":"good point, we should have both memcache and infocache store the same format of data. 
I will change infocache to store list_of_bisectable_tuples which is the same as what memcache does.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2631,"context_line":"                    self._cache_key, data,"},{"line_number":2632,"context_line":"                    time\u003dself._cache_ttl, raise_on_error\u003dTrue)"},{"line_number":2633,"context_line":"                # Remove all cooperative tokens related to this usage."},{"line_number":2634,"context_line":"                self._memcache.delete(self._token_key)"},{"line_number":2635,"context_line":"            except MemcacheConnectionError:"},{"line_number":2636,"context_line":"                self._set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2637,"context_line":"            else:"}],"source_content_type":"text/x-python","patch_set":4,"id":"d0895c57_55eae79f","line":2634,"updated":"2024-01-12 20:27:10.000000000","message":"I think this is right; if another request just got a cache miss before we set - and then goes to incr it will think it\u0027s first in line and make the backend request, do another cache set/overwrite (if successful) - and clear the token afterwards.\n\nHopefully soon after, other requests would start finding the cached value and not even LOOK at the cache token value.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2631,"context_line":"                    self._cache_key, data,"},{"line_number":2632,"context_line":"                    time\u003dself._cache_ttl, raise_on_error\u003dTrue)"},{"line_number":2633,"context_line":"                # Remove all cooperative tokens related to this usage."},{"line_number":2634,"context_line":"                self._memcache.delete(self._token_key)"},{"line_number":2635,"context_line":"            except MemcacheConnectionError:"},{"line_number":2636,"context_line":"                self._set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2637,"context_line":"            else:"}],"source_content_type":"text/x-python","patch_set":4,"id":"b85f4a6b_eda0126e","line":2634,"in_reply_to":"d0895c57_55eae79f","updated":"2024-01-16 19:52:53.000000000","message":"Acknowledged","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"eba25903961438b8b6cf0e33fc904d83392f6fa5","unresolved":true,"context_lines":[{"line_number":2644,"context_line":"        retries_time_window \u003d self._token_ttl"},{"line_number":2645,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":2646,"context_line":"            eventlet.sleep(self._token_sleep_interval)"},{"line_number":2647,"context_line":"            retries_time_window -\u003d self._token_sleep_interval"},{"line_number":2648,"context_line":"            cache_value \u003d self._memcache.get("},{"line_number":2649,"context_line":"                self._cache_key, raise_on_error\u003dTrue)"},{"line_number":2650,"context_line":"            if 
cache_value:"}],"source_content_type":"text/x-python","patch_set":4,"id":"077bf804_ca2ff33e","line":2647,"updated":"2024-01-12 06:12:56.000000000","message":"will convert this to wall time based, since eventlet.sleep() is not accurate.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":false,"context_lines":[{"line_number":2644,"context_line":"        retries_time_window \u003d self._token_ttl"},{"line_number":2645,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":2646,"context_line":"            eventlet.sleep(self._token_sleep_interval)"},{"line_number":2647,"context_line":"            retries_time_window -\u003d self._token_sleep_interval"},{"line_number":2648,"context_line":"            cache_value \u003d self._memcache.get("},{"line_number":2649,"context_line":"                self._cache_key, raise_on_error\u003dTrue)"},{"line_number":2650,"context_line":"            if cache_value:"}],"source_content_type":"text/x-python","patch_set":4,"id":"8b60c9af_6a262a5a","line":2647,"in_reply_to":"077bf804_ca2ff33e","updated":"2024-03-15 15:00:01.000000000","message":"Acknowledged","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2644,"context_line":"        retries_time_window \u003d self._token_ttl"},{"line_number":2645,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":2646,"context_line":"            eventlet.sleep(self._token_sleep_interval)"},{"line_number":2647,"context_line":"            retries_time_window -\u003d self._token_sleep_interval"},{"line_number":2648,"context_line":"            cache_value \u003d self._memcache.get("},{"line_number":2649,"context_line":"                self._cache_key, raise_on_error\u003dTrue)"},{"line_number":2650,"context_line":"            if cache_value:"}],"source_content_type":"text/x-python","patch_set":4,"id":"bd41a406_6f41e1db","line":2647,"in_reply_to":"077bf804_ca2ff33e","updated":"2024-03-15 15:00:01.000000000","message":"did this happen?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"a9bca8d92d206bd59d7923b6a00fc394e6adbac8","unresolved":false,"context_lines":[{"line_number":2644,"context_line":"        retries_time_window \u003d self._token_ttl"},{"line_number":2645,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":2646,"context_line":"            eventlet.sleep(self._token_sleep_interval)"},{"line_number":2647,"context_line":"            retries_time_window -\u003d self._token_sleep_interval"},{"line_number":2648,"context_line":"            cache_value \u003d self._memcache.get("},{"line_number":2649,"context_line":"                self._cache_key, raise_on_error\u003dTrue)"},{"line_number":2650,"context_line":"            if cache_value:"}],"source_content_type":"text/x-python","patch_set":4,"id":"15d95ad1_e1755c6c","line":2647,"in_reply_to":"bd41a406_6f41e1db","updated":"2024-03-20 05:06:51.000000000","message":"Done","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay 
Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2650,"context_line":"            if cache_value:"},{"line_number":2651,"context_line":"                # cache hit."},{"line_number":2652,"context_line":"                self._req_served_from_cache \u003d True"},{"line_number":2653,"context_line":"                return cache_value"},{"line_number":2654,"context_line":"            # cache miss."},{"line_number":2655,"context_line":"            continue"},{"line_number":2656,"context_line":""}],"source_content_type":"text/x-python","patch_set":4,"id":"a83310b3_46a612ad","line":2653,"updated":"2024-01-12 20:27:10.000000000","message":"what about infocache?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2650,"context_line":"            if cache_value:"},{"line_number":2651,"context_line":"                # cache hit."},{"line_number":2652,"context_line":"                self._req_served_from_cache \u003d True"},{"line_number":2653,"context_line":"                return cache_value"},{"line_number":2654,"context_line":"            # cache miss."},{"line_number":2655,"context_line":"            continue"},{"line_number":2656,"context_line":""}],"source_content_type":"text/x-python","patch_set":4,"id":"bfbc913d_9671ed48","line":2653,"in_reply_to":"a83310b3_46a612ad","updated":"2024-01-16 19:52:53.000000000","message":"this is handling of those requests without token acquired, the local infocache was a miss before reaching here and won\u0027t be set again even other requests with token went through.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":2667,"context_line":""},{"line_number":2668,"context_line":"    @backend_response.setter"},{"line_number":2669,"context_line":"    def backend_response(self, backend_response):"},{"line_number":2670,"context_line":"        self._backend_response \u003d backend_response"},{"line_number":2671,"context_line":""},{"line_number":2672,"context_line":"    @property"},{"line_number":2673,"context_line":"    def req_served_from_cache(self):"}],"source_content_type":"text/x-python","patch_set":4,"id":"b3a8f5dc_e494e398","line":2670,"updated":"2024-01-12 20:27:10.000000000","message":"since this is a base class; I\u0027ll defer judgement - but this reads like a mis-step of the python philosophy for private variables.\n\nuse leading _ to encourage uses not to access \"private\" state (good)\nuse @property to give \"public\" access to \"private\" state (good)\nuse @setter to make \"private\" state \"public\" (bad) - just name it self.backend_response and be done with it.\n\nit seems like mostly \"fetch_backend_with_token\" just *returns* the backend response (or value from memcache?  Or infocache?  
no normalization?)","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":2667,"context_line":""},{"line_number":2668,"context_line":"    @backend_response.setter"},{"line_number":2669,"context_line":"    def backend_response(self, backend_response):"},{"line_number":2670,"context_line":"        self._backend_response \u003d backend_response"},{"line_number":2671,"context_line":""},{"line_number":2672,"context_line":"    @property"},{"line_number":2673,"context_line":"    def req_served_from_cache(self):"}],"source_content_type":"text/x-python","patch_set":4,"id":"0e24ccb4_a7983769","line":2670,"in_reply_to":"b3a8f5dc_e494e398","updated":"2024-01-16 19:52:53.000000000","message":"okay, I will just use ``self.backend_response``. \"fetch_backend_with_token\" is called when caller requests see cache misses, and returns the data either fetched from backend or from memcached. During the call of \"fetch_backend_with_token\", a few internal class attributes will be set, including ``self.backend_response``, ``self._set_cache_state`` and ``self._req_served_from_cache``, which the caller can read back if needed.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"fad23fcc9a8765b6d96b3b325398b7ba3746e17a","unresolved":true,"context_lines":[{"line_number":2549,"context_line":"    requests will be able to get the cooperative tokens by creating or"},{"line_number":2550,"context_line":"    incrementing an internal memcache key, and then those callers with tokens"},{"line_number":2551,"context_line":"    can send backend requests to fetch data from backend servers and be able"},{"line_number":2552,"context_line":"    set data into memcache; all other cache miss requests without a token will"},{"line_number":2553,"context_line":"    have to wait for cache filling to finish, instead of all querying the"},{"line_number":2554,"context_line":"    backend servers at the same time. After those requests with token are done,"},{"line_number":2555,"context_line":"    they will release the token by deleting the internal cache key and finish"}],"source_content_type":"text/x-python","patch_set":6,"id":"90061b5d_071e4b8b","line":2552,"range":{"start_line":2552,"start_character":28,"end_line":2552,"end_character":74},"updated":"2024-01-22 16:38:52.000000000","message":"should we have some upper limit on other requests and fall back to returning a 429 if this is exceeded? We are deferring work which eventually will have to be handled once we have the value in memcache. \n\nAssume a constant rate of inbound requests, R/s. If we have a cache miss that takes M seconds to fill, then for M secs after cache is filled we\u0027ll have 2*R requests/s, capped at the number of greenthreads across the system. We could just be moving the hotspot away from the container server but on to another component. 
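Worked with illustrative numbers (assumed for the example, not measured), the back-of-envelope above looks like this:

```python
# Back-of-envelope for the deferred-work concern above; the numbers are
# illustrative assumptions, not measurements.
R = 1000   # steady inbound request rate (req/s) hitting this cache key
M = 0.05   # seconds for one token holder to fill the cache (one backend RTT)

deferred = R * M            # requests parked waiting for the cache fill
# Once the cache fills, the parked requests drain on top of the steady
# stream, so the instantaneous rate roughly doubles for about M seconds:
burst_rate = R + deferred / M
print(deferred, burst_rate)  # 50.0 parked requests, ~2000 req/s briefly
```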
Hopefully the requests are more distributed once we have shards, but might be worth some more thought.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2549,"context_line":"    requests will be able to get the cooperative tokens by creating or"},{"line_number":2550,"context_line":"    incrementing an internal memcache key, and then those callers with tokens"},{"line_number":2551,"context_line":"    can send backend requests to fetch data from backend servers and be able"},{"line_number":2552,"context_line":"    set data into memcache; all other cache miss requests without a token will"},{"line_number":2553,"context_line":"    have to wait for cache filling to finish, instead of all querying the"},{"line_number":2554,"context_line":"    backend servers at the same time. After those requests with token are done,"},{"line_number":2555,"context_line":"    they will release the token by deleting the internal cache key and finish"}],"source_content_type":"text/x-python","patch_set":6,"id":"2ded5cc4_f9b93679","line":2552,"range":{"start_line":2552,"start_character":28,"end_line":2552,"end_character":74},"in_reply_to":"0aaca786_b1b07fd0","updated":"2024-03-15 15:00:01.000000000","message":"I think blocking the requests behind the token_ttl is reasonable as long as token_ttl is small (around the rtt of container-server request and memcache write + some fudge) - if we find that requests are still getting memcache miss after a token_ttl we degrade back into the current state on master with only a little bit of a pause and we can go back to the drawing board.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2549,"context_line":"    requests will be able to get the cooperative tokens by creating or"},{"line_number":2550,"context_line":"    incrementing an internal memcache key, and then those callers with tokens"},{"line_number":2551,"context_line":"    can send backend requests to fetch data from backend servers and be able"},{"line_number":2552,"context_line":"    set data into memcache; all other cache miss requests without a token will"},{"line_number":2553,"context_line":"    have to wait for cache filling to finish, instead of all querying the"},{"line_number":2554,"context_line":"    backend servers at the same time. 
After those requests with token are done,"},{"line_number":2555,"context_line":"    they will release the token by deleting the internal cache key and finish"}],"source_content_type":"text/x-python","patch_set":6,"id":"dd2cb090_328fab99","line":2552,"range":{"start_line":2552,"start_character":28,"end_line":2552,"end_character":74},"in_reply_to":"2ded5cc4_f9b93679","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"ac2d2981d03b6f81c5446a7e97e07dc184902594","unresolved":true,"context_lines":[{"line_number":2549,"context_line":"    requests will be able to get the cooperative tokens by creating or"},{"line_number":2550,"context_line":"    incrementing an internal memcache key, and then those callers with tokens"},{"line_number":2551,"context_line":"    can send backend requests to fetch data from backend servers and be able"},{"line_number":2552,"context_line":"    set data into memcache; all other cache miss requests without a token will"},{"line_number":2553,"context_line":"    have to wait for cache filling to finish, instead of all querying the"},{"line_number":2554,"context_line":"    backend servers at the same time. After those requests with token are done,"},{"line_number":2555,"context_line":"    they will release the token by deleting the internal cache key and finish"}],"source_content_type":"text/x-python","patch_set":6,"id":"0aaca786_b1b07fd0","line":2552,"range":{"start_line":2552,"start_character":28,"end_line":2552,"end_character":74},"in_reply_to":"90061b5d_071e4b8b","updated":"2024-02-07 02:07:06.000000000","message":"for the usage of greenthreads, I feel the situation would be better with cooperative tokens than the current one. Currently, in the event of cache misses, all those shard range requests will go to the backend and overload the container servers; while those requests are pending and stuck at the container servers, greenthreads at the proxy servers are still occupied. With cooperative tokens, all of those requests would be handled much faster combined, so those busy greenthreads will be freed in a timely manner.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"fad23fcc9a8765b6d96b3b325398b7ba3746e17a","unresolved":true,"context_lines":[{"line_number":2558,"context_line":"    The original ghetto lock only defines one token for usage, while this"},{"line_number":2559,"context_line":"    cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":2560,"context_line":"    number of tokens during one usage session, default to be 3. This is used to"},{"line_number":2561,"context_line":"    increase fault tolerance in the distributed environment, when one caller"},{"line_number":2562,"context_line":"    process with token hangs or exits, any other requests with token still can"},{"line_number":2563,"context_line":"    set new fetched data into memcache and finish the whole usage session. 
In"},{"line_number":2564,"context_line":"    very rare case, when all 3 callers with tokens fails, the existing usage"},{"line_number":2565,"context_line":"    session ends after ``token_ttl`` period is reached and the internal key is"},{"line_number":2566,"context_line":"    expired, then all pending requests which have no token will exit waiting"}],"source_content_type":"text/x-python","patch_set":6,"id":"0472e4ba_5888bc48","line":2563,"range":{"start_line":2561,"start_character":61,"end_line":2563,"end_character":38},"updated":"2024-01-22 16:38:52.000000000","message":"is there anything to prevent 3 greenthreads in the same process taking the 3 tokens?","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"ac2d2981d03b6f81c5446a7e97e07dc184902594","unresolved":true,"context_lines":[{"line_number":2558,"context_line":"    The original ghetto lock only defines one token for usage, while this"},{"line_number":2559,"context_line":"    cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":2560,"context_line":"    number of tokens during one usage session, default to be 3. This is used to"},{"line_number":2561,"context_line":"    increase fault tolerance in the distributed environment, when one caller"},{"line_number":2562,"context_line":"    process with token hangs or exits, any other requests with token still can"},{"line_number":2563,"context_line":"    set new fetched data into memcache and finish the whole usage session. In"},{"line_number":2564,"context_line":"    very rare case, when all 3 callers with tokens fails, the existing usage"},{"line_number":2565,"context_line":"    session ends after ``token_ttl`` period is reached and the internal key is"},{"line_number":2566,"context_line":"    expired, then all pending requests which have no token will exit waiting"}],"source_content_type":"text/x-python","patch_set":6,"id":"326d5dd0_758d0250","line":2563,"range":{"start_line":2561,"start_character":61,"end_line":2563,"end_character":38},"in_reply_to":"0472e4ba_5888bc48","updated":"2024-02-07 02:07:06.000000000","message":"not now, the chance will be very low due to RR load balancing. I will add a TODO.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2558,"context_line":"    The original ghetto lock only defines one token for usage, while this"},{"line_number":2559,"context_line":"    cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":2560,"context_line":"    number of tokens during one usage session, default to be 3. This is used to"},{"line_number":2561,"context_line":"    increase fault tolerance in the distributed environment, when one caller"},{"line_number":2562,"context_line":"    process with token hangs or exits, any other requests with token still can"},{"line_number":2563,"context_line":"    set new fetched data into memcache and finish the whole usage session. 
In"},{"line_number":2564,"context_line":"    very rare case, when all 3 callers with tokens fails, the existing usage"},{"line_number":2565,"context_line":"    session ends after ``token_ttl`` period is reached and the internal key is"},{"line_number":2566,"context_line":"    expired, then all pending requests which have no token will exit waiting"}],"source_content_type":"text/x-python","patch_set":6,"id":"76d7988d_1c18fb06","line":2563,"range":{"start_line":2561,"start_character":61,"end_line":2563,"end_character":38},"in_reply_to":"326d5dd0_758d0250","updated":"2024-03-15 15:00:01.000000000","message":"I think \"process\" in this setence just meant \"request process\" i.e. a greenthread in our current proxy implementation.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2558,"context_line":"    The original ghetto lock only defines one token for usage, while this"},{"line_number":2559,"context_line":"    cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":2560,"context_line":"    number of tokens during one usage session, default to be 3. This is used to"},{"line_number":2561,"context_line":"    increase fault tolerance in the distributed environment, when one caller"},{"line_number":2562,"context_line":"    process with token hangs or exits, any other requests with token still can"},{"line_number":2563,"context_line":"    set new fetched data into memcache and finish the whole usage session. In"},{"line_number":2564,"context_line":"    very rare case, when all 3 callers with tokens fails, the existing usage"},{"line_number":2565,"context_line":"    session ends after ``token_ttl`` period is reached and the internal key is"},{"line_number":2566,"context_line":"    expired, then all pending requests which have no token will exit waiting"}],"source_content_type":"text/x-python","patch_set":6,"id":"fbb51c3d_98b23b91","line":2563,"range":{"start_line":2561,"start_character":61,"end_line":2563,"end_character":38},"in_reply_to":"76d7988d_1c18fb06","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"fad23fcc9a8765b6d96b3b325398b7ba3746e17a","unresolved":true,"context_lines":[{"line_number":2574,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":2575,"context_line":"        memcached."},{"line_number":2576,"context_line":"    :param do_fetch_backend: a functools.partial object to be called to fetch"},{"line_number":2577,"context_line":"        data from the backend; it needs to return a tuple of (data, response)."},{"line_number":2578,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2579,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2580,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"}],"source_content_type":"text/x-python","patch_set":6,"id":"678894c0_4d7a96b0","line":2577,"updated":"2024-01-22 16:38:52.000000000","message":"This seems like an unfortunate mixing of concerns: the CooperativeToken is assuming the 
roles of fetching data from cache, or from backend (which immediately makes this class specific, at least in terms of naming, to those use cases that fetch data from backends) and for setting that data into memcache. I think this is why the type that is stored in infocache has needed to change - because this method has to be opinionated about how and where it sets backend data, and it has to be the same type everywhere it is set.\n\nI\u0027m not really sure why CooperativeToken needs to know anything about fetching from backends. The purpose of the token is to prevent/allow backend requests, so once we are making a backend request the token has done its job. OK, so we need to know when to release/delete the token, but I\u0027d still argue that the two things should be kept separate.\n\nThis isn\u0027t perfect, but a simpler (IMHO) alternative would be something like https://paste.openstack.org/show/bWWLUY02aZGi6FwdX81Q/ . This uses a context manager to abstract the getting *and* deleting of the token, and has it periodically yield during a sequence of waits. The sequence is empty if the caller got the token.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"ac2d2981d03b6f81c5446a7e97e07dc184902594","unresolved":true,"context_lines":[{"line_number":2574,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":2575,"context_line":"        memcached."},{"line_number":2576,"context_line":"    :param do_fetch_backend: a functools.partial object to be called to fetch"},{"line_number":2577,"context_line":"        data from the backend; it needs to return a tuple of (data, response)."},{"line_number":2578,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2579,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2580,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"}],"source_content_type":"text/x-python","patch_set":6,"id":"de258d1a_52baecdd","line":2577,"in_reply_to":"678894c0_4d7a96b0","updated":"2024-02-07 02:07:06.000000000","message":"thanks for the suggestion. when I started to think about a class abstraction, I thought about creating a \"pure\" cooperative token class without dealing with backend stuff, but then found out that if we later apply this \"pure\" token class to different use-case paths, each path will have the same code block to use those token classes. I want those different use cases to share that usage code block (line L34 to L51 in your code https://paste.openstack.org/show/bWWLUY02aZGi6FwdX81Q/), then later if we need to change its implementation we only need to change one place instead of several different places, so I added those to function ``fetch_backend_with_token`` as well.\n\nAnother abstraction idea I had before: a \"pure cooperative token\" class and a \"fetch backend with token\" class, and the latter would call the former to implement the usage code block. 
what\u0027s your take on this idea?","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8007916b6068566286569c73cf26d26c3d40b414","unresolved":false,"context_lines":[{"line_number":2574,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":2575,"context_line":"        memcached."},{"line_number":2576,"context_line":"    :param do_fetch_backend: a functools.partial object to be called to fetch"},{"line_number":2577,"context_line":"        data from the backend; it needs to return a tuple of (data, response)."},{"line_number":2578,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2579,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2580,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"}],"source_content_type":"text/x-python","patch_set":6,"id":"fc606195_8c520d5c","line":2577,"in_reply_to":"8df1f210_1e6f5654","updated":"2024-02-14 05:07:15.000000000","message":"I have adopted the name of ``CooperativeCachePopulator`` and the usage of a context manager.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"b0a0fc43e185708a736e7ab41f5aa910fefedd76","unresolved":true,"context_lines":[{"line_number":2574,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":2575,"context_line":"        memcached."},{"line_number":2576,"context_line":"    :param do_fetch_backend: a functools.partial object to be called to fetch"},{"line_number":2577,"context_line":"        data from the backend; it needs to return a tuple of (data, response)."},{"line_number":2578,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2579,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2580,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"}],"source_content_type":"text/x-python","patch_set":6,"id":"8df1f210_1e6f5654","line":2577,"in_reply_to":"de258d1a_52baecdd","updated":"2024-02-07 19:28:53.000000000","message":"all problems in computer science can be solved with another layer of abstraction ;)\n\njoking aside I think in this case a concrete focused base class that we can test independently of the backend fetching may be useful even if its only actual usage in-tree is to subclass it and extend it into a CooperativeCachePopulator\n\nBut Al may have been suggesting that the \"concrete base abstraction\" doesn\u0027t even need to be a class but a context manager.  If the class implementation (that needs fetch_from_backend dependency injection) then uses the context manager (instead of some parent class methods it inherits from) that might also look ok.\n\nBut I would imagine the general sentiment is \"build complex things out of smaller simpler things\" - there\u0027s definitely going to be different ways to do that.  
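A minimal sketch of the context-manager shape discussed in this thread, under stated assumptions: ``cooperative_token`` is an invented name, the linked paste may differ in detail, and the incr/add/delete calls model the memcache client loosely. The manager only decides whether the caller holds a token and yields a sequence of waits; fetching from the backend and filling the cache stay with the caller:

```python
# Sketch (assumed names, not the linked paste) of a context manager that
# hands out the cooperative token and a sequence of waits, keeping backend
# fetching and cache filling in the caller's hands.
import contextlib
import time


@contextlib.contextmanager
def cooperative_token(memcache, token_key, num_tokens, token_ttl,
                      sleep_interval, sleep=time.sleep):
    # incr() creates the key on first use; the ttl is set at creation time.
    count = memcache.incr(token_key, delta=1, time=token_ttl)
    if count <= num_tokens:
        try:
            # Token holder: empty wait sequence, caller goes to the backend.
            yield iter(())
        finally:
            # Release the token key so the next session starts cleanly.
            memcache.delete(token_key)
    else:
        def waits():
            # Wall-clock countdown, as suggested earlier in the review.
            deadline = time.time() + token_ttl
            while time.time() < deadline:
                sleep(sleep_interval)
                yield  # caller re-checks memcache after each wait
        yield waits()


# Usage sketch: the caller, not the token, owns the cache/backend logic.
# with cooperative_token(mc, token_key, 3, 1.0, 0.05) as waits:
#     for _ in waits:
#         data = mc.get(cache_key)
#         if data:
#             break
#     else:
#         data = do_fetch_backend()
```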
For every N developers in the room you\u0027ll have N+m conflicting opinions?","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"fad23fcc9a8765b6d96b3b325398b7ba3746e17a","unresolved":true,"context_lines":[{"line_number":2581,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2582,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2583,"context_line":"        cooperative token, suggest to be set as average time spent on"},{"line_number":2584,"context_line":"        ``do_fetch_backend``."},{"line_number":2585,"context_line":"    :param num_tokens: the maximum number of tokens per each usage sesssion,"},{"line_number":2586,"context_line":"        also the the maximum number of in-flight requests allowed to fetch data"},{"line_number":2587,"context_line":"        from backend; default to be 3, which give redundancy when any request"}],"source_content_type":"text/x-python","patch_set":6,"id":"9f61d609_26903fd0","line":2584,"updated":"2024-01-22 16:38:52.000000000","message":"so this should be less than token_ttl?","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8007916b6068566286569c73cf26d26c3d40b414","unresolved":false,"context_lines":[{"line_number":2581,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2582,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2583,"context_line":"        cooperative token, suggest to be set as average time spent on"},{"line_number":2584,"context_line":"        ``do_fetch_backend``."},{"line_number":2585,"context_line":"    :param num_tokens: the maximum number of tokens per each usage sesssion,"},{"line_number":2586,"context_line":"        also the the maximum number of in-flight requests allowed to fetch data"},{"line_number":2587,"context_line":"        from backend; default to be 3, which give redundancy when any request"}],"source_content_type":"text/x-python","patch_set":6,"id":"2a5358ee_ff526b0d","line":2584,"in_reply_to":"9f61d609_26903fd0","updated":"2024-02-14 05:07:15.000000000","message":"ACK. 
added comments.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"fad23fcc9a8765b6d96b3b325398b7ba3746e17a","unresolved":true,"context_lines":[{"line_number":2627,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2628,"context_line":"        try:"},{"line_number":2629,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":2630,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2631,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2632,"context_line":"            self._set_cache_state \u003d \u0027error\u0027"},{"line_number":2633,"context_line":"            data, self.backend_response \u003d self.do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":6,"id":"b3ca40dc_ce1e06e1","line":2630,"updated":"2024-01-22 16:38:52.000000000","message":"IIUC the first incr command that fails to find the existing key will add that key, which is when the ttl gets set. Subsequent incrs do not change the ttl. This is good, because otherwise the expiry time of the key would be forever pushed forwards in time.\n\nIt worries me a little that if a key ever became stuck in memcache (i.e. it does not expire when its ttl is passed) then we\u0027d end up being unable to make any backend requests. I remember that we did see this happen once (IIRC ~2 or 3 years ago, we had stale shard ranges stuck in memcache). This could be avoided by adding a time element to the token key.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2627,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2628,"context_line":"        try:"},{"line_number":2629,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":2630,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2631,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2632,"context_line":"            self._set_cache_state \u003d \u0027error\u0027"},{"line_number":2633,"context_line":"            data, self.backend_response \u003d self.do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":6,"id":"f0a5fecc_afa8c42c","line":2630,"in_reply_to":"44bbd830_16ebc093","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8007916b6068566286569c73cf26d26c3d40b414","unresolved":true,"context_lines":[{"line_number":2627,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2628,"context_line":"        try:"},{"line_number":2629,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":2630,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2631,"context_line":"        except 
swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2632,"context_line":"            self._set_cache_state \u003d \u0027error\u0027"},{"line_number":2633,"context_line":"            data, self.backend_response \u003d self.do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":6,"id":"e83e6972_796ab9e9","line":2630,"in_reply_to":"b3ca40dc_ce1e06e1","updated":"2024-02-14 05:07:15.000000000","message":"Maybe that was an old bug with memcached 1.4 (~2 or 3 years ago)? But prod has now been upgraded to the latest memcached 1.6. Since ttl is such a frequently used memcached feature, I would leave this problem to the memcached developers. And in the very worst case, if the token_key never expires at the set ttl, all requests should still be able to proceed after the sleep countdown has been exhausted.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2627,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":2628,"context_line":"        try:"},{"line_number":2629,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":2630,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2631,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2632,"context_line":"            self._set_cache_state \u003d \u0027error\u0027"},{"line_number":2633,"context_line":"            data, self.backend_response \u003d self.do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":6,"id":"44bbd830_16ebc093","line":2630,"in_reply_to":"e83e6972_796ab9e9","updated":"2024-03-15 15:00:01.000000000","message":"I agree it\u0027s been some time since we saw a stuck/stale memcache value, and it was probably related to a memcache/mcrouter bug.  I also imagine it would be slightly more difficult to use increment if the value in memcache was a tuple of (time, counter).  I agree the failure mode would degrade into all requests waiting for a token_ttl before doing the backend request - at which point I think we could argue *someone* ought to try and set the value they got from the backend in memcache!","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"fad23fcc9a8765b6d96b3b325398b7ba3746e17a","unresolved":true,"context_lines":[{"line_number":2637,"context_line":"            # Acquired a cooperative token, go fetching data from backend"},{"line_number":2638,"context_line":"            data, self.backend_response \u003d self.do_fetch_backend()"},{"line_number":2639,"context_line":"            if not data:"},{"line_number":2640,"context_line":"                return None"},{"line_number":2641,"context_line":"            if self._infocache:"},{"line_number":2642,"context_line":"                self._infocache[self._cache_key] \u003d data"},{"line_number":2643,"context_line":"            try:"}],"source_content_type":"text/x-python","patch_set":6,"id":"36a08114_455347d1","line":2640,"updated":"2024-01-22 16:38:52.000000000","message":"As it is written. 
I think this restricts backend requests to 3 per token_ttl rather than 3 at any moment in time.\n\nThere are N other requests waiting up to token_ttl seconds, assuming this one is going to succeed. If it has failed, shouldn\u0027t we reset the token and get another backend request in flight? Of course, if we reset the token after the first backend request fails then we could end up with 5 backend requests in flight 😞\n\nWe could consider using 3 independent cooperative tokens, each allowing only one backend request, and randomly choosing one of them for each inbound request. That way we could reset a token as soon as the backend request fails, knowing that at most one new backend request would be emitted. We would also get a little bit of load balancing across memcache keys.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8007916b6068566286569c73cf26d26c3d40b414","unresolved":true,"context_lines":[{"line_number":2637,"context_line":"            # Acquired a cooperative token, go fetching data from backend"},{"line_number":2638,"context_line":"            data, self.backend_response \u003d self.do_fetch_backend()"},{"line_number":2639,"context_line":"            if not data:"},{"line_number":2640,"context_line":"                return None"},{"line_number":2641,"context_line":"            if self._infocache:"},{"line_number":2642,"context_line":"                self._infocache[self._cache_key] \u003d data"},{"line_number":2643,"context_line":"            try:"}],"source_content_type":"text/x-python","patch_set":6,"id":"ee6f071b_ed36c291","line":2640,"in_reply_to":"36a08114_455347d1","updated":"2024-02-14 05:07:15.000000000","message":"Good question! I hope my comments below in the code explain it:\n    ```The original ghetto lock only defines one token for usage, while this\n    cooperative token mechanism uses ``num_tokens`` to define the maximum\n    number of tokens during one usage session, default to be 3. This is used to\n    increase fault tolerance in the distributed environment, when one caller\n    process with token hangs or exits, any other requests with token still can\n    set new fetched data into memcache and finish the whole usage session. In\n    very rare case, when all 3 callers with tokens fails, the existing usage\n    session ends after ``token_ttl`` period is reached and the internal key is\n    expired, then all pending requests which have no token will exit waiting\n    and fall back to query the backend (the situation without cooperative\n    token); the new requests which cache misses and need querying backend\n    after ``token_ttl`` will start a new round of cooperation session.```\n\nIf we set a proper value for ``token_ttl``, much larger than the typical backend retrieval time, we should be able to avoid this situation: the first usage session ends, but requests with tokens haven\u0027t finished yet, and then a second usage session enables new requests with tokens. 
Worst case, let\u0027s say this happens: then two usage sessions will enable 6 in-flight requests to fetch from the backend; this is still okay, and much, much better than without the cooperative token, where tens of thousands of in-flight requests would all arrive at the backend.\n\nAnd if we want only one request to go fetch the data from the backend and populate the cache item, we can just configure ``num_tokens\u003d1``.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2637,"context_line":"            # Acquired a cooperative token, go fetching data from backend"},{"line_number":2638,"context_line":"            data, self.backend_response \u003d self.do_fetch_backend()"},{"line_number":2639,"context_line":"            if not data:"},{"line_number":2640,"context_line":"                return None"},{"line_number":2641,"context_line":"            if self._infocache:"},{"line_number":2642,"context_line":"                self._infocache[self._cache_key] \u003d data"},{"line_number":2643,"context_line":"            try:"}],"source_content_type":"text/x-python","patch_set":6,"id":"a6806c1a_31d5b551","line":2640,"in_reply_to":"ac8691d6_d8778739","updated":"2024-03-20 04:07:49.000000000","message":"Added the related description to the comments.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2637,"context_line":"            # Acquired a cooperative token, go fetching data from backend"},{"line_number":2638,"context_line":"            data, self.backend_response \u003d self.do_fetch_backend()"},{"line_number":2639,"context_line":"            if not data:"},{"line_number":2640,"context_line":"                return None"},{"line_number":2641,"context_line":"            if self._infocache:"},{"line_number":2642,"context_line":"                self._infocache[self._cache_key] \u003d data"},{"line_number":2643,"context_line":"            try:"}],"source_content_type":"text/x-python","patch_set":6,"id":"ac8691d6_d8778739","line":2640,"in_reply_to":"ee6f071b_ed36c291","updated":"2024-03-15 15:00:01.000000000","message":"I think if we always delete the counter when someone finishes, it does open the window for more requests to be in-flight.  
I agree any kind of limit, however loosely enforced, is better than no limit.\n\nBut we need to be precise in describing *exactly* how our implementation behaves and what it does or does not guarantee, even if we discover in practice there\u0027s a lot of benefit to a \"best effort\" limit despite it being imprecise in implementation.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"fad23fcc9a8765b6d96b3b325398b7ba3746e17a","unresolved":true,"context_lines":[{"line_number":2656,"context_line":"        # will fetch data form the backend servers and update them in cache,"},{"line_number":2657,"context_line":"        # let\u0027s wait for them to finish with limited retires."},{"line_number":2658,"context_line":"        retries_time_window \u003d self._token_ttl"},{"line_number":2659,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":2660,"context_line":"            eventlet.sleep(self._token_sleep_interval)"},{"line_number":2661,"context_line":"            retries_time_window -\u003d self._token_sleep_interval"},{"line_number":2662,"context_line":"            cache_value \u003d self._memcache.get("}],"source_content_type":"text/x-python","patch_set":6,"id":"02eeaea0_98b69b3a","line":2659,"updated":"2024-01-22 16:38:52.000000000","message":"OK, IIUC each thread will retry until it either hits memcache or token_ttl has expired since the thread reached this point. That\u0027s good: we don\u0027t get an even worse thundering herd if the token_ttl expires before any request has succeeded in setting memcache. But the thundering herd is just deferred by token_ttl.","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8007916b6068566286569c73cf26d26c3d40b414","unresolved":false,"context_lines":[{"line_number":2656,"context_line":"        # will fetch data form the backend servers and update them in cache,"},{"line_number":2657,"context_line":"        # let\u0027s wait for them to finish with limited retires."},{"line_number":2658,"context_line":"        retries_time_window \u003d self._token_ttl"},{"line_number":2659,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":2660,"context_line":"            eventlet.sleep(self._token_sleep_interval)"},{"line_number":2661,"context_line":"            retries_time_window -\u003d self._token_sleep_interval"},{"line_number":2662,"context_line":"            cache_value \u003d self._memcache.get("}],"source_content_type":"text/x-python","patch_set":6,"id":"453f48ba_9cc1a6f1","line":2659,"in_reply_to":"02eeaea0_98b69b3a","updated":"2024-02-14 05:07:15.000000000","message":"Acknowledged","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"fad23fcc9a8765b6d96b3b325398b7ba3746e17a","unresolved":true,"context_lines":[{"line_number":2666,"context_line":"                self._req_served_from_cache \u003d True"},{"line_number":2667,"context_line":"                return cache_value"},{"line_number":2668,"context_line":"            # cache miss."},{"line_number":2669,"context_line":"            continue"},{"line_number":2670,"context_line":""},{"line_number":2671,"context_line":"        # Still no cache data fetched, do the slow fetch and 
return the data."},{"line_number":2672,"context_line":"        data, self.backend_response \u003d self.do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":6,"id":"4be72c90_cbaec7a2","line":2669,"updated":"2024-01-22 16:38:52.000000000","message":"``continue`` isn\u0027t necessary","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8007916b6068566286569c73cf26d26c3d40b414","unresolved":false,"context_lines":[{"line_number":2666,"context_line":"                self._req_served_from_cache \u003d True"},{"line_number":2667,"context_line":"                return cache_value"},{"line_number":2668,"context_line":"            # cache miss."},{"line_number":2669,"context_line":"            continue"},{"line_number":2670,"context_line":""},{"line_number":2671,"context_line":"        # Still no cache data fetched, do the slow fetch and return the data."},{"line_number":2672,"context_line":"        data, self.backend_response \u003d self.do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":6,"id":"9b74a5f3_a92ab075","line":2669,"in_reply_to":"4be72c90_cbaec7a2","updated":"2024-02-14 05:07:15.000000000","message":"Acknowledged","commit_id":"c9c7a62cd01298a50ba35d9655631dd3496f52a4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"924950a76e5d1ee1e04f9800ca3820f1a68a3d91","unresolved":true,"context_lines":[{"line_number":2615,"context_line":"        from backend; default to be 3, which give redundancy when any request"},{"line_number":2616,"context_line":"        with token fails to fetch data from the backend or fails to set new"},{"line_number":2617,"context_line":"        data into memcached."},{"line_number":2618,"context_line":"    \"\"\""},{"line_number":2619,"context_line":""},{"line_number":2620,"context_line":"    def __init__(self, infocache, memcache,"},{"line_number":2621,"context_line":"                 cache_key, cache_ttl, do_fetch_backend,"}],"source_content_type":"text/x-python","patch_set":8,"id":"5e17aecb_38a84731","line":2618,"updated":"2024-02-15 12:58:23.000000000","message":"AFAICT this is a *class* simply so it can hold some state that would otherwise need to be returned from the *single* method. \n\nI\u0027m not sure how I feel about that: A class with a single method seems like it could be a function? Could we explore other options for returning the state, and just have a function? 
See comments below...","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42942d7218e212d05f5f4a19dd84f057c97bfb86","unresolved":false,"context_lines":[{"line_number":2615,"context_line":"        from backend; default to be 3, which give redundancy when any request"},{"line_number":2616,"context_line":"        with token fails to fetch data from the backend or fails to set new"},{"line_number":2617,"context_line":"        data into memcached."},{"line_number":2618,"context_line":"    \"\"\""},{"line_number":2619,"context_line":""},{"line_number":2620,"context_line":"    def __init__(self, infocache, memcache,"},{"line_number":2621,"context_line":"                 cache_key, cache_ttl, do_fetch_backend,"}],"source_content_type":"text/x-python","patch_set":8,"id":"482a87b0_e9457858","line":2618,"in_reply_to":"5e17aecb_38a84731","updated":"2024-02-20 04:46:55.000000000","message":"ACK. I had some helper functions defined in this class before. Since they were all removed, there is only one function left. I think it makes sense to just use a function instead of a class.","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"924950a76e5d1ee1e04f9800ca3820f1a68a3d91","unresolved":true,"context_lines":[{"line_number":2632,"context_line":"        # the status of cache set operations used internally."},{"line_number":2633,"context_line":"        self.set_cache_state \u003d None"},{"line_number":2634,"context_line":"        # indicates if this request is served out of Memcached."},{"line_number":2635,"context_line":"        self.req_served_from_cache \u003d False"},{"line_number":2636,"context_line":"        # the response object returned by ``do_fetch_backend``."},{"line_number":2637,"context_line":"        self.backend_response \u003d None"},{"line_number":2638,"context_line":""}],"source_content_type":"text/x-python","patch_set":8,"id":"a8dc759c_434f3b28","line":2635,"updated":"2024-02-15 12:58:23.000000000","message":"This seems to be equivalent to ``set_cache_state \u003d\u003d None``, so it may not be necessary","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42942d7218e212d05f5f4a19dd84f057c97bfb86","unresolved":false,"context_lines":[{"line_number":2632,"context_line":"        # the status of cache set operations used internally."},{"line_number":2633,"context_line":"        self.set_cache_state \u003d None"},{"line_number":2634,"context_line":"        # indicates if this request is served out of Memcached."},{"line_number":2635,"context_line":"        self.req_served_from_cache \u003d False"},{"line_number":2636,"context_line":"        # the response object returned by ``do_fetch_backend``."},{"line_number":2637,"context_line":"        self.backend_response \u003d None"},{"line_number":2638,"context_line":""}],"source_content_type":"text/x-python","patch_set":8,"id":"1d7e19dc_baafda18","line":2635,"in_reply_to":"a8dc759c_434f3b28","updated":"2024-02-20 04:46:55.000000000","message":"Acknowledged","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":7847,"name":"Alistair 
Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"924950a76e5d1ee1e04f9800ca3820f1a68a3d91","unresolved":true,"context_lines":[{"line_number":2680,"context_line":"                        self._cache_key, data,"},{"line_number":2681,"context_line":"                        time\u003dself._cache_ttl, raise_on_error\u003dTrue)"},{"line_number":2682,"context_line":"                except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2683,"context_line":"                    self._set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2684,"context_line":"                else:"},{"line_number":2685,"context_line":"                    self._set_cache_state \u003d \u0027set\u0027"},{"line_number":2686,"context_line":"                return data"}],"source_content_type":"text/x-python","patch_set":8,"id":"880dae7d_30db1ee0","line":2683,"updated":"2024-02-15 12:58:23.000000000","message":"instead we could raise the exception and have the caller treat it as \u0027set_error\u0027","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42942d7218e212d05f5f4a19dd84f057c97bfb86","unresolved":false,"context_lines":[{"line_number":2680,"context_line":"                        self._cache_key, data,"},{"line_number":2681,"context_line":"                        time\u003dself._cache_ttl, raise_on_error\u003dTrue)"},{"line_number":2682,"context_line":"                except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2683,"context_line":"                    self._set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2684,"context_line":"                else:"},{"line_number":2685,"context_line":"                    self._set_cache_state \u003d \u0027set\u0027"},{"line_number":2686,"context_line":"                return data"}],"source_content_type":"text/x-python","patch_set":8,"id":"d939fb7c_c409ae8d","line":2683,"in_reply_to":"880dae7d_30db1ee0","updated":"2024-02-20 04:46:55.000000000","message":"Acknowledged","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"924950a76e5d1ee1e04f9800ca3820f1a68a3d91","unresolved":true,"context_lines":[{"line_number":2683,"context_line":"                    self._set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2684,"context_line":"                else:"},{"line_number":2685,"context_line":"                    self._set_cache_state \u003d \u0027set\u0027"},{"line_number":2686,"context_line":"                return data"},{"line_number":2687,"context_line":""},{"line_number":2688,"context_line":""},{"line_number":2689,"context_line":"def read_conf_dir(parser, conf_dir):"}],"source_content_type":"text/x-python","patch_set":8,"id":"3aa41789_efd9953b","line":2686,"updated":"2024-02-15 12:58:23.000000000","message":"``data`` has been written into the caller-supplied ``infocache`` dict using the caller supplied ``cache_key``, so maybe we don\u0027t need to return it? 
Just mandate that the caller passes ``infocache``.\n\nThen I think the method could return just ``response``, or ``None`` if read from cache?","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2683,"context_line":"                    self._set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2684,"context_line":"                else:"},{"line_number":2685,"context_line":"                    self._set_cache_state \u003d \u0027set\u0027"},{"line_number":2686,"context_line":"                return data"},{"line_number":2687,"context_line":""},{"line_number":2688,"context_line":""},{"line_number":2689,"context_line":"def read_conf_dir(parser, conf_dir):"}],"source_content_type":"text/x-python","patch_set":8,"id":"12e367d2_c2755ced","line":2686,"in_reply_to":"0b93bee6_87fe2514","updated":"2024-03-20 04:07:49.000000000","message":"Agreed; after implementing both a class-based and a function-based interface, I feel a class is better too. Even though it will only have one main function, it will help encapsulate the logging/states, the exception handling, and various internal states.","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42942d7218e212d05f5f4a19dd84f057c97bfb86","unresolved":true,"context_lines":[{"line_number":2683,"context_line":"                    self._set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2684,"context_line":"                else:"},{"line_number":2685,"context_line":"                    self._set_cache_state \u003d \u0027set\u0027"},{"line_number":2686,"context_line":"                return data"},{"line_number":2687,"context_line":""},{"line_number":2688,"context_line":""},{"line_number":2689,"context_line":"def read_conf_dir(parser, conf_dir):"}],"source_content_type":"text/x-python","patch_set":8,"id":"ba8980b0_14493e4b","line":2686,"in_reply_to":"3aa41789_efd9953b","updated":"2024-02-20 04:46:55.000000000","message":"I feel hiding data within ``infocache`` is a little hacky. I refactored the new function ``populate_cache_with_cooperative_token`` to return a tuple of (data, backend_response), which is the same as the functools.partial function ``do_fetch_backend``.","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2683,"context_line":"                    self._set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2684,"context_line":"                else:"},{"line_number":2685,"context_line":"                    self._set_cache_state \u003d \u0027set\u0027"},{"line_number":2686,"context_line":"                return data"},{"line_number":2687,"context_line":""},{"line_number":2688,"context_line":""},{"line_number":2689,"context_line":"def read_conf_dir(parser, conf_dir):"}],"source_content_type":"text/x-python","patch_set":8,"id":"0b93bee6_87fe2514","line":2686,"in_reply_to":"ba8980b0_14493e4b","updated":"2024-03-15 15:00:01.000000000","message":"There\u0027s SO much complexity to \"hide\", and interfaces get \"hacky\" trying to support all the flexibility we want to provide.  
Mostly, what callers are going to want is a way to write a method that makes a backend request and then wrap it in some well-tested utils helper that makes it almost globally serialized but still robust.\n\nI keep thinking we\u0027re going to want a class - one that takes care of all the logging and stats for you by default - but you can \"override\" if you want your logging and stats to be more specific.","commit_id":"f3f3e1fe92153ff527ac4ce9ff49ade7be1e85c4"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2545,"context_line":"    face: given a cache item that is popular and difficult to recreate, in the"},{"line_number":2546,"context_line":"    event of cache misses, users could end up with hundreds (or thousands) of"},{"line_number":2547,"context_line":"    processes slamming the backend database at the same time in an attempt to"},{"line_number":2548,"context_line":"    refill the same cache content."},{"line_number":2549,"context_line":""},{"line_number":2550,"context_line":"    Here is the way how cooperative token works. When lots of in-flight callers"},{"line_number":2551,"context_line":"    try to get the cached item specified by key from memcache and get cache"}],"source_content_type":"text/x-python","patch_set":10,"id":"3f978248_53f1a0a6","line":2548,"updated":"2024-03-15 15:00:01.000000000","message":"This seems to very closely restate the commit message.\n\nThis doc-string is kind of long - it might be able to skip some of the \"why\" and focus on the \"how\" - mostly \"how do callers use this context manager\"; assuming they sort of already think they want it.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2545,"context_line":"    face: given a cache item that is popular and difficult to recreate, in the"},{"line_number":2546,"context_line":"    event of cache misses, users could end up with hundreds (or thousands) of"},{"line_number":2547,"context_line":"    processes slamming the backend database at the same time in an attempt to"},{"line_number":2548,"context_line":"    refill the same cache content."},{"line_number":2549,"context_line":""},{"line_number":2550,"context_line":"    Here is the way how cooperative token works. 
When lots of in-flight callers"},{"line_number":2551,"context_line":"    try to get the cached item specified by key from memcache and get cache"}],"source_content_type":"text/x-python","patch_set":10,"id":"8804f09f_c6d292d1","line":2548,"in_reply_to":"3f978248_53f1a0a6","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"4251cfa176a1748522e900fb9664a6ae2dac966e","unresolved":true,"context_lines":[{"line_number":2563,"context_line":"    :param token_key: the memcache key used to create the cooperative tokens"},{"line_number":2564,"context_line":"    :param token_ttl: the token key time-to-live"},{"line_number":2565,"context_line":"    :param num_tokens: the maximum number of tokens during one usage session"},{"line_number":2566,"context_line":"    :param token_sleep_interval: sleep interval when waiting for the token"},{"line_number":2567,"context_line":""},{"line_number":2568,"context_line":"    :returns: an empty list if cooperative token acquired, otherwise a"},{"line_number":2569,"context_line":"        sleeper() generator."}],"source_content_type":"text/x-python","patch_set":10,"id":"366b2c00_2291980a","line":2566,"range":{"start_line":2566,"start_character":11,"end_line":2566,"end_character":31},"updated":"2024-03-15 13:35:53.000000000","message":"this might be better named ``retry_interval``","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"a9bca8d92d206bd59d7923b6a00fc394e6adbac8","unresolved":false,"context_lines":[{"line_number":2563,"context_line":"    :param token_key: the memcache key used to create the cooperative tokens"},{"line_number":2564,"context_line":"    :param token_ttl: the token key time-to-live"},{"line_number":2565,"context_line":"    :param num_tokens: the maximum number of tokens during one usage session"},{"line_number":2566,"context_line":"    :param token_sleep_interval: sleep interval when waiting for the token"},{"line_number":2567,"context_line":""},{"line_number":2568,"context_line":"    :returns: an empty list if cooperative token acquired, otherwise a"},{"line_number":2569,"context_line":"        sleeper() generator."}],"source_content_type":"text/x-python","patch_set":10,"id":"05f4aec7_cb4dd1f6","line":2566,"range":{"start_line":2566,"start_character":11,"end_line":2566,"end_character":31},"in_reply_to":"366b2c00_2291980a","updated":"2024-03-20 05:06:51.000000000","message":"Done","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"4251cfa176a1748522e900fb9664a6ae2dac966e","unresolved":true,"context_lines":[{"line_number":2565,"context_line":"    :param num_tokens: the maximum number of tokens during one usage session"},{"line_number":2566,"context_line":"    :param token_sleep_interval: sleep interval when waiting for the token"},{"line_number":2567,"context_line":""},{"line_number":2568,"context_line":"    :returns: an empty list if cooperative token acquired, otherwise a"},{"line_number":2569,"context_line":"        sleeper() generator."},{"line_number":2570,"context_line":"    \"\"\""},{"line_number":2571,"context_line":"    def sleeper():"},{"line_number":2572,"context_line":"        retries 
\u003d 0"}],"source_content_type":"text/x-python","patch_set":10,"id":"7891c8f4_7aea09b7","line":2569,"range":{"start_line":2568,"start_character":14,"end_line":2569,"end_character":27},"updated":"2024-03-15 13:35:53.000000000","message":"This should describe the significance of the returned object:\n\n```\nAn iterable. If the token has been granted then this iterable will yield no items. If the token has not been granted then this iterable will yield every ``retry_interval`` until the token expires, giving the caller an opportunity to retry any pending operations. The yielded value is a count of the number of times the iterable has yielded. \n```","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2566,"context_line":"    :param token_sleep_interval: sleep interval when waiting for the token"},{"line_number":2567,"context_line":""},{"line_number":2568,"context_line":"    :returns: an empty list if cooperative token acquired, otherwise a"},{"line_number":2569,"context_line":"        sleeper() generator."},{"line_number":2570,"context_line":"    \"\"\""},{"line_number":2571,"context_line":"    def sleeper():"},{"line_number":2572,"context_line":"        retries \u003d 0"}],"source_content_type":"text/x-python","patch_set":10,"id":"6be11ab5_59de6d94","line":2569,"updated":"2024-03-15 15:00:01.000000000","message":"wtf?  So it\u0027s going to be an *iterable* - which will have 0 or num_retries elements.  And when you iterate the rv, it might sleep.  Interesting interface!","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2566,"context_line":"    :param token_sleep_interval: sleep interval when waiting for the token"},{"line_number":2567,"context_line":""},{"line_number":2568,"context_line":"    :returns: an empty list if cooperative token acquired, otherwise a"},{"line_number":2569,"context_line":"        sleeper() generator."},{"line_number":2570,"context_line":"    \"\"\""},{"line_number":2571,"context_line":"    def sleeper():"},{"line_number":2572,"context_line":"        retries \u003d 0"}],"source_content_type":"text/x-python","patch_set":10,"id":"6a016db5_8d13f98e","line":2569,"in_reply_to":"6be11ab5_59de6d94","updated":"2024-03-20 04:07:49.000000000","message":"This iterable interface has been rolled back.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":false,"context_lines":[{"line_number":2565,"context_line":"    :param num_tokens: the maximum number of tokens during one usage session"},{"line_number":2566,"context_line":"    :param token_sleep_interval: sleep interval when waiting for the token"},{"line_number":2567,"context_line":""},{"line_number":2568,"context_line":"    :returns: an empty list if cooperative token acquired, otherwise a"},{"line_number":2569,"context_line":"        sleeper() generator."},{"line_number":2570,"context_line":"    \"\"\""},{"line_number":2571,"context_line":"    def sleeper():"},{"line_number":2572,"context_line":"        retries 
\u003d 0"}],"source_content_type":"text/x-python","patch_set":10,"id":"e6a47e5a_64960f0a","line":2569,"range":{"start_line":2568,"start_character":14,"end_line":2569,"end_character":27},"in_reply_to":"7891c8f4_7aea09b7","updated":"2024-09-25 21:54:01.000000000","message":"This comment was on the previous (old) implementation.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2573,"context_line":"        retries_time_window \u003d token_ttl"},{"line_number":2574,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":2575,"context_line":"            eventlet.sleep(token_sleep_interval)"},{"line_number":2576,"context_line":"            retries_time_window -\u003d token_sleep_interval"},{"line_number":2577,"context_line":"            retries +\u003d 1"},{"line_number":2578,"context_line":"            yield retries"},{"line_number":2579,"context_line":""}],"source_content_type":"text/x-python","patch_set":10,"id":"af2ba588_0f6d17a9","line":2576,"updated":"2024-03-15 15:00:01.000000000","message":"Given that eventlet.sleep *may* sleep \"longer\" than token_sleep_interval, it *might* be more accurate to use time.time() to calculate how much sleep we should actually do.\n\n```\ncutoff_time \u003d time.time() + token_ttl\n```\n\nI\u0027m not sure I understand the significance of `while retries_time_window` instead of `for i in range(token_ttl / token_sleep_interval)` or `while time.time() \u003c cutoff_time`","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"a9bca8d92d206bd59d7923b6a00fc394e6adbac8","unresolved":false,"context_lines":[{"line_number":2573,"context_line":"        retries_time_window \u003d token_ttl"},{"line_number":2574,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":2575,"context_line":"            eventlet.sleep(token_sleep_interval)"},{"line_number":2576,"context_line":"            retries_time_window -\u003d token_sleep_interval"},{"line_number":2577,"context_line":"            retries +\u003d 1"},{"line_number":2578,"context_line":"            yield retries"},{"line_number":2579,"context_line":""}],"source_content_type":"text/x-python","patch_set":10,"id":"fb789569_1196f1d4","line":2576,"in_reply_to":"af2ba588_0f6d17a9","updated":"2024-03-20 05:06:51.000000000","message":"Done","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"4251cfa176a1748522e900fb9664a6ae2dac966e","unresolved":true,"context_lines":[{"line_number":2583,"context_line":"        # Acquired a cooperative token"},{"line_number":2584,"context_line":"        yield []"},{"line_number":2585,"context_line":"        # Remove all cooperative tokens related to this usage."},{"line_number":2586,"context_line":"        memcache.delete(token_key)"},{"line_number":2587,"context_line":"    else:"},{"line_number":2588,"context_line":"        # No token acquired, will do intermittent and limited sleep"},{"line_number":2589,"context_line":"        yield sleeper()"}],"source_content_type":"text/x-python","patch_set":10,"id":"0646197f_dedf6f6e","line":2586,"updated":"2024-03-15 
13:35:53.000000000","message":"I\u0027m not sure where/how best to document that the caller that \"gets the token\" only \"has the token\" while they are in the context manager context. Something like this would NOT be appropriate usage:\n\n```\nwith get_cooperative_token_or_sleep() as sleeper:\n    granted \u003d True\n    for tick in sleeper:\n        granted \u003d False\nif granted:\n    # oops, I dedented and the memcache token has been deleted\n```","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2583,"context_line":"        # Acquired a cooperative token"},{"line_number":2584,"context_line":"        yield []"},{"line_number":2585,"context_line":"        # Remove all cooperative tokens related to this usage."},{"line_number":2586,"context_line":"        memcache.delete(token_key)"},{"line_number":2587,"context_line":"    else:"},{"line_number":2588,"context_line":"        # No token acquired, will do intermittent and limited sleep"},{"line_number":2589,"context_line":"        yield sleeper()"}],"source_content_type":"text/x-python","patch_set":10,"id":"f769475c_a9871455","line":2586,"updated":"2024-03-15 15:00:01.000000000","message":"I\u0027m having these flashbacks to race conditions with a file-lock where trying to delete the lock always creates a problem.  I\u0027m not sure if memcache\u0027s single-threaded-ness makes this OK.\n\nCertainly it\u0027s the case that if there are num_token-1 other waiters fetching from the backend and some *new* waiter shows up right after this delete, they\u0027ll think they\u0027re the FIRST waiter instead of recognizing the other pending waiters.\n\nI recognize there\u0027s some edge-case related to decrement that could result in a negative number of waiters; I don\u0027t know if there\u0027s a \"floor\" decrement that would keep the counter at at least 0; but I\u0027m skeptical that deleting the key is obviously the right thing to do. I certainly don\u0027t have any reservations about the \"wasted space\" of this key existing in memcache with value 0 even if the shard-ranges are populated, and maybe whatever race conditions might exist when the key is deleted are the same as a startup scenario where the key hasn\u0027t been created yet; but it *feels* like the \"contention\" scenario at the edge is exactly the time we want the counter to be most accurate, and deleting it throws away potentially useful context.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2583,"context_line":"        # Acquired a cooperative token"},{"line_number":2584,"context_line":"        yield []"},{"line_number":2585,"context_line":"        # Remove all cooperative tokens related to this usage."},{"line_number":2586,"context_line":"        memcache.delete(token_key)"},{"line_number":2587,"context_line":"    else:"},{"line_number":2588,"context_line":"        # No token acquired, will do intermittent and limited sleep"},{"line_number":2589,"context_line":"        yield sleeper()"}],"source_content_type":"text/x-python","patch_set":10,"id":"7ec510c8_241f0397","line":2586,"in_reply_to":"0646197f_dedf6f6e","updated":"2024-03-20 
04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":false,"context_lines":[{"line_number":2583,"context_line":"        # Acquired a cooperative token"},{"line_number":2584,"context_line":"        yield []"},{"line_number":2585,"context_line":"        # Remove all cooperative tokens related to this usage."},{"line_number":2586,"context_line":"        memcache.delete(token_key)"},{"line_number":2587,"context_line":"    else:"},{"line_number":2588,"context_line":"        # No token acquired, will do intermittent and limited sleep"},{"line_number":2589,"context_line":"        yield sleeper()"}],"source_content_type":"text/x-python","patch_set":10,"id":"83db2a11_ed8a6f7a","line":2586,"in_reply_to":"f769475c_a9871455","updated":"2024-03-26 15:06:38.000000000","message":"Done","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"366003f002afd99bdd161a8b0209b7b0bc84ca1d","unresolved":true,"context_lines":[{"line_number":2589,"context_line":"        yield sleeper()"},{"line_number":2590,"context_line":""},{"line_number":2591,"context_line":""},{"line_number":2592,"context_line":"def populate_cache_with_cooperative_token(infocache, memcache, cache_key,"},{"line_number":2593,"context_line":"                                          cache_ttl, do_fetch_backend,"},{"line_number":2594,"context_line":"                                          token_ttl, sleep_interval,"},{"line_number":2595,"context_line":"                                          num_tokens\u003d3):"}],"source_content_type":"text/x-python","patch_set":10,"id":"7c3f16a4_be2cb29f","line":2592,"updated":"2024-03-15 16:54:00.000000000","message":"I quite like get_cooperative_token_or_sleep but I\u0027m not so keen on populate_cache_with_cooperative_token. 
I tried to see how things worked out without populate_cache_with_cooperative_token:\n\nhttps://review.opendev.org/c/openstack/swift/+/913425?usp\u003demail","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"50192330b0eaa55928bcecef326b623b9faae22f","unresolved":false,"context_lines":[{"line_number":2589,"context_line":"        yield sleeper()"},{"line_number":2590,"context_line":""},{"line_number":2591,"context_line":""},{"line_number":2592,"context_line":"def populate_cache_with_cooperative_token(infocache, memcache, cache_key,"},{"line_number":2593,"context_line":"                                          cache_ttl, do_fetch_backend,"},{"line_number":2594,"context_line":"                                          token_ttl, sleep_interval,"},{"line_number":2595,"context_line":"                                          num_tokens\u003d3):"}],"source_content_type":"text/x-python","patch_set":10,"id":"fe4ef526_48e38c46","line":2592,"in_reply_to":"7c3f16a4_be2cb29f","updated":"2024-09-25 16:09:59.000000000","message":"We didn\u0027t consider this path, per offline discussions.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2590,"context_line":""},{"line_number":2591,"context_line":""},{"line_number":2592,"context_line":"def populate_cache_with_cooperative_token(infocache, memcache, cache_key,"},{"line_number":2593,"context_line":"                                          cache_ttl, do_fetch_backend,"},{"line_number":2594,"context_line":"                                          token_ttl, sleep_interval,"},{"line_number":2595,"context_line":"                                          num_tokens\u003d3):"},{"line_number":2596,"context_line":"    \"\"\""}],"source_content_type":"text/x-python","patch_set":10,"id":"42906a73_a17485c6","line":2593,"updated":"2024-03-15 15:00:01.000000000","message":"I\u0027m surprised there aren\u0027t also parameters for some \"normalize_backend_result_for_memcache\" and/or \"normalize_memcache_value_for_infocache\" functions","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2590,"context_line":""},{"line_number":2591,"context_line":""},{"line_number":2592,"context_line":"def populate_cache_with_cooperative_token(infocache, memcache, cache_key,"},{"line_number":2593,"context_line":"                                          cache_ttl, do_fetch_backend,"},{"line_number":2594,"context_line":"                                          token_ttl, sleep_interval,"},{"line_number":2595,"context_line":"                                          num_tokens\u003d3):"},{"line_number":2596,"context_line":"    \"\"\""}],"source_content_type":"text/x-python","patch_set":10,"id":"3a85d9f4_a2e1f01f","line":2593,"in_reply_to":"42906a73_a17485c6","updated":"2024-03-20 04:07:49.000000000","message":"Good idea! 
I will add those encoder/decoder kinds of parameters.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"4251cfa176a1748522e900fb9664a6ae2dac966e","unresolved":true,"context_lines":[{"line_number":2591,"context_line":""},{"line_number":2592,"context_line":"def populate_cache_with_cooperative_token(infocache, memcache, cache_key,"},{"line_number":2593,"context_line":"                                          cache_ttl, do_fetch_backend,"},{"line_number":2594,"context_line":"                                          token_ttl, sleep_interval,"},{"line_number":2595,"context_line":"                                          num_tokens\u003d3):"},{"line_number":2596,"context_line":"    \"\"\""},{"line_number":2597,"context_line":"    Coalescing all requests into backend into a few with cooperative token."},{"line_number":2598,"context_line":"    Calling this function indicates that caller experiences cache miss when"}],"source_content_type":"text/x-python","patch_set":10,"id":"3f2810ec_c47252c6","line":2595,"range":{"start_line":2594,"start_character":53,"end_line":2595,"end_character":55},"updated":"2024-03-15 13:35:53.000000000","message":"These could be in the same order as get_cooperative_token_or_sleep","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":false,"context_lines":[{"line_number":2591,"context_line":""},{"line_number":2592,"context_line":"def populate_cache_with_cooperative_token(infocache, memcache, cache_key,"},{"line_number":2593,"context_line":"                                          cache_ttl, do_fetch_backend,"},{"line_number":2594,"context_line":"                                          token_ttl, sleep_interval,"},{"line_number":2595,"context_line":"                                          num_tokens\u003d3):"},{"line_number":2596,"context_line":"    \"\"\""},{"line_number":2597,"context_line":"    Coalescing all requests into backend into a few with cooperative token."},{"line_number":2598,"context_line":"    Calling this function indicates that caller experiences cache miss when"}],"source_content_type":"text/x-python","patch_set":10,"id":"ca8420e0_e4eca1c6","line":2595,"range":{"start_line":2594,"start_character":53,"end_line":2595,"end_character":55},"in_reply_to":"3f2810ec_c47252c6","updated":"2024-09-25 21:54:01.000000000","message":"This comment was on the previous (old) implementation.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2603,"context_line":""},{"line_number":2604,"context_line":"    The original ghetto lock only defines one token for usage, while this"},{"line_number":2605,"context_line":"    cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":2606,"context_line":"    number of tokens during one usage session, default to be 3. 
This is used to"},{"line_number":2607,"context_line":"    increase fault tolerance in the distributed environment, when one caller"},{"line_number":2608,"context_line":"    process with token hangs or exits, any other requests with token still can"},{"line_number":2609,"context_line":"    set new fetched data into memcache and finish the whole usage session. In"}],"source_content_type":"text/x-python","patch_set":10,"id":"074f0755_13970320","line":2606,"updated":"2024-03-15 15:00:01.000000000","message":"I don\u0027t like the use of \"maximum\" in this sentence; I think it\u0027s suggesting more than we can actually guarantee.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2603,"context_line":""},{"line_number":2604,"context_line":"    The original ghetto lock only defines one token for usage, while this"},{"line_number":2605,"context_line":"    cooperative token mechanism uses ``num_tokens`` to define the maximum"},{"line_number":2606,"context_line":"    number of tokens during one usage session, default to be 3. This is used to"},{"line_number":2607,"context_line":"    increase fault tolerance in the distributed environment, when one caller"},{"line_number":2608,"context_line":"    process with token hangs or exits, any other requests with token still can"},{"line_number":2609,"context_line":"    set new fetched data into memcache and finish the whole usage session. In"}],"source_content_type":"text/x-python","patch_set":10,"id":"80c7359d_6c53ca69","line":2606,"in_reply_to":"074f0755_13970320","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2619,"context_line":"    :param cache_key: the cache key."},{"line_number":2620,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":2621,"context_line":"        memcached."},{"line_number":2622,"context_line":"    :param do_fetch_backend: a functools.partial object to be called to fetch"},{"line_number":2623,"context_line":"        data from the backend; it needs to return a tuple of (data, response)."},{"line_number":2624,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2625,"context_line":"        when all requests with tokens failed to fetch data from backend or set"}],"source_content_type":"text/x-python","patch_set":10,"id":"126424da_a740e8a9","line":2622,"updated":"2024-03-15 15:00:01.000000000","message":"Is there really any requirement that it\u0027s a functools.partial - or is it just \"a callable\"?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2619,"context_line":"    :param cache_key: the cache key."},{"line_number":2620,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":2621,"context_line":"        
memcached."},{"line_number":2622,"context_line":"    :param do_fetch_backend: a functools.partial object to be called to fetch"},{"line_number":2623,"context_line":"        data from the backend; it needs to return a tuple of (data, response)."},{"line_number":2624,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2625,"context_line":"        when all requests with tokens failed to fetch data from backend or set"}],"source_content_type":"text/x-python","patch_set":10,"id":"56c66db0_3cea90c3","line":2622,"in_reply_to":"126424da_a740e8a9","updated":"2024-03-20 04:07:49.000000000","message":"Changed to \"a callable\".","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2620,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":2621,"context_line":"        memcached."},{"line_number":2622,"context_line":"    :param do_fetch_backend: a functools.partial object to be called to fetch"},{"line_number":2623,"context_line":"        data from the backend; it needs to return a tuple of (data, response)."},{"line_number":2624,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2625,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2626,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"}],"source_content_type":"text/x-python","patch_set":10,"id":"de824d7b_af04727e","line":2623,"updated":"2024-03-15 15:00:01.000000000","message":"What are the types of data and response?\n\nI assume data doesn\u0027t matter; response needs to be an HttpResponse because we use it for memcache stats?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2620,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":2621,"context_line":"        memcached."},{"line_number":2622,"context_line":"    :param do_fetch_backend: a functools.partial object to be called to fetch"},{"line_number":2623,"context_line":"        data from the backend; it needs to return a tuple of (data, response)."},{"line_number":2624,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2625,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2626,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"}],"source_content_type":"text/x-python","patch_set":10,"id":"5881353e_34e87813","line":2623,"in_reply_to":"de824d7b_af04727e","updated":"2024-03-20 04:07:49.000000000","message":"Done","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2624,"context_line":"    :param token_ttl: time-to-live of the global 
memcached cooperative token;"},{"line_number":2625,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2626,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"},{"line_number":2627,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2628,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2629,"context_line":"        cooperative token, suggest to be set as average time spent on"},{"line_number":2630,"context_line":"        ``do_fetch_backend``; this value should be less than token_ttl."}],"source_content_type":"text/x-python","patch_set":10,"id":"f316bc0e_49fedd5b","line":2627,"updated":"2024-03-15 15:00:01.000000000","message":"can this value have a reasonable default; or does it depend on other values like \"sleep_interval\" and/or \"num_tokens\"???","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"a46afc87b420cefdffe3df7e85f19593e4edda27","unresolved":false,"context_lines":[{"line_number":2624,"context_line":"    :param token_ttl: time-to-live of the global memcached cooperative token;"},{"line_number":2625,"context_line":"        when all requests with tokens failed to fetch data from backend or set"},{"line_number":2626,"context_line":"        data into memcached, ``token_ttl`` will expire the existing token and"},{"line_number":2627,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2628,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2629,"context_line":"        cooperative token, suggest to be set as average time spent on"},{"line_number":2630,"context_line":"        ``do_fetch_backend``; this value should be less than token_ttl."}],"source_content_type":"text/x-python","patch_set":10,"id":"3e1cc35e_51cddbd1","line":2627,"in_reply_to":"f316bc0e_49fedd5b","updated":"2024-03-20 17:45:21.000000000","message":"good suggestion. 
changed to below:\nretry_interval: half of the minimum time spent on ``do_fetch_backend``\n_token_ttl: default to be 10 times of the minimum time spent on ``do_fetch_backend``.\n\nso ``self._token_ttl \u003d retry_interval * 2 * 10``","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2627,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2628,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2629,"context_line":"        cooperative token, suggest to be set as average time spent on"},{"line_number":2630,"context_line":"        ``do_fetch_backend``; this value should be less than token_ttl."},{"line_number":2631,"context_line":"    :param num_tokens: the maximum number of tokens per each usage sesssion,"},{"line_number":2632,"context_line":"        also the the maximum number of in-flight requests allowed to fetch data"},{"line_number":2633,"context_line":"        from backend; default to be 3, which give redundancy when any request"}],"source_content_type":"text/x-python","patch_set":10,"id":"edfbfc4b_78925c85","line":2630,"updated":"2024-03-15 15:00:01.000000000","message":"\"average time spent on do_fetch_backend\" or maybe even \"minimum\" both seem reasonable\n\n\"should be less than token_ttl\" seems more like a *must* - perhaps token_ttl must even be some *multiplier* of sleep_interval?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2627,"context_line":"        make sure the new requests after ``token_ttl`` can continue to proceed."},{"line_number":2628,"context_line":"    :param sleep_interval: sleep interval when waiting for the global"},{"line_number":2629,"context_line":"        cooperative token, suggest to be set as average time spent on"},{"line_number":2630,"context_line":"        ``do_fetch_backend``; this value should be less than token_ttl."},{"line_number":2631,"context_line":"    :param num_tokens: the maximum number of tokens per each usage sesssion,"},{"line_number":2632,"context_line":"        also the the maximum number of in-flight requests allowed to fetch data"},{"line_number":2633,"context_line":"        from backend; default to be 3, which give redundancy when any request"}],"source_content_type":"text/x-python","patch_set":10,"id":"986f09b2_87f76817","line":2630,"in_reply_to":"edfbfc4b_78925c85","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2629,"context_line":"        cooperative token, suggest to be set as average time spent on"},{"line_number":2630,"context_line":"        ``do_fetch_backend``; this value should be less than token_ttl."},{"line_number":2631,"context_line":"    :param num_tokens: the maximum number of tokens per each usage sesssion,"},{"line_number":2632,"context_line":"        also the the 
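For illustration, the defaults described in that reply, as a minimal sketch; the 0.2s figure is an assumed example latency, not a value from the patch:

    # hypothetical numbers; only the relationships come from the review thread
    avg_fetch_time = 0.2                  # assumed average do_fetch_backend() latency (seconds)
    retry_interval = avg_fetch_time / 2   # half the time spent on do_fetch_backend
    token_ttl = retry_interval * 2 * 10   # i.e. 10x the average fetch time
    assert token_ttl == avg_fetch_time * 10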
maximum number of in-flight requests allowed to fetch data"},{"line_number":2633,"context_line":"        from backend; default to be 3, which give redundancy when any request"},{"line_number":2634,"context_line":"        with token fails to fetch data from the backend or fails to set new"},{"line_number":2635,"context_line":"        data into memcached."}],"source_content_type":"text/x-python","patch_set":10,"id":"cdc4b127_800fbca6","line":2632,"updated":"2024-03-15 15:00:01.000000000","message":"again I think the use of the word \"maximum\" here is mis-leading","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2629,"context_line":"        cooperative token, suggest to be set as average time spent on"},{"line_number":2630,"context_line":"        ``do_fetch_backend``; this value should be less than token_ttl."},{"line_number":2631,"context_line":"    :param num_tokens: the maximum number of tokens per each usage sesssion,"},{"line_number":2632,"context_line":"        also the the maximum number of in-flight requests allowed to fetch data"},{"line_number":2633,"context_line":"        from backend; default to be 3, which give redundancy when any request"},{"line_number":2634,"context_line":"        with token fails to fetch data from the backend or fails to set new"},{"line_number":2635,"context_line":"        data into memcached."}],"source_content_type":"text/x-python","patch_set":10,"id":"e64202a2_d0651513","line":2632,"in_reply_to":"cdc4b127_800fbca6","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2637,"context_line":"    :returns: a tuple of (data, backend_response, exception); data is the value"},{"line_number":2638,"context_line":"        of the data fetched from either memcached or backend. backend_response"},{"line_number":2639,"context_line":"        is the response return from backend, None if data is fetched from the"},{"line_number":2640,"context_line":"        memcached. exception is the memcache exception raised."},{"line_number":2641,"context_line":"    \"\"\""},{"line_number":2642,"context_line":"    token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"},{"line_number":2643,"context_line":"    if not memcache:"}],"source_content_type":"text/x-python","patch_set":10,"id":"8cf9040c_cdee5476","line":2640,"updated":"2024-03-15 15:00:01.000000000","message":"it\u0027s unusual to see an exception being *returned* - it might be useful to explain why it\u0027s not raised or how/why callers are expected to consume this memcache error.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2637,"context_line":"    :returns: a tuple of (data, backend_response, exception); data is the value"},{"line_number":2638,"context_line":"        of the data fetched from either memcached or backend. 
backend_response"},{"line_number":2639,"context_line":"        is the response return from backend, None if data is fetched from the"},{"line_number":2640,"context_line":"        memcached. exception is the memcache exception raised."},{"line_number":2641,"context_line":"    \"\"\""},{"line_number":2642,"context_line":"    token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"},{"line_number":2643,"context_line":"    if not memcache:"}],"source_content_type":"text/x-python","patch_set":10,"id":"74261a18_4711bb41","line":2640,"in_reply_to":"8cf9040c_cdee5476","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2640,"context_line":"        memcached. exception is the memcache exception raised."},{"line_number":2641,"context_line":"    \"\"\""},{"line_number":2642,"context_line":"    token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"},{"line_number":2643,"context_line":"    if not memcache:"},{"line_number":2644,"context_line":"        data, backend_response \u003d do_fetch_backend()"},{"line_number":2645,"context_line":"        return data, backend_response, None"},{"line_number":2646,"context_line":"    try:"}],"source_content_type":"text/x-python","patch_set":10,"id":"80c630ba_e0c60b51","line":2643,"updated":"2024-03-15 15:00:01.000000000","message":"the docstring mentions that callers are expected to have already had a memcache miss - I might assume that means they\u0027ve already looked in infocache as well so maybe we don\u0027t need to check.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2640,"context_line":"        memcached. exception is the memcache exception raised."},{"line_number":2641,"context_line":"    \"\"\""},{"line_number":2642,"context_line":"    token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"},{"line_number":2643,"context_line":"    if not memcache:"},{"line_number":2644,"context_line":"        data, backend_response \u003d do_fetch_backend()"},{"line_number":2645,"context_line":"        return data, backend_response, None"},{"line_number":2646,"context_line":"    try:"}],"source_content_type":"text/x-python","patch_set":10,"id":"38675ad7_b3d9506a","line":2643,"in_reply_to":"80c630ba_e0c60b51","updated":"2024-03-20 04:07:49.000000000","message":"This function still need to check \"not memcache\" for the usage path (updating or listing shard range cache). 
So I modified the docstring.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"288c0cdfc2d2834c8643a477cd994323358d11e6","unresolved":true,"context_lines":[{"line_number":2648,"context_line":"            memcache, token_key,"},{"line_number":2649,"context_line":"            token_ttl, num_tokens, sleep_interval"},{"line_number":2650,"context_line":"        ) as sleep_countdown:"},{"line_number":2651,"context_line":"            for tick in sleep_countdown:"},{"line_number":2652,"context_line":"                # No token acquired, it means that there are requests in-flight"},{"line_number":2653,"context_line":"                # which will fetch data form the backend servers and update"},{"line_number":2654,"context_line":"                # them in cache, wait for them to finish with limited retires."}],"source_content_type":"text/x-python","patch_set":10,"id":"3a9ab61c_40b5906b","line":2651,"range":{"start_line":2651,"start_character":24,"end_line":2651,"end_character":39},"updated":"2024-03-13 21:59:30.000000000","message":"Arguably it\u0027s a count not a countdown because we return retries :P \n\nBut yeah really interesting sleep implementation. I kinda like it. The tick value seems irrelevant however, we don\u0027t log or mention its value anywhere. But still pretty cool.\n\nDid we want to \"log\" or know when the sleep_countdown finishes and the waiting request needs to then go get the data? As that might indicate we need to up the token count maybe? I don\u0027t really want to pass in a logger, but it might be nice to log or even increment a metric when/if it happens?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2648,"context_line":"            memcache, token_key,"},{"line_number":2649,"context_line":"            token_ttl, num_tokens, sleep_interval"},{"line_number":2650,"context_line":"        ) as sleep_countdown:"},{"line_number":2651,"context_line":"            for tick in sleep_countdown:"},{"line_number":2652,"context_line":"                # No token acquired, it means that there are requests in-flight"},{"line_number":2653,"context_line":"                # which will fetch data form the backend servers and update"},{"line_number":2654,"context_line":"                # them in cache, wait for them to finish with limited retires."}],"source_content_type":"text/x-python","patch_set":10,"id":"e84cbc9e_a746e130","line":2651,"range":{"start_line":2651,"start_character":24,"end_line":2651,"end_character":39},"in_reply_to":"21da5117_714518c0","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"4251cfa176a1748522e900fb9664a6ae2dac966e","unresolved":true,"context_lines":[{"line_number":2648,"context_line":"            memcache, token_key,"},{"line_number":2649,"context_line":"            token_ttl, num_tokens, sleep_interval"},{"line_number":2650,"context_line":"        ) as sleep_countdown:"},{"line_number":2651,"context_line":"            for tick in sleep_countdown:"},{"line_number":2652,"context_line":"                # No token 
acquired, it means that there are requests in-flight"},{"line_number":2653,"context_line":"                # which will fetch data form the backend servers and update"},{"line_number":2654,"context_line":"                # them in cache, wait for them to finish with limited retires."}],"source_content_type":"text/x-python","patch_set":10,"id":"d8b47cd0_151887fe","line":2651,"range":{"start_line":2651,"start_character":24,"end_line":2651,"end_character":39},"in_reply_to":"3a9ab61c_40b5906b","updated":"2024-03-15 13:35:53.000000000","message":"IMHO if this is in utils as a generic helper then it shouldn\u0027t be emitting metrics by default - we should figure out how the callers can know the outcome of the coop token and let the caller emit metrics if it wants them.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2648,"context_line":"            memcache, token_key,"},{"line_number":2649,"context_line":"            token_ttl, num_tokens, sleep_interval"},{"line_number":2650,"context_line":"        ) as sleep_countdown:"},{"line_number":2651,"context_line":"            for tick in sleep_countdown:"},{"line_number":2652,"context_line":"                # No token acquired, it means that there are requests in-flight"},{"line_number":2653,"context_line":"                # which will fetch data form the backend servers and update"},{"line_number":2654,"context_line":"                # them in cache, wait for them to finish with limited retires."}],"source_content_type":"text/x-python","patch_set":10,"id":"21da5117_714518c0","line":2651,"range":{"start_line":2651,"start_character":24,"end_line":2651,"end_character":39},"in_reply_to":"3a9ab61c_40b5906b","updated":"2024-03-15 15:00:01.000000000","message":"we definitely want to have this implementation well instrumented so that we verify it\u0027s behaving as expected in testing.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2653,"context_line":"                # which will fetch data form the backend servers and update"},{"line_number":2654,"context_line":"                # them in cache, wait for them to finish with limited retires."},{"line_number":2655,"context_line":"                cache_value \u003d memcache.get("},{"line_number":2656,"context_line":"                    cache_key, raise_on_error\u003dTrue)"},{"line_number":2657,"context_line":"                if cache_value:"},{"line_number":2658,"context_line":"                    # Cache hit due to other request finished populating cache."},{"line_number":2659,"context_line":"                    if infocache:"}],"source_content_type":"text/x-python","patch_set":10,"id":"45bab5d4_21942e8a","line":2656,"updated":"2024-03-15 15:00:01.000000000","message":"should we bind the memcache exception handling a little tighter here?  
If we\u0027d committed to reading from memcache in a loop until token_ttl anyway; maybe it\u0027s reasonable to \"retry\" on the next sleep/pass when we hit a bad connection.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2653,"context_line":"                # which will fetch data form the backend servers and update"},{"line_number":2654,"context_line":"                # them in cache, wait for them to finish with limited retires."},{"line_number":2655,"context_line":"                cache_value \u003d memcache.get("},{"line_number":2656,"context_line":"                    cache_key, raise_on_error\u003dTrue)"},{"line_number":2657,"context_line":"                if cache_value:"},{"line_number":2658,"context_line":"                    # Cache hit due to other request finished populating cache."},{"line_number":2659,"context_line":"                    if infocache:"}],"source_content_type":"text/x-python","patch_set":10,"id":"6ecc9664_6b860d7c","line":2656,"in_reply_to":"45bab5d4_21942e8a","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2657,"context_line":"                if cache_value:"},{"line_number":2658,"context_line":"                    # Cache hit due to other request finished populating cache."},{"line_number":2659,"context_line":"                    if infocache:"},{"line_number":2660,"context_line":"                        infocache[cache_key] \u003d cache_value"},{"line_number":2661,"context_line":"                    return cache_value, None, None"},{"line_number":2662,"context_line":"            else:"},{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"}],"source_content_type":"text/x-python","patch_set":10,"id":"bf599d7f_61bf2568","line":2660,"updated":"2024-03-15 15:00:01.000000000","message":"is this always what we want?  I could imagine a use-case where a cache object is converted to a simple primitive for serialization into memcache; but often more useful as a complex python object when stashed in infocache (e.g. 
NamespaceBoundList vs ns_bounds_list.bounds)","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2657,"context_line":"                if cache_value:"},{"line_number":2658,"context_line":"                    # Cache hit due to other request finished populating cache."},{"line_number":2659,"context_line":"                    if infocache:"},{"line_number":2660,"context_line":"                        infocache[cache_key] \u003d cache_value"},{"line_number":2661,"context_line":"                    return cache_value, None, None"},{"line_number":2662,"context_line":"            else:"},{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"}],"source_content_type":"text/x-python","patch_set":10,"id":"26cd36e6_73227cc8","line":2660,"in_reply_to":"bf599d7f_61bf2568","updated":"2024-03-20 04:07:49.000000000","message":"ACK. I have added memcache data encoder/decoder to store different format of data in infocache and memcache.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"4251cfa176a1748522e900fb9664a6ae2dac966e","unresolved":true,"context_lines":[{"line_number":2658,"context_line":"                    # Cache hit due to other request finished populating cache."},{"line_number":2659,"context_line":"                    if infocache:"},{"line_number":2660,"context_line":"                        infocache[cache_key] \u003d cache_value"},{"line_number":2661,"context_line":"                    return cache_value, None, None"},{"line_number":2662,"context_line":"            else:"},{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"},{"line_number":2664,"context_line":"                # place, or sleep countdown has been exhausted. Fetch data from"}],"source_content_type":"text/x-python","patch_set":10,"id":"532f0e86_3ef307ab","line":2661,"updated":"2024-03-15 13:35:53.000000000","message":"I have mixed feelings about multiple return sites. There\u0027s 5 returns in this medium length function. I find it helps if they all at least return the same var names (or None), and have a blank line after","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2658,"context_line":"                    # Cache hit due to other request finished populating cache."},{"line_number":2659,"context_line":"                    if infocache:"},{"line_number":2660,"context_line":"                        infocache[cache_key] \u003d cache_value"},{"line_number":2661,"context_line":"                    return cache_value, None, None"},{"line_number":2662,"context_line":"            else:"},{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"},{"line_number":2664,"context_line":"                # place, or sleep countdown has been exhausted. 
Fetch data from"}],"source_content_type":"text/x-python","patch_set":10,"id":"7839c87f_abaaab9c","line":2661,"in_reply_to":"532f0e86_3ef307ab","updated":"2024-03-20 04:07:49.000000000","message":"will consolidate those 5 returns into one or two.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2659,"context_line":"                    if infocache:"},{"line_number":2660,"context_line":"                        infocache[cache_key] \u003d cache_value"},{"line_number":2661,"context_line":"                    return cache_value, None, None"},{"line_number":2662,"context_line":"            else:"},{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"},{"line_number":2664,"context_line":"                # place, or sleep countdown has been exhausted. Fetch data from"},{"line_number":2665,"context_line":"                # backend, and then populate the memcache."}],"source_content_type":"text/x-python","patch_set":10,"id":"47c02889_23359907","line":2662,"updated":"2024-03-15 15:00:01.000000000","message":"this is interesting; I feel like I rarely see an else clause used on a for loop w/o a break.\n\nif the sleep_countdown is empty we get to else.\n\nif the sleep_countdown is exhausted we get to else.\n\nif the sleep_countdown loop *returns* we (obviously) don\u0027t get to the else.\n\nI would imagine this context-manager/try block could be re-written with a single return if that improved the understandability of the flow control.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2659,"context_line":"                    if infocache:"},{"line_number":2660,"context_line":"                        infocache[cache_key] \u003d cache_value"},{"line_number":2661,"context_line":"                    return cache_value, None, None"},{"line_number":2662,"context_line":"            else:"},{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"},{"line_number":2664,"context_line":"                # place, or sleep countdown has been exhausted. Fetch data from"},{"line_number":2665,"context_line":"                # backend, and then populate the memcache."}],"source_content_type":"text/x-python","patch_set":10,"id":"f9798a5a_ce787fb9","line":2662,"in_reply_to":"47c02889_23359907","updated":"2024-03-20 04:07:49.000000000","message":"the context manager interface of wrapping cooperative token acquisition, and the combination of this for loop does make this new implementation harder to understand. 
I have rolled back to previous patch which was more straightforward and modified it to use a single return.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2662,"context_line":"            else:"},{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"},{"line_number":2664,"context_line":"                # place, or sleep countdown has been exhausted. Fetch data from"},{"line_number":2665,"context_line":"                # backend, and then populate the memcache."},{"line_number":2666,"context_line":"                got_token \u003d sleep_countdown \u003d\u003d []"},{"line_number":2667,"context_line":"                exception \u003d None"},{"line_number":2668,"context_line":"                data, backend_response \u003d do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":10,"id":"0c90947d_f59aa411","line":2665,"updated":"2024-03-15 15:00:01.000000000","message":"so this is both the happy-path \"we got a token\" case and the degenerate \"we waited and waited but no one set the token; so act like cooperative tokens don\u0027t exist\" case.  That\u0027s almost surprising!?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2662,"context_line":"            else:"},{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"},{"line_number":2664,"context_line":"                # place, or sleep countdown has been exhausted. Fetch data from"},{"line_number":2665,"context_line":"                # backend, and then populate the memcache."},{"line_number":2666,"context_line":"                got_token \u003d sleep_countdown \u003d\u003d []"},{"line_number":2667,"context_line":"                exception \u003d None"},{"line_number":2668,"context_line":"                data, backend_response \u003d do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":10,"id":"243ea9cf_0ae5d5ef","line":2665,"in_reply_to":"0c90947d_f59aa411","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"},{"line_number":2664,"context_line":"                # place, or sleep countdown has been exhausted. 
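For reference, the for/else behaviour discussed above, reduced to a standalone sketch; ``ticks``, ``check`` and ``fetch`` here are stand-ins, not the patch's actual helpers:

    def wait_or_fetch(ticks, check, fetch):
        # for/else without a break: the else body runs when the loop
        # finishes normally, including when ticks is empty; returning
        # from the loop body is the only way to skip it here.
        for tick in ticks:
            value = check()
            if value:
                return value
        else:
            return fetch()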
Fetch data from"},{"line_number":2665,"context_line":"                # backend, and then populate the memcache."},{"line_number":2666,"context_line":"                got_token \u003d sleep_countdown \u003d\u003d []"},{"line_number":2667,"context_line":"                exception \u003d None"},{"line_number":2668,"context_line":"                data, backend_response \u003d do_fetch_backend()"},{"line_number":2669,"context_line":"                if not data:"}],"source_content_type":"text/x-python","patch_set":10,"id":"435672e5_9ee0dcfc","line":2666,"updated":"2024-03-15 15:00:01.000000000","message":"wow, ok - so I mis-read this a couple of times.\n\nI think `got_token \u003d not sleep_countdown` would be the most idiomatic way to express this?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2663,"context_line":"                # Either we have acquired a cooperative token in the first"},{"line_number":2664,"context_line":"                # place, or sleep countdown has been exhausted. Fetch data from"},{"line_number":2665,"context_line":"                # backend, and then populate the memcache."},{"line_number":2666,"context_line":"                got_token \u003d sleep_countdown \u003d\u003d []"},{"line_number":2667,"context_line":"                exception \u003d None"},{"line_number":2668,"context_line":"                data, backend_response \u003d do_fetch_backend()"},{"line_number":2669,"context_line":"                if not data:"}],"source_content_type":"text/x-python","patch_set":10,"id":"88daceda_6403a349","line":2666,"in_reply_to":"435672e5_9ee0dcfc","updated":"2024-03-20 04:07:49.000000000","message":"Acknowledged","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2667,"context_line":"                exception \u003d None"},{"line_number":2668,"context_line":"                data, backend_response \u003d do_fetch_backend()"},{"line_number":2669,"context_line":"                if not data:"},{"line_number":2670,"context_line":"                    return data, backend_response, exception"},{"line_number":2671,"context_line":"                if infocache:"},{"line_number":2672,"context_line":"                    infocache[cache_key] \u003d data"},{"line_number":2673,"context_line":"                try:"}],"source_content_type":"text/x-python","patch_set":10,"id":"230978bc_53d9334d","line":2670,"updated":"2024-03-15 15:00:01.000000000","message":"this is always exception \u003d None, right?  
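The suggested idiom relies on plain list truthiness and is equivalent to the original comparison; a quick standalone check:

    sleep_countdown = []
    # an empty countdown means the token was acquired in the first place
    got_token = not sleep_countdown
    assert got_token == (sleep_countdown == [])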
And we only populate infocache if there\u0027s data?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2667,"context_line":"                exception \u003d None"},{"line_number":2668,"context_line":"                data, backend_response \u003d do_fetch_backend()"},{"line_number":2669,"context_line":"                if not data:"},{"line_number":2670,"context_line":"                    return data, backend_response, exception"},{"line_number":2671,"context_line":"                if infocache:"},{"line_number":2672,"context_line":"                    infocache[cache_key] \u003d data"},{"line_number":2673,"context_line":"                try:"}],"source_content_type":"text/x-python","patch_set":10,"id":"e6612a33_ea60e688","line":2670,"in_reply_to":"230978bc_53d9334d","updated":"2024-03-20 04:07:49.000000000","message":"exception is not related anymore. but yes, we only populate infocache if there\u0027s data currently.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2668,"context_line":"                data, backend_response \u003d do_fetch_backend()"},{"line_number":2669,"context_line":"                if not data:"},{"line_number":2670,"context_line":"                    return data, backend_response, exception"},{"line_number":2671,"context_line":"                if infocache:"},{"line_number":2672,"context_line":"                    infocache[cache_key] \u003d data"},{"line_number":2673,"context_line":"                try:"},{"line_number":2674,"context_line":"                    if got_token:"}],"source_content_type":"text/x-python","patch_set":10,"id":"f6edce3d_c2d8e3d9","line":2671,"updated":"2024-03-15 15:00:01.000000000","message":"why do we allow callers to not pass in infocache?  
What if infocache is just \"empty\"?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":false,"context_lines":[{"line_number":2668,"context_line":"                data, backend_response \u003d do_fetch_backend()"},{"line_number":2669,"context_line":"                if not data:"},{"line_number":2670,"context_line":"                    return data, backend_response, exception"},{"line_number":2671,"context_line":"                if infocache:"},{"line_number":2672,"context_line":"                    infocache[cache_key] \u003d data"},{"line_number":2673,"context_line":"                try:"},{"line_number":2674,"context_line":"                    if got_token:"}],"source_content_type":"text/x-python","patch_set":10,"id":"c0c0230c_393ad6d0","line":2671,"in_reply_to":"f6edce3d_c2d8e3d9","updated":"2024-03-26 15:06:38.000000000","message":"Done","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2674,"context_line":"                    if got_token:"},{"line_number":2675,"context_line":"                        memcache.set("},{"line_number":2676,"context_line":"                            cache_key, data, time\u003dcache_ttl,"},{"line_number":2677,"context_line":"                            raise_on_error\u003dTrue)"},{"line_number":2678,"context_line":"                except MemcacheConnectionError:"},{"line_number":2679,"context_line":"                    exception \u003d MemcacheSetConnectionError("},{"line_number":2680,"context_line":"                        \"Error setting value to memcache\")"}],"source_content_type":"text/x-python","patch_set":10,"id":"7e141924_9aae812f","line":2677,"updated":"2024-03-15 15:00:01.000000000","message":"why is setting the value in memcache conditional on getting the token?  All the docstrings make it sound like the expensive/scary part was doing the backend request (which we\u0027ve already done).  
There\u0027s no mention of trying to protect memcache - is this block going out of its way to avoid the memcache write under contention because we believe the memcache set is actually the problem and not the backend fetch?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2674,"context_line":"                    if got_token:"},{"line_number":2675,"context_line":"                        memcache.set("},{"line_number":2676,"context_line":"                            cache_key, data, time\u003dcache_ttl,"},{"line_number":2677,"context_line":"                            raise_on_error\u003dTrue)"},{"line_number":2678,"context_line":"                except MemcacheConnectionError:"},{"line_number":2679,"context_line":"                    exception \u003d MemcacheSetConnectionError("},{"line_number":2680,"context_line":"                        \"Error setting value to memcache\")"}],"source_content_type":"text/x-python","patch_set":10,"id":"7a6b104b_ee2dabe2","line":2677,"in_reply_to":"7e141924_9aae812f","updated":"2024-03-20 04:07:49.000000000","message":"Makes sense, of course we are protecting the backend. Requests should set the value fetched from the backend into memcached even if they had a problem connecting to memcache for the token key in the past. And since the shard range cache key is different from the token key, they will go to a different memcached server endpoint too.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2678,"context_line":"                except MemcacheConnectionError:"},{"line_number":2679,"context_line":"                    exception \u003d MemcacheSetConnectionError("},{"line_number":2680,"context_line":"                        \"Error setting value to memcache\")"},{"line_number":2681,"context_line":"                return data, backend_response, exception"},{"line_number":2682,"context_line":"    except MemcacheConnectionError:"},{"line_number":2683,"context_line":"        # we had bad connection to memcached previously when getting"},{"line_number":2684,"context_line":"        # cooperative token, just fetch data from backend and skip setting"}],"source_content_type":"text/x-python","patch_set":10,"id":"6ff92e16_98ea595b","line":2681,"updated":"2024-03-15 15:00:01.000000000","message":"I\u0027m still curious what callers are expected to do with this exception; presumably they could log \"error setting \u003cshard-ranges for a/c\u003e in memcache\" whereas we could only log \"error setting \u003ckey\u003e in memcache\"?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":false,"context_lines":[{"line_number":2678,"context_line":"                except MemcacheConnectionError:"},{"line_number":2679,"context_line":"                    exception \u003d MemcacheSetConnectionError("},{"line_number":2680,"context_line":"                        \"Error setting value to memcache\")"},{"line_number":2681,"context_line":"                return data, 
backend_response, exception"},{"line_number":2682,"context_line":"    except MemcacheConnectionError:"},{"line_number":2683,"context_line":"        # we had bad connection to memcached previously when getting"},{"line_number":2684,"context_line":"        # cooperative token, just fetch data from backend and skip setting"}],"source_content_type":"text/x-python","patch_set":10,"id":"599cb089_881615b6","line":2681,"in_reply_to":"6ff92e16_98ea595b","updated":"2024-03-26 15:06:38.000000000","message":"Done","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2682,"context_line":"    except MemcacheConnectionError:"},{"line_number":2683,"context_line":"        # we had bad connection to memcached previously when getting"},{"line_number":2684,"context_line":"        # cooperative token, just fetch data from backend and skip setting"},{"line_number":2685,"context_line":"        # data into memcache."},{"line_number":2686,"context_line":"        exception \u003d MemcacheIncrConnectionError("},{"line_number":2687,"context_line":"            \"Error incrementing value of memcache\")"},{"line_number":2688,"context_line":"        data, backend_response \u003d do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":10,"id":"ce1d9187_514a4dcf","line":2685,"updated":"2024-03-15 15:00:01.000000000","message":"this could be a failure to increment the token, a failure to delete the token, a failure to read the value from memcache (after ??? retries) (we explicitly handle \"failure to write the value\" above)\n\nDo we expect we\u0027ll always want those cases to degrade into the same failure handling?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2682,"context_line":"    except MemcacheConnectionError:"},{"line_number":2683,"context_line":"        # we had bad connection to memcached previously when getting"},{"line_number":2684,"context_line":"        # cooperative token, just fetch data from backend and skip setting"},{"line_number":2685,"context_line":"        # data into memcache."},{"line_number":2686,"context_line":"        exception \u003d MemcacheIncrConnectionError("},{"line_number":2687,"context_line":"            \"Error incrementing value of memcache\")"},{"line_number":2688,"context_line":"        data, backend_response \u003d do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":10,"id":"87db2d13_156f20b2","line":2685,"in_reply_to":"46b0aacf_4fba99d0","updated":"2024-03-20 04:07:49.000000000","message":"made the change to still set backend value into memcache, in the case of previous failures.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"178eaf3b570d0709f0c793e2ac99ca4663672a43","unresolved":true,"context_lines":[{"line_number":2682,"context_line":"    except MemcacheConnectionError:"},{"line_number":2683,"context_line":"        # we had bad connection to memcached previously when getting"},{"line_number":2684,"context_line":"        # cooperative token, just fetch data from 
backend and skip setting"},{"line_number":2685,"context_line":"        # data into memcache."},{"line_number":2686,"context_line":"        exception \u003d MemcacheIncrConnectionError("},{"line_number":2687,"context_line":"            \"Error incrementing value of memcache\")"},{"line_number":2688,"context_line":"        data, backend_response \u003d do_fetch_backend()"}],"source_content_type":"text/x-python","patch_set":10,"id":"46b0aacf_4fba99d0","line":2685,"in_reply_to":"ce1d9187_514a4dcf","updated":"2024-03-15 16:34:32.000000000","message":"I\u0027m not convinced that we should be avoiding future memcache calls based on previous failures - especially if the failure was for a different key, hitting different memcache endpoints.\n\nNo other process is going to avoid setting to the \u0027problem\u0027 key.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2684,"context_line":"        # cooperative token, just fetch data from backend and skip setting"},{"line_number":2685,"context_line":"        # data into memcache."},{"line_number":2686,"context_line":"        exception \u003d MemcacheIncrConnectionError("},{"line_number":2687,"context_line":"            \"Error incrementing value of memcache\")"},{"line_number":2688,"context_line":"        data, backend_response \u003d do_fetch_backend()"},{"line_number":2689,"context_line":"        return data, backend_response, exception"},{"line_number":2690,"context_line":""}],"source_content_type":"text/x-python","patch_set":10,"id":"ca8abbd1_12206c28","line":2687,"updated":"2024-03-15 15:00:01.000000000","message":"is this the correct exception when the error was trying to read the value from memcache?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2684,"context_line":"        # cooperative token, just fetch data from backend and skip setting"},{"line_number":2685,"context_line":"        # data into memcache."},{"line_number":2686,"context_line":"        exception \u003d MemcacheIncrConnectionError("},{"line_number":2687,"context_line":"            \"Error incrementing value of memcache\")"},{"line_number":2688,"context_line":"        data, backend_response \u003d do_fetch_backend()"},{"line_number":2689,"context_line":"        return data, backend_response, exception"},{"line_number":2690,"context_line":""}],"source_content_type":"text/x-python","patch_set":10,"id":"fa344b86_91cff4a9","line":2687,"in_reply_to":"ca8abbd1_12206c28","updated":"2024-03-20 04:07:49.000000000","message":"not related anymore.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2685,"context_line":"        # data into memcache."},{"line_number":2686,"context_line":"        exception \u003d MemcacheIncrConnectionError("},{"line_number":2687,"context_line":"            \"Error incrementing value of memcache\")"},{"line_number":2688,"context_line":"        data, backend_response \u003d 
do_fetch_backend()"},{"line_number":2689,"context_line":"        return data, backend_response, exception"},{"line_number":2690,"context_line":""},{"line_number":2691,"context_line":""}],"source_content_type":"text/x-python","patch_set":10,"id":"1fbbfd67_72184470","line":2688,"updated":"2024-03-15 15:00:01.000000000","message":"if we\u0027ve managed to fetch the value; do we not want to even *try* to write it to memcache?  What if the connection error is intermittent?","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2685,"context_line":"        # data into memcache."},{"line_number":2686,"context_line":"        exception \u003d MemcacheIncrConnectionError("},{"line_number":2687,"context_line":"            \"Error incrementing value of memcache\")"},{"line_number":2688,"context_line":"        data, backend_response \u003d do_fetch_backend()"},{"line_number":2689,"context_line":"        return data, backend_response, exception"},{"line_number":2690,"context_line":""},{"line_number":2691,"context_line":""}],"source_content_type":"text/x-python","patch_set":10,"id":"a55cdcbc_94ed9dba","line":2688,"in_reply_to":"1fbbfd67_72184470","updated":"2024-03-20 04:07:49.000000000","message":"have made the change to write it into memcache too in this case.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"602a1f9a5eb4130096d7d161714ab9fb43efea3e","unresolved":true,"context_lines":[{"line_number":2687,"context_line":"            \"Error incrementing value of memcache\")"},{"line_number":2688,"context_line":"        data, backend_response \u003d do_fetch_backend()"},{"line_number":2689,"context_line":"        return data, backend_response, exception"},{"line_number":2690,"context_line":""},{"line_number":2691,"context_line":""},{"line_number":2692,"context_line":"def read_conf_dir(parser, conf_dir):"},{"line_number":2693,"context_line":"    conf_files \u003d []"}],"source_content_type":"text/x-python","patch_set":10,"id":"29cec983_08841c1b","line":2690,"updated":"2024-03-15 15:00:01.000000000","message":"there\u0027s an \"implicit return None\" here that sticks out to me and I\u0027m having trouble convincing myself we can\u0027t ever hit it.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"923afc9c0155910e76778f62795889b232b31cc1","unresolved":false,"context_lines":[{"line_number":2687,"context_line":"            \"Error incrementing value of memcache\")"},{"line_number":2688,"context_line":"        data, backend_response \u003d do_fetch_backend()"},{"line_number":2689,"context_line":"        return data, backend_response, exception"},{"line_number":2690,"context_line":""},{"line_number":2691,"context_line":""},{"line_number":2692,"context_line":"def read_conf_dir(parser, conf_dir):"},{"line_number":2693,"context_line":"    conf_files \u003d []"}],"source_content_type":"text/x-python","patch_set":10,"id":"e774bac1_659027b9","line":2690,"in_reply_to":"29cec983_08841c1b","updated":"2024-03-20 04:07:49.000000000","message":"not related 
anymore.","commit_id":"cd5ae367d72004f62f552a44076b040bf2219967"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"c783da2689193d11c60a978c512ea3f46be8dc61","unresolved":true,"context_lines":[{"line_number":2586,"context_line":""},{"line_number":2587,"context_line":"    def __init__(self, infocache, memcache,"},{"line_number":2588,"context_line":"                 cache_key, cache_ttl, do_fetch_backend, retry_interval,"},{"line_number":2589,"context_line":"                 cache_encoder\u003dNone, cache_decoder\u003dNone, num_tokens\u003d3):"},{"line_number":2590,"context_line":"        self._infocache \u003d infocache"},{"line_number":2591,"context_line":"        self._memcache \u003d memcache"},{"line_number":2592,"context_line":"        self._cache_key \u003d cache_key"}],"source_content_type":"text/x-python","patch_set":14,"id":"884302ea_61ea0254","line":2589,"updated":"2024-03-22 18:04:02.000000000","message":"In abstract, the requirement is to manage one of two things happening:\n\n- token is granted, do A (in the concrete case, fetch from backend)\n- token is not granted, do B (in the concrete case, retry cache until some timeout then fetch from backend)\n\nThis replaces the context manager (CM) from patchset 10. The goal of using a CM was that the CM retained responsibility for the token, in particular releasing/deleting it, whilst handing responsibility back to the caller via a yield for actually doing A or B fetches. The CM also implemented the retry sleeping which made it a little funky!\n\nThe class takes an alternative approach: rather than a CM passing control back to the controller via a yield, the caller passes in callback functions. The class can still be responsible for deleting the token, and the caller can still be responsible for defining how to do A or B (backend fetch or cache fetch).\n\nA third approach would be to have an abstract class that must be extended with concrete implementations of A and B, rather than passing in callbacks.\n\nIn each case though, I\u0027m only expecting the caller/subclass to have to provide TWO functions: A and B. But this class requires 3: do_fetch_backend, cache_encoder and cache_decoder. I think that is because this class is very opinionated about when data should be set in cache, and I\u0027m not sure I agree with that. 
I expected two callbacks:\n\nA -\u003e get data from backend and set them in cache\nB -\u003e get data from memcache","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2586,"context_line":""},{"line_number":2587,"context_line":"    def __init__(self, infocache, memcache,"},{"line_number":2588,"context_line":"                 cache_key, cache_ttl, do_fetch_backend, retry_interval,"},{"line_number":2589,"context_line":"                 cache_encoder\u003dNone, cache_decoder\u003dNone, num_tokens\u003d3):"},{"line_number":2590,"context_line":"        self._infocache \u003d infocache"},{"line_number":2591,"context_line":"        self._memcache \u003d memcache"},{"line_number":2592,"context_line":"        self._cache_key \u003d cache_key"}],"source_content_type":"text/x-python","patch_set":14,"id":"a9dc2f1f_6ae9b374","line":2589,"in_reply_to":"884302ea_61ea0254","updated":"2024-03-26 15:06:38.000000000","message":"\u003e have an abstract class that must be extended with concrete implementations of \u003cthe required behaviors\u003e, rather than passing in callbacks\n\nthat\u0027s how I would expect a class to work; dependency injection is a weird pattern\n\n\u003e this class is very opinionated about when data should be set in cache, and I\u0027m not sure I agree with that. I expected two callbacks\n\ndefinitely going to be possible to have some reasonable disagreement here; there\u0027s some design trade-offs.  As much as reasonable I\u0027d prefer the \"logic\" of \"cooperative caching\" to be implemented as part of the util - with the requirement on the consumers/callers being mostly only the backend requests and formatting.\n\nIf we had an interface for a \"cacheable object type\" we might be able to require just the fetch_from_backend NotImplemented method, and a \"cacheable_class\" class attribute.  
Our concrete methods in the abstract class would look something like:\n\n    cacheable \u003d self.cacheable_class.from_backend_resp(resp)\n    memcache.set(cacheable.encode_for_memcache())\n    infocache.set(cacheable)\n    \nor\n\n    raw_memcache_json \u003d memcache.get(self.cache_key)\n    cacheable \u003d self.cacheable_class.from_memcache_resp(raw_memcache_json)\n    infocache.set(cacheable)\n    \n^ and all the appropriate cache error-handling/statsd logic.","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":false,"context_lines":[{"line_number":2586,"context_line":""},{"line_number":2587,"context_line":"    def __init__(self, infocache, memcache,"},{"line_number":2588,"context_line":"                 cache_key, cache_ttl, do_fetch_backend, retry_interval,"},{"line_number":2589,"context_line":"                 cache_encoder\u003dNone, cache_decoder\u003dNone, num_tokens\u003d3):"},{"line_number":2590,"context_line":"        self._infocache \u003d infocache"},{"line_number":2591,"context_line":"        self._memcache \u003d memcache"},{"line_number":2592,"context_line":"        self._cache_key \u003d cache_key"}],"source_content_type":"text/x-python","patch_set":14,"id":"289281f0_3da4eb18","line":2589,"in_reply_to":"a9dc2f1f_6ae9b374","updated":"2024-09-25 21:54:01.000000000","message":"Done","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"c783da2689193d11c60a978c512ea3f46be8dc61","unresolved":true,"context_lines":[{"line_number":2594,"context_line":"        self._token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"},{"line_number":2595,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":2596,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":2597,"context_line":"        # to be 10 times of the minimum time spent on ``do_fetch_backend``."},{"line_number":2598,"context_line":"        self._token_ttl \u003d retry_interval * 2 * 
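A slightly fleshed-out rendering of that sketch; every name here (``CooperativeCache``, ``from_backend_resp``, ``encode_for_memcache``, ``from_memcache_resp``) is the hypothetical interface floated in the comment, not the patch's actual API, and the dict-style infocache writes follow the code under review rather than the ``infocache.set()`` shorthand:

    class CooperativeCache(object):
        # subclasses supply the backend fetch and a cacheable type
        cacheable_class = None  # e.g. NamespaceBoundList
        cache_key = None        # e.g. an illustrative 'shard-ranges/a/c'

        def fetch_from_backend(self):
            raise NotImplementedError

        def _store_backend_resp(self, resp, memcache, infocache):
            # concrete method in the abstract class, per the comment above
            cacheable = self.cacheable_class.from_backend_resp(resp)
            memcache.set(self.cache_key, cacheable.encode_for_memcache())
            infocache[self.cache_key] = cacheable
            return cacheable

        def _load_from_memcache(self, memcache, infocache):
            raw_memcache_json = memcache.get(self.cache_key)
            cacheable = self.cacheable_class.from_memcache_resp(raw_memcache_json)
            infocache[self.cache_key] = cacheable
            return cacheable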
10"},{"line_number":2599,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":2600,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"}],"source_content_type":"text/x-python","patch_set":14,"id":"b8455f35_4a7fb0de","line":2597,"range":{"start_line":2597,"start_character":32,"end_line":2597,"end_character":74},"in_reply_to":"5aa35511_569aaf6f","updated":"2024-03-25 05:30:17.000000000","message":"changed to \"average\".","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"c783da2689193d11c60a978c512ea3f46be8dc61","unresolved":true,"context_lines":[{"line_number":2616,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2617,"context_line":"        \"\"\""},{"line_number":2618,"context_line":"        data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return None"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if self._infocache:"}],"source_content_type":"text/x-python","patch_set":14,"id":"604f7617_129d28f6","line":2619,"updated":"2024-03-22 18:04:02.000000000","message":"the token is not released here (because there\u0027s another two threads using it), but if those other two threads also fail then *nobody* gets to fetch from backend for the rest of the token_ttl.\n\nI still suspect that it will be easier to reason about the corner cases of the token behaviour if there is just ONE successful thread per token, and a pool of (e.g. 3) tokens with different token_keys. Then we can think in terms of \"this thread got the token, this thread must release it\" rather than \"three threads got the token, can I convince myself that one of them releases it?\"","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"50192330b0eaa55928bcecef326b623b9faae22f","unresolved":false,"context_lines":[{"line_number":2616,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2617,"context_line":"        \"\"\""},{"line_number":2618,"context_line":"        data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return None"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if self._infocache:"}],"source_content_type":"text/x-python","patch_set":14,"id":"b2355185_c7c9a7d1","line":2619,"in_reply_to":"1ce1b6c6_2edf8d37","updated":"2024-09-25 16:09:59.000000000","message":"Done","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"733c95f580ec1b966602ea300c994c863a36e5de","unresolved":true,"context_lines":[{"line_number":2616,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2617,"context_line":"        \"\"\""},{"line_number":2618,"context_line":"        data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return 
None"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if self._infocache:"}],"source_content_type":"text/x-python","patch_set":14,"id":"1ce1b6c6_2edf8d37","line":2619,"in_reply_to":"232127a3_995df3f1","updated":"2024-04-04 00:51:08.000000000","message":"\u003eIf they all just get a timeout fetching from the backend\n\nActually, in the case of all three token requests get timeout, the situation with current implementation is still much better than the original (w/o token), as I realized it when working on this test case:\nhttps://review.opendev.org/c/openstack/swift/+/890174/20/test/unit/common/test_utils.py#4753\n\nWhen that happens, and after token ttl (default as 10x of average backend get latency) finishes, those requests which didn\u0027t get token and in sleeping will try to reach backend at the time (TIME_RECEIVED_AT_PROXY + token ttl), and if the first request in that batch does get data from backend and set it in the memcache, the other requests could still get the data directly from memcache with retrying before token ttl kicks in. Thus it still can reduce the amount of requests into the backend dramatically.\n\nThe first request w/o token to reach the backend does need to wait for the time of token_ttl, and it\u0027s reasonable. since if all three token requests get timeout, that means the backend is overloaded, we don\u0027t want to retry GET very soon. Setting a proper token_ttl makes sense here as well.","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f7b3200aeeca5c25ea1abbd10a4dc43c76f245a5","unresolved":true,"context_lines":[{"line_number":2616,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2617,"context_line":"        \"\"\""},{"line_number":2618,"context_line":"        data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return None"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if self._infocache:"}],"source_content_type":"text/x-python","patch_set":14,"id":"b2f717fa_f358faf9","line":2619,"in_reply_to":"604f7617_129d28f6","updated":"2024-03-25 05:30:17.000000000","message":"It\u0027s design considerations to not to release cooperative token here. Since the successful finish of one cooperative token session only depends on one successful request. So when any request with token finishes both backend fetching and memcache set successful, it can remove all cooperative tokens within this token session. For one request with token which fails to fetch backend or set memcache, a token session is not done yet, other two requests with token still have chance to finish it.\n\nIn a distributed environment where most likely those three requests will land into three different proxy servers, the chance of \"all three requests\" failure is quite low. Even if that happens, that would be the same thundering herd situation as without cooperation token mechanism.\n\nMy take on the idea of \"a pool of (e.g. 3) tokens with different token_keys. 
this thread got the token, this thread must release it\" is that it\u0027s much more complicated to enforce three different tokens and make sure each request releases its own token.","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":true,"context_lines":[{"line_number":2616,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2617,"context_line":"        \"\"\""},{"line_number":2618,"context_line":"        data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return None"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if self._infocache:"}],"source_content_type":"text/x-python","patch_set":14,"id":"232127a3_995df3f1","line":2619,"in_reply_to":"77b7fe94_807ea3ba","updated":"2024-04-02 03:21:47.000000000","message":"\u003e If they all just get a timeout fetching from the backend they could release their locks sooner and let other requests reach for the backend earlier - is that what we want? \nWith the current implementation, we can just set the token ttl to a smaller value, and then a second round of the cooperative fetching session will kick off.\n``self._token_ttl \u003d retry_interval * 2 * 10``\n\n\u003e It might not be strictly equivalent; as Al points out: at a minimum we\u0027ve slowed down the thundering herd for some small ttl before we unleash the flood-gates. If it was implementable we might want a cache ttl timeout to trigger another round of cooperative stalling until someone finally succeeds!\nI will add metrics to monitor this. Then we can decide if this fancier feature is needed or not.","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2616,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2617,"context_line":"        \"\"\""},{"line_number":2618,"context_line":"        data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return None"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if self._infocache:"}],"source_content_type":"text/x-python","patch_set":14,"id":"77b7fe94_807ea3ba","line":2619,"in_reply_to":"b2f717fa_f358faf9","updated":"2024-03-26 15:06:38.000000000","message":"\u003e if there is just ONE successful thread per token, and a pool of (e.g. 3) tokens with different token_keys.\n\nthat makes me think of the \"limit\u003dN\" we implement in `lock_path`:\n\nhttps://github.com/NVIDIA/swift/blob/master/swift/common/utils/__init__.py#L2300\n\n\u003e if those other two threads also fail then nobody gets to fetch from backend for the rest of the token_ttl\n\nthis is kind of interesting; I\u0027d been thinking of \"the memcache timeout\" case as mainly happening when the winners died for some unexpected/unknown reason.  
If they all just get a timeout fetching from the backend they could release their locks sooner and let other requests reach for the backend earlier - is that what we want?  Is an explicit failure of \"fetch_from_backend\" the ONLY time we\u0027d want to let MORE requests hit the backend SOONER?\n\n\u003e the chance of \"all three requests\" failure is quite low. Even if that happens, that would be the same thundering herd situation as without the cooperative token mechanism.\n\nIt might not be strictly equivalent; as Al points out: at a minimum we\u0027ve slowed down the thundering herd for some small ttl before we unleash the flood-gates.  If it was implementable we might want a cache ttl timeout to trigger another round of cooperative stalling until someone finally succeeds!","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"c783da2689193d11c60a978c512ea3f46be8dc61","unresolved":true,"context_lines":[{"line_number":2686,"context_line":"            # to increment the token key, go fetching data from backend, and"},{"line_number":2687,"context_line":"            # set the data in memcache."},{"line_number":2688,"context_line":"            need_delete_token \u003d True if total_requests else False"},{"line_number":2689,"context_line":"            data \u003d self._fetch_data_from_backend(need_delete_token)"},{"line_number":2690,"context_line":"        else:"},{"line_number":2691,"context_line":"            # No token acquired, it means that there are requests in-flight"},{"line_number":2692,"context_line":"            # which will fetch data form the backend servers and update them in"}],"source_content_type":"text/x-python","patch_set":14,"id":"d2626fd5_77639258","line":2689,"updated":"2024-03-22 18:04:02.000000000","message":"I\u0027d prefer to see this written something like:\n\n```\ntry:\n    data \u003d self._fetch_data_from_backend(need_delete_token)\nexcept Exception:\n    if need_delete_token:\n        self._memcache.delete(self._token_key)\n    raise\n```\n\nI understand why it isn\u0027t (because the token isn\u0027t deleted if the backend fetch fails), but IMHO that is problematic. 
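For comparison, a minimal sketch of the try/finally variant being argued for here (editor's illustration only; ``_set_in_memcache`` is a hypothetical helper standing in for the patch's encode-and-set logic):

```
def _fetch_with_token(self):
    try:
        data, self.backend_response = self._do_fetch_backend()
        if data:
            self._set_in_memcache(data)  # hypothetical helper
        return data
    finally:
        # runs whether the fetch succeeded, returned nothing, or raised
        self._memcache.delete(self._token_key)
```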
It is much easier to reason about the lifecycle of the token with the try/finally construction.","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":false,"context_lines":[{"line_number":2686,"context_line":"            # to increment the token key, go fetching data from backend, and"},{"line_number":2687,"context_line":"            # set the data in memcache."},{"line_number":2688,"context_line":"            need_delete_token \u003d True if total_requests else False"},{"line_number":2689,"context_line":"            data \u003d self._fetch_data_from_backend(need_delete_token)"},{"line_number":2690,"context_line":"        else:"},{"line_number":2691,"context_line":"            # No token acquired, it means that there are requests in-flight"},{"line_number":2692,"context_line":"            # which will fetch data form the backend servers and update them in"}],"source_content_type":"text/x-python","patch_set":14,"id":"bc26b7b2_b6c28aec","line":2689,"in_reply_to":"1e5a02e4_e6e83791","updated":"2024-04-02 03:21:47.000000000","message":"Done","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f7b3200aeeca5c25ea1abbd10a4dc43c76f245a5","unresolved":true,"context_lines":[{"line_number":2686,"context_line":"            # to increment the token key, go fetching data from backend, and"},{"line_number":2687,"context_line":"            # set the data in memcache."},{"line_number":2688,"context_line":"            need_delete_token \u003d True if total_requests else False"},{"line_number":2689,"context_line":"            data \u003d self._fetch_data_from_backend(need_delete_token)"},{"line_number":2690,"context_line":"        else:"},{"line_number":2691,"context_line":"            # No token acquired, it means that there are requests in-flight"},{"line_number":2692,"context_line":"            # which will fetch data form the backend servers and update them in"}],"source_content_type":"text/x-python","patch_set":14,"id":"1e5a02e4_e6e83791","line":2689,"in_reply_to":"d2626fd5_77639258","updated":"2024-03-25 05:30:17.000000000","message":"See the other comment discussion thread: the successful finish of one cooperative token session (and deletion of the token) should only depend on one successful request, not the failure cases.","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"c783da2689193d11c60a978c512ea3f46be8dc61","unresolved":true,"context_lines":[{"line_number":2694,"context_line":"            data \u003d self._sleep_and_retry_memcache()"},{"line_number":2695,"context_line":"            if not data:"},{"line_number":2696,"context_line":"                # Still no cache data fetched, do the slow fetch."},{"line_number":2697,"context_line":"                data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2698,"context_line":""},{"line_number":2699,"context_line":"        return data"},{"line_number":2700,"context_line":""}],"source_content_type":"text/x-python","patch_set":14,"id":"e7d8a782_8f92c7da","line":2697,"updated":"2024-03-22 18:04:02.000000000","message":"I don\u0027t understand why this would not also set the data in cache?\n\nIIUC, if 
all three token-holders fail to fetch from the backend, then no other thread will try the backend until the token ttl expires, and then all other threads try to fetch from backend but none of them sets it in memcache??","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f7b3200aeeca5c25ea1abbd10a4dc43c76f245a5","unresolved":false,"context_lines":[{"line_number":2694,"context_line":"            data \u003d self._sleep_and_retry_memcache()"},{"line_number":2695,"context_line":"            if not data:"},{"line_number":2696,"context_line":"                # Still no cache data fetched, do the slow fetch."},{"line_number":2697,"context_line":"                data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2698,"context_line":""},{"line_number":2699,"context_line":"        return data"},{"line_number":2700,"context_line":""}],"source_content_type":"text/x-python","patch_set":14,"id":"73b78d28_c210f508","line":2697,"in_reply_to":"e7d8a782_8f92c7da","updated":"2024-03-25 05:30:17.000000000","message":"Good point, changed to set memcache for this case too.","commit_id":"50bc9bc0e169490bd9506ff77ff468e59d771564"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2561,"context_line":"    will start a new round of cooperation session. In the rare case of all 3"},{"line_number":2562,"context_line":"    requests spend more time than ``token_ttl`` with proper settings, the new"},{"line_number":2563,"context_line":"    requests after ``token_ttl`` will still start a new usage session, and"},{"line_number":2564,"context_line":"    there will be possibly 6 in-flight backend requests in total."},{"line_number":2565,"context_line":""},{"line_number":2566,"context_line":"    :param infocache: the infocache instance."},{"line_number":2567,"context_line":"    :param memcache: the memcache instance."}],"source_content_type":"text/x-python","patch_set":15,"id":"3920b3d8_5e021e1c","line":2564,"updated":"2024-03-26 15:06:38.000000000","message":"and if those fail nine, and if those fail 12 - I think this paragraph could end one sentence earlier: \"the situation w/o cooperative token\"\n\nmaybe it\u0027s worth noting that after ttl timeout there\u0027s a potential for a \"new round\" of cooperation, but I think we hope that should be rare or else we might want to just increase num_tokens?  We should get some stats around that and see if it ever happens in practice.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":true,"context_lines":[{"line_number":2561,"context_line":"    will start a new round of cooperation session. 
In the rare case of all 3"},{"line_number":2562,"context_line":"    requests spend more time than ``token_ttl`` with proper settings, the new"},{"line_number":2563,"context_line":"    requests after ``token_ttl`` will still start a new usage session, and"},{"line_number":2564,"context_line":"    there will be possibly 6 in-flight backend requests in total."},{"line_number":2565,"context_line":""},{"line_number":2566,"context_line":"    :param infocache: the infocache instance."},{"line_number":2567,"context_line":"    :param memcache: the memcache instance."}],"source_content_type":"text/x-python","patch_set":15,"id":"861b8480_f4960674","line":2564,"in_reply_to":"3920b3d8_5e021e1c","updated":"2024-04-02 03:21:47.000000000","message":"yes, I have comments above \"the new requests which cache miss and need querying backend after ``token_ttl`` will start a new round of cooperation session.\"\n\nI will add code to emit several metrics later.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49205ebacd44070d9afb148c56b81fc9c5274f1b","unresolved":false,"context_lines":[{"line_number":2561,"context_line":"    will start a new round of cooperation session. In the rare case of all 3"},{"line_number":2562,"context_line":"    requests spend more time than ``token_ttl`` with proper settings, the new"},{"line_number":2563,"context_line":"    requests after ``token_ttl`` will still start a new usage session, and"},{"line_number":2564,"context_line":"    there will be possibly 6 in-flight backend requests in total."},{"line_number":2565,"context_line":""},{"line_number":2566,"context_line":"    :param infocache: the infocache instance."},{"line_number":2567,"context_line":"    :param memcache: the memcache instance."}],"source_content_type":"text/x-python","patch_set":15,"id":"983c085c_ba6e2bf9","line":2564,"in_reply_to":"861b8480_f4960674","updated":"2024-04-19 23:05:14.000000000","message":"Metrics have been added in the proxy server follow-up patch.\nhttps://review.opendev.org/c/openstack/swift/+/908969/14/swift/proxy/controllers/base.py#825","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2588,"context_line":"                 cache_key, cache_ttl, do_fetch_backend, retry_interval,"},{"line_number":2589,"context_line":"                 cache_encoder\u003dNone, cache_decoder\u003dNone, num_tokens\u003d3):"},{"line_number":2590,"context_line":"        self._infocache \u003d infocache"},{"line_number":2591,"context_line":"        self._memcache \u003d memcache"},{"line_number":2592,"context_line":"        self._cache_key \u003d cache_key"},{"line_number":2593,"context_line":"        self._cache_ttl \u003d cache_ttl"},{"line_number":2594,"context_line":"        self._token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"}],"source_content_type":"text/x-python","patch_set":15,"id":"d26c021e_fee8227e","line":2591,"updated":"2024-03-26 15:06:38.000000000","message":"I could imagine a concrete implementation choosing to pass a request object to its `__init__` and extracting these before calling `super` - but if we don\u0027t have any concrete callers that don\u0027t expect these to come from a request object (can you HAVE an infocache w/o a request 
object?) we might choose to simplify this signature and just make it a bit more opinionated.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":true,"context_lines":[{"line_number":2588,"context_line":"                 cache_key, cache_ttl, do_fetch_backend, retry_interval,"},{"line_number":2589,"context_line":"                 cache_encoder\u003dNone, cache_decoder\u003dNone, num_tokens\u003d3):"},{"line_number":2590,"context_line":"        self._infocache \u003d infocache"},{"line_number":2591,"context_line":"        self._memcache \u003d memcache"},{"line_number":2592,"context_line":"        self._cache_key \u003d cache_key"},{"line_number":2593,"context_line":"        self._cache_ttl \u003d cache_ttl"},{"line_number":2594,"context_line":"        self._token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"}],"source_content_type":"text/x-python","patch_set":15,"id":"fd9a9c7a_1d742e6b","line":2591,"in_reply_to":"d26c021e_fee8227e","updated":"2024-09-25 21:54:01.000000000","message":"a request object can only replace two input parameters: ``infocache`` and ``memcache``, which seems not that helpful; also, a less opinionated and general interface might be easier to apply to other places later on.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8bdc21d35e3e48c56a5ea5fa92dcad13ecb4881f","unresolved":false,"context_lines":[{"line_number":2588,"context_line":"                 cache_key, cache_ttl, do_fetch_backend, retry_interval,"},{"line_number":2589,"context_line":"                 cache_encoder\u003dNone, cache_decoder\u003dNone, num_tokens\u003d3):"},{"line_number":2590,"context_line":"        self._infocache \u003d infocache"},{"line_number":2591,"context_line":"        self._memcache \u003d memcache"},{"line_number":2592,"context_line":"        self._cache_key \u003d cache_key"},{"line_number":2593,"context_line":"        self._cache_ttl \u003d cache_ttl"},{"line_number":2594,"context_line":"        self._token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"}],"source_content_type":"text/x-python","patch_set":15,"id":"bd9f6d93_211916b4","line":2591,"in_reply_to":"f3c356ce_3d5aec55","updated":"2025-03-04 20:13:36.000000000","message":"I used ``request`` in the proxy-server shard range cache sub-class; also removed a few parameters after the sub-class refactoring.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"df0123ddc6a5118de24bcab42bf97548be03393e","unresolved":true,"context_lines":[{"line_number":2588,"context_line":"                 cache_key, cache_ttl, do_fetch_backend, retry_interval,"},{"line_number":2589,"context_line":"                 cache_encoder\u003dNone, cache_decoder\u003dNone, num_tokens\u003d3):"},{"line_number":2590,"context_line":"        self._infocache \u003d infocache"},{"line_number":2591,"context_line":"        self._memcache \u003d memcache"},{"line_number":2592,"context_line":"        self._cache_key \u003d cache_key"},{"line_number":2593,"context_line":"        self._cache_ttl \u003d cache_ttl"},{"line_number":2594,"context_line":"        self._token_key \u003d \u0027_cache_token/%s\u0027 % 
cache_key"}],"source_content_type":"text/x-python","patch_set":15,"id":"f3c356ce_3d5aec55","line":2591,"in_reply_to":"fd9a9c7a_1d742e6b","updated":"2025-02-05 19:43:04.000000000","message":"\u003e request object can only replace two input parameters\n\none is less than two!\n\nI don\u0027t think a an infocache is any \"less opionated and general\" compared to a request object - in fact I think it\u0027d be more general should we ever want to \"ehance\" the way that requests encapsulate infocache (or their environ/cache instances)","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2615,"context_line":"            after operations are done."},{"line_number":2616,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2617,"context_line":"        \"\"\""},{"line_number":2618,"context_line":"        data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return None"},{"line_number":2621,"context_line":""}],"source_content_type":"text/x-python","patch_set":15,"id":"b7eb3a33_d897434c","line":2618,"updated":"2024-03-26 15:06:38.000000000","message":"oh wow, so it\u0027s up to the implementer to re give us a function that returns a 2-tuple of (data, resp) - where data is the complex type derived from the resp suitable for storing in infocache.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":false,"context_lines":[{"line_number":2615,"context_line":"            after operations are done."},{"line_number":2616,"context_line":"        :returns: value of the data fetched from backend; None if not exist."},{"line_number":2617,"context_line":"        \"\"\""},{"line_number":2618,"context_line":"        data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return None"},{"line_number":2621,"context_line":""}],"source_content_type":"text/x-python","patch_set":15,"id":"2ef9f064_798d8603","line":2618,"in_reply_to":"b7eb3a33_d897434c","updated":"2024-09-25 21:54:01.000000000","message":"Acknowledged","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return None"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if self._infocache:"},{"line_number":2623,"context_line":"            self._infocache[self._cache_key] \u003d data"},{"line_number":2624,"context_line":"        try:"},{"line_number":2625,"context_line":"            encoded_data \u003d self._cache_encoder("}],"source_content_type":"text/x-python","patch_set":15,"id":"54d5b360_fcc58346","line":2622,"updated":"2024-03-26 15:06:38.000000000","message":"oh intresting, infocache is optional - I don\u0027t think the class 
signature tried to make that obvious with a kwarg\u003dNone\n\nI think we need to be careful about the Falsy-ness of getting an explicitly empty infocache\u003d{}; I\u0027m sure that would come up/get fixed in testing.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":false,"context_lines":[{"line_number":2619,"context_line":"        if not data:"},{"line_number":2620,"context_line":"            return None"},{"line_number":2621,"context_line":""},{"line_number":2622,"context_line":"        if self._infocache:"},{"line_number":2623,"context_line":"            self._infocache[self._cache_key] \u003d data"},{"line_number":2624,"context_line":"        try:"},{"line_number":2625,"context_line":"            encoded_data \u003d self._cache_encoder("}],"source_content_type":"text/x-python","patch_set":15,"id":"d3e8b34f_fc35cacd","line":2622,"in_reply_to":"54d5b360_fcc58346","updated":"2024-04-02 03:21:47.000000000","message":"You are correct! that did get caught in testing, and I got that fixed. :-)","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2623,"context_line":"            self._infocache[self._cache_key] \u003d data"},{"line_number":2624,"context_line":"        try:"},{"line_number":2625,"context_line":"            encoded_data \u003d self._cache_encoder("},{"line_number":2626,"context_line":"                data) if self._cache_encoder else data"},{"line_number":2627,"context_line":"            self._memcache.set("},{"line_number":2628,"context_line":"                self._cache_key, encoded_data,"},{"line_number":2629,"context_line":"                time\u003dself._cache_ttl, raise_on_error\u003dTrue)"}],"source_content_type":"text/x-python","patch_set":15,"id":"a59ea09e_2a6aec1c","line":2626,"updated":"2024-03-26 15:06:38.000000000","message":"maybe simpler in `__init__` to ensure that `self._cache_encoder` is always a callable (even if it\u0027s just the identity function)\n\n    self._cache_encoder \u003d cache_encoder or (lambda x: x)","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":false,"context_lines":[{"line_number":2623,"context_line":"            self._infocache[self._cache_key] \u003d data"},{"line_number":2624,"context_line":"        try:"},{"line_number":2625,"context_line":"            encoded_data \u003d self._cache_encoder("},{"line_number":2626,"context_line":"                data) if self._cache_encoder else data"},{"line_number":2627,"context_line":"            self._memcache.set("},{"line_number":2628,"context_line":"                self._cache_key, encoded_data,"},{"line_number":2629,"context_line":"                time\u003dself._cache_ttl, raise_on_error\u003dTrue)"}],"source_content_type":"text/x-python","patch_set":15,"id":"d6b02152_eb7d51f9","line":2626,"in_reply_to":"a59ea09e_2a6aec1c","updated":"2024-09-25 21:54:01.000000000","message":"Done","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay 
Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2633,"context_line":"                # with a token finishes both backend fetching and memcache set"},{"line_number":2634,"context_line":"                # successful, it can remove all cooperative tokens of this"},{"line_number":2635,"context_line":"                # token session."},{"line_number":2636,"context_line":"                self._memcache.delete(self._token_key)"},{"line_number":2637,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2638,"context_line":"            self.set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2639,"context_line":"        else:"}],"source_content_type":"text/x-python","patch_set":15,"id":"f34ea167_ff118453","line":2636,"updated":"2024-03-26 15:06:38.000000000","message":"regardless of the logic we choose to implement WRT incr/delete of `self._token_key` I think can agree it would be *ideal* for mainteance if all that logic was in one method with an execution flow that made the life-cycle as clear as possible.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":false,"context_lines":[{"line_number":2633,"context_line":"                # with a token finishes both backend fetching and memcache set"},{"line_number":2634,"context_line":"                # successful, it can remove all cooperative tokens of this"},{"line_number":2635,"context_line":"                # token session."},{"line_number":2636,"context_line":"                self._memcache.delete(self._token_key)"},{"line_number":2637,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2638,"context_line":"            self.set_cache_state \u003d \u0027set_error\u0027"},{"line_number":2639,"context_line":"        else:"}],"source_content_type":"text/x-python","patch_set":15,"id":"73c2657e_11949cc2","line":2636,"in_reply_to":"f34ea167_ff118453","updated":"2024-04-02 03:21:47.000000000","message":"agreed. moved to token deletion to the main logic within \"fetch_data\" function.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2652,"context_line":"        while cur_time \u003c cutoff_time:"},{"line_number":2653,"context_line":"            eventlet.sleep(self._retry_interval)"},{"line_number":2654,"context_line":"            cache_data \u003d self._memcache.get("},{"line_number":2655,"context_line":"                self._cache_key, raise_on_error\u003dTrue)"},{"line_number":2656,"context_line":"            if cache_data:"},{"line_number":2657,"context_line":"                # cache hit."},{"line_number":2658,"context_line":"                self._req_served_from_cache \u003d True"}],"source_content_type":"text/x-python","patch_set":15,"id":"33a4b335_1f0c3898","line":2655,"updated":"2024-03-26 15:06:38.000000000","message":"who catches this - the *caller*?  
And why wouldn\u0027t we retry up to cutoff_time?","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":false,"context_lines":[{"line_number":2652,"context_line":"        while cur_time \u003c cutoff_time:"},{"line_number":2653,"context_line":"            eventlet.sleep(self._retry_interval)"},{"line_number":2654,"context_line":"            cache_data \u003d self._memcache.get("},{"line_number":2655,"context_line":"                self._cache_key, raise_on_error\u003dTrue)"},{"line_number":2656,"context_line":"            if cache_data:"},{"line_number":2657,"context_line":"                # cache hit."},{"line_number":2658,"context_line":"                self._req_served_from_cache \u003d True"}],"source_content_type":"text/x-python","patch_set":15,"id":"f2d2093d_5dbb6f08","line":2655,"in_reply_to":"33a4b335_1f0c3898","updated":"2024-04-02 03:21:47.000000000","message":"yes, we should. And I caught this too in my own test case!","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2657,"context_line":"                # cache hit."},{"line_number":2658,"context_line":"                self._req_served_from_cache \u003d True"},{"line_number":2659,"context_line":"                decoded_data \u003d self._cache_decoder("},{"line_number":2660,"context_line":"                    cache_data) if self._cache_decoder else cache_data"},{"line_number":2661,"context_line":"                return decoded_data"},{"line_number":2662,"context_line":"            # cache miss."},{"line_number":2663,"context_line":"            cur_time \u003d time.time()"}],"source_content_type":"text/x-python","patch_set":15,"id":"c2bfb53b_f999d69b","line":2660,"updated":"2024-03-26 15:06:38.000000000","message":"maybe simpler in `__init__` to ensure that `self._cache_decoder` is always a callable (even if it\u0027s just the identity function)\n\n    self._cache_decoder \u003d cache_decoder or (lambda x: x)","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":false,"context_lines":[{"line_number":2657,"context_line":"                # cache hit."},{"line_number":2658,"context_line":"                self._req_served_from_cache \u003d True"},{"line_number":2659,"context_line":"                decoded_data \u003d self._cache_decoder("},{"line_number":2660,"context_line":"                    cache_data) if self._cache_decoder else cache_data"},{"line_number":2661,"context_line":"                return decoded_data"},{"line_number":2662,"context_line":"            # cache miss."},{"line_number":2663,"context_line":"            cur_time \u003d time.time()"}],"source_content_type":"text/x-python","patch_set":15,"id":"42cda1f1_9ff51e77","line":2660,"in_reply_to":"c2bfb53b_f999d69b","updated":"2024-09-25 21:54:01.000000000","message":"Done","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay 
Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2662,"context_line":"            # cache miss."},{"line_number":2663,"context_line":"            cur_time \u003d time.time()"},{"line_number":2664,"context_line":"            continue"},{"line_number":2665,"context_line":"        return None"},{"line_number":2666,"context_line":""},{"line_number":2667,"context_line":"    def fetch_data(self):"},{"line_number":2668,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":15,"id":"6d361535_d747b37f","line":2665,"updated":"2024-03-26 15:06:38.000000000","message":"I really want stats around this case - if we have very many of these at ALL then our cooperative token implementation is failing us in some significant way.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49205ebacd44070d9afb148c56b81fc9c5274f1b","unresolved":false,"context_lines":[{"line_number":2662,"context_line":"            # cache miss."},{"line_number":2663,"context_line":"            cur_time \u003d time.time()"},{"line_number":2664,"context_line":"            continue"},{"line_number":2665,"context_line":"        return None"},{"line_number":2666,"context_line":""},{"line_number":2667,"context_line":"    def fetch_data(self):"},{"line_number":2668,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":15,"id":"c9c68d4b_dc0fb92f","line":2665,"in_reply_to":"64630490_90e685bb","updated":"2024-04-19 23:05:14.000000000","message":"Clay, I have added the related metrics in the proxy-server patch, at here:\nhttps://review.opendev.org/c/openstack/swift/+/908969/14/swift/proxy/controllers/base.py#825\n\n``token.updating_shard.done_token_reqs: total requests have finished with token acquired.\ntoken.updating_shard.cache_served_reqs: total requests have been served out of cache without token acquired.\ntoken.updating_shard.backend_reqs: total requests need go to backend.``\n\nso the specific ``cache misses`` you are asking here will be:\n``(token.updating_shard.backend_reqs - token.updating_shard.done_token_reqs)``","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4a3409f2c7557fb623f094089756f527fcf64b4b","unresolved":true,"context_lines":[{"line_number":2662,"context_line":"            # cache miss."},{"line_number":2663,"context_line":"            cur_time \u003d time.time()"},{"line_number":2664,"context_line":"            continue"},{"line_number":2665,"context_line":"        return None"},{"line_number":2666,"context_line":""},{"line_number":2667,"context_line":"    def fetch_data(self):"},{"line_number":2668,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":15,"id":"64630490_90e685bb","line":2665,"in_reply_to":"6d361535_d747b37f","updated":"2024-04-19 19:45:42.000000000","message":"BUMP!  This is a BIG part of the whole debate going on here.  
If we can get some stats around this and convince ourselves an \"off-by-default\" behavior is equivalent to master maybe we can carry this.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"92e4d55d3301aa03164d741370e14748e95eedeb","unresolved":true,"context_lines":[{"line_number":2698,"context_line":"            data \u003d self._sleep_and_retry_memcache()"},{"line_number":2699,"context_line":"            if not data:"},{"line_number":2700,"context_line":"                # Still no cache data fetched."},{"line_number":2701,"context_line":"                data \u003d self._fetch_data_from_backend()"},{"line_number":2702,"context_line":""},{"line_number":2703,"context_line":"        return data"},{"line_number":2704,"context_line":""}],"source_content_type":"text/x-python","patch_set":15,"id":"76fd394b_b7ea66db","line":2701,"updated":"2024-03-26 15:06:38.000000000","message":"I can kind of read this as branching the logic into 4 paths:\n\n1) incr returned \u003c num_tokens - we win!  do the backend fetch\n1a) incr *failed* - go ahead and do the backend request\n2) incr returned \u003e num_tokens - we lose!  keep trying memcache until a timeout\n2a) all num_token winners died - go ahead and do the backend request\n\nBut, how different are the 1a \u0026 2a \"error handling\" paths - it seems in both they just `fetch_from_backend(delete_token\u003dFalse)`; could that be more obvious?\n\n    try:\n        am_winner \u003d get_token()  # raises CooperationError on incr fail\n        if am_winner:\n            fetch_from_backend_and_set_in_memcache()\n            delete_token()\n        else:\n            wait_for_memcache()  # raises CooperationError on memcache ttl\n    except CooperationError:\n        fetch_from_backend_and_set_in_memcache()","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":false,"context_lines":[{"line_number":2698,"context_line":"            data \u003d self._sleep_and_retry_memcache()"},{"line_number":2699,"context_line":"            if not data:"},{"line_number":2700,"context_line":"                # Still no cache data fetched."},{"line_number":2701,"context_line":"                data \u003d self._fetch_data_from_backend()"},{"line_number":2702,"context_line":""},{"line_number":2703,"context_line":"        return data"},{"line_number":2704,"context_line":""}],"source_content_type":"text/x-python","patch_set":15,"id":"e8185810_268d20aa","line":2701,"in_reply_to":"76fd394b_b7ea66db","updated":"2024-04-02 03:21:47.000000000","message":"I have adopted the suggested style, thanks.","commit_id":"294d13029786815e294d150f29e2dad6d12b7e12"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"90cc4c9845589c3cd20fbd3b6e065b82b17b0348","unresolved":true,"context_lines":[{"line_number":2566,"context_line":"        half of the average time spent on ``do_fetch_backend``."},{"line_number":2567,"context_line":"    :param num_tokens: the minimum limit of tokens per each usage sesssion,"},{"line_number":2568,"context_line":"        also the the minimum limit of in-flight requests allowed to fetch data"},{"line_number":2569,"context_line":"        from backend; default to be 3, which give 
redundancy when any request"},{"line_number":2570,"context_line":"        with token fails to fetch data from the backend or fails to set new"},{"line_number":2571,"context_line":"        data into memcached."},{"line_number":2572,"context_line":"    \"\"\""}],"source_content_type":"text/x-python","patch_set":25,"id":"45022ec6_e2483f2e","line":2569,"range":{"start_line":2569,"start_character":21,"end_line":2569,"end_character":37},"updated":"2024-04-18 11:47:10.000000000","message":"I think we should use a higher default. If my maths is correct, with a 3 replica container policy, there\u0027s a 1/9 chance that all 3 of these requests go to the same server and only 2/9 chance that they will go to three different servers.","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"50192330b0eaa55928bcecef326b623b9faae22f","unresolved":false,"context_lines":[{"line_number":2566,"context_line":"        half of the average time spent on ``do_fetch_backend``."},{"line_number":2567,"context_line":"    :param num_tokens: the minimum limit of tokens per each usage sesssion,"},{"line_number":2568,"context_line":"        also the the minimum limit of in-flight requests allowed to fetch data"},{"line_number":2569,"context_line":"        from backend; default to be 3, which give redundancy when any request"},{"line_number":2570,"context_line":"        with token fails to fetch data from the backend or fails to set new"},{"line_number":2571,"context_line":"        data into memcached."},{"line_number":2572,"context_line":"    \"\"\""}],"source_content_type":"text/x-python","patch_set":25,"id":"ac3e0ce3_8a181c5a","line":2569,"range":{"start_line":2569,"start_character":21,"end_line":2569,"end_character":37},"in_reply_to":"115c9c15_350910df","updated":"2024-09-25 16:09:59.000000000","message":"Done","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e6f89ad9d2f1f4afd8604a2ae65f6ee0d7fb494","unresolved":true,"context_lines":[{"line_number":2566,"context_line":"        half of the average time spent on ``do_fetch_backend``."},{"line_number":2567,"context_line":"    :param num_tokens: the minimum limit of tokens per each usage sesssion,"},{"line_number":2568,"context_line":"        also the the minimum limit of in-flight requests allowed to fetch data"},{"line_number":2569,"context_line":"        from backend; default to be 3, which give redundancy when any request"},{"line_number":2570,"context_line":"        with token fails to fetch data from the backend or fails to set new"},{"line_number":2571,"context_line":"        data into memcached."},{"line_number":2572,"context_line":"    \"\"\""}],"source_content_type":"text/x-python","patch_set":25,"id":"115c9c15_350910df","line":2569,"range":{"start_line":2569,"start_character":21,"end_line":2569,"end_character":37},"in_reply_to":"45022ec6_e2483f2e","updated":"2024-04-19 03:36:39.000000000","message":"Your calculation is correct. Since the cooperative token is intended to solve the backend overloading issue in the first place, I am not that worried about spreading all token requests across different backend servers, but rather want to prevent the situation where a single token request hangs or dies and then the whole token session would be wasted. 
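The replica math above is easy to verify by enumeration (a quick editor's check, not code from the patch):

```
from itertools import product

# 3 token requests each landing uniformly on one of 3 container replicas
outcomes = list(product(range(3), repeat=3))                 # 27 equally likely
all_same = sum(1 for o in outcomes if len(set(o)) == 1)      # 3  -> 3/27 = 1/9
all_distinct = sum(1 for o in outcomes if len(set(o)) == 3)  # 6  -> 6/27 = 2/9
print(all_same, all_distinct, len(outcomes))                 # 3 6 27
```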
That\u0027s why I felt 3 token requests are enough (kind of like 3 replicas for container DB). But since this is adjustable, we can also observe and make changes with the newly added metrics.","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"90cc4c9845589c3cd20fbd3b6e065b82b17b0348","unresolved":true,"context_lines":[{"line_number":2580,"context_line":"        self._cache_ttl \u003d cache_ttl"},{"line_number":2581,"context_line":"        self._token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"},{"line_number":2582,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":2583,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":2584,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":2585,"context_line":"        self._token_ttl \u003d retry_interval * 2 * 10"},{"line_number":2586,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":2587,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"}],"source_content_type":"text/x-python","patch_set":25,"id":"c8429f27_ec1024c6","line":2584,"range":{"start_line":2583,"start_character":71,"end_line":2584,"end_character":75},"updated":"2024-04-18 11:47:10.000000000","message":"is this comment stale?","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e6f89ad9d2f1f4afd8604a2ae65f6ee0d7fb494","unresolved":false,"context_lines":[{"line_number":2580,"context_line":"        self._cache_ttl \u003d cache_ttl"},{"line_number":2581,"context_line":"        self._token_key \u003d \u0027_cache_token/%s\u0027 % cache_key"},{"line_number":2582,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":2583,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":2584,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":2585,"context_line":"        self._token_ttl \u003d retry_interval * 2 * 10"},{"line_number":2586,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":2587,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"}],"source_content_type":"text/x-python","patch_set":25,"id":"4234e3b2_e9dfeb88","line":2584,"range":{"start_line":2583,"start_character":71,"end_line":2584,"end_character":75},"in_reply_to":"c8429f27_ec1024c6","updated":"2024-04-19 03:36:39.000000000","message":"No. 
retry_interval is half of the average time spent on ``do_fetch_backend``, so\n``self._token_ttl \u003d retry_interval * 2 * 10``","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"90cc4c9845589c3cd20fbd3b6e065b82b17b0348","unresolved":true,"context_lines":[{"line_number":2582,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":2583,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":2584,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":2585,"context_line":"        self._token_ttl \u003d retry_interval * 2 * 10"},{"line_number":2586,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":2587,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"},{"line_number":2588,"context_line":"        self._cache_encoder \u003d cache_encoder"}],"source_content_type":"text/x-python","patch_set":25,"id":"b7de98fb_893f7d24","line":2585,"updated":"2024-04-18 11:47:10.000000000","message":"I think this means that while the backend requests are in flight we can issue up to 20x memcache gets vs non-token behaviour (traded off against fewer backend GETs of course).\n\nIn the following patch the default value for retry_interval is 0.05, so if we have 10k req/s, almost all enter the token wait loop and start issuing memcache gets at 20/s, so over the course of token_ttl that rises to 200k/s memcache gets per token. Is that reasonable? Should the retry_interval start bigger and maybe shrink exponentially, or be inversely proportional to the time since the token was taken (so later requests try cache sooner than the request immediately after the one that won the token)?","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e6f89ad9d2f1f4afd8604a2ae65f6ee0d7fb494","unresolved":true,"context_lines":[{"line_number":2582,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":2583,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":2584,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":2585,"context_line":"        self._token_ttl \u003d retry_interval * 2 * 10"},{"line_number":2586,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":2587,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"},{"line_number":2588,"context_line":"        self._cache_encoder \u003d cache_encoder"}],"source_content_type":"text/x-python","patch_set":25,"id":"ff4c8db9_17d22f99","line":2585,"in_reply_to":"b7de98fb_893f7d24","updated":"2024-04-19 03:36:39.000000000","message":"good point! 
I am working on a change to reduce memcache gets.","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49205ebacd44070d9afb148c56b81fc9c5274f1b","unresolved":false,"context_lines":[{"line_number":2582,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":2583,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":2584,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":2585,"context_line":"        self._token_ttl \u003d retry_interval * 2 * 10"},{"line_number":2586,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":2587,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"},{"line_number":2588,"context_line":"        self._cache_encoder \u003d cache_encoder"}],"source_content_type":"text/x-python","patch_set":25,"id":"a2757c34_f680c22a","line":2585,"in_reply_to":"ff4c8db9_17d22f99","updated":"2024-04-19 23:05:14.000000000","message":"Reduced the number of memcache retries/gets by increasing the sleep interval and adding exponential backoff. For the default setting ``self._token_ttl \u003d retry_interval * 10``, there will be only three retries: the first sleep interval is ``self._retry_interval * 1.5``, the second is ``self._retry_interval * 3``, and the third is ``self._retry_interval * 6``.","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"90cc4c9845589c3cd20fbd3b6e065b82b17b0348","unresolved":true,"context_lines":[{"line_number":2662,"context_line":"        total_requests \u003d 0"},{"line_number":2663,"context_line":"        try:"},{"line_number":2664,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":2665,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2666,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2667,"context_line":"            self.set_cache_state \u003d \u0027inc_error\u0027"},{"line_number":2668,"context_line":""}],"source_content_type":"text/x-python","patch_set":25,"id":"63635b01_c3664d81","line":2665,"updated":"2024-04-18 11:47:10.000000000","message":"The ghetto lock description https://github.com/memcached/memcached/wiki/ProgrammingTricks#ghetto-central-locking suggests using an add, but it only has one \"winner\". 
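A minimal sketch of that add-based alternative, assuming the memcache client exposes memcached's ``add`` verb with a python-memcached-style signature (an assumption, not the patch's API):

```
def try_acquire_single_token(memcache, token_key, token_ttl):
    # add() only succeeds if the key does not already exist, so exactly
    # one caller "wins" per token_ttl window; everyone else polls cache.
    try:
        return bool(memcache.add(token_key, 1, time=token_ttl))
    except Exception:
        # treat connection errors as "didn't get the token"
        return False
```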
Intuitively, doing multiple incrs is going to be more \"expensive\" than multiple adds when only the first succeeds - it might be worth benchmarking that?","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":false,"context_lines":[{"line_number":2662,"context_line":"        total_requests \u003d 0"},{"line_number":2663,"context_line":"        try:"},{"line_number":2664,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":2665,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2666,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2667,"context_line":"            self.set_cache_state \u003d \u0027inc_error\u0027"},{"line_number":2668,"context_line":""}],"source_content_type":"text/x-python","patch_set":25,"id":"956407b5_cac851a5","line":2665,"in_reply_to":"25ea1f44_b4a84cc1","updated":"2024-09-25 21:54:01.000000000","message":"Since we have introduced a new config num_tokens_per_session, it will be a good small optimization to use memcache.add() to replace the usage of memcache.incr() when num_tokens_per_session \u003d 1. I have this to-do work written down, will work on it in follow-up patches.","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e6f89ad9d2f1f4afd8604a2ae65f6ee0d7fb494","unresolved":true,"context_lines":[{"line_number":2662,"context_line":"        total_requests \u003d 0"},{"line_number":2663,"context_line":"        try:"},{"line_number":2664,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":2665,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":2666,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":2667,"context_line":"            self.set_cache_state \u003d \u0027inc_error\u0027"},{"line_number":2668,"context_line":""}],"source_content_type":"text/x-python","patch_set":25,"id":"25ea1f44_b4a84cc1","line":2665,"in_reply_to":"63635b01_c3664d81","updated":"2024-04-19 03:36:39.000000000","message":"yes, I plan to test that, will see how it goes. hopefully it won\u0027t be an issue, since each operation is within 1 millisecond on average. 
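A rough timing sketch for that test could be something like this (purely illustrative; ``mc`` stands in for any client exposing ``incr()``/``add()``, and the ``add`` signature is an assumption):

```python
import timeit


def bench_token_ops(mc, n=10000):
    # Create the token key first, so every incr below succeeds while
    # every add after the first returns a NOT_STORED-style failure
    # without rewriting the value.
    mc.add('token-key', b'0')
    for name, op in [('incr', lambda: mc.incr('token-key', delta=1)),
                     ('add', lambda: mc.add('token-key', b'x'))]:
        print('%s: %.6fs/op' % (name, timeit.timeit(op, number=n) / n))
```
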
If that\u0027s an issue, we can fall back to using ``memcache.add()`` and only have one token winner.","commit_id":"9f475b7a7bae512c7cb7c53ab12c58e388dc1257"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"cff29d6ff1264ec43fd6f8a5190009ba1ee8c4e7","unresolved":true,"context_lines":[{"line_number":1433,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":1434,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":1435,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":1436,"context_line":"        self._token_ttl \u003d retry_interval * 10"},{"line_number":1437,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1438,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"},{"line_number":1439,"context_line":"        self._cache_encoder \u003d cache_encoder"}],"source_content_type":"text/x-python","patch_set":31,"id":"91a3228e_14c8cc9c","line":1436,"updated":"2024-07-02 22:56:48.000000000","message":"so the interval * 10 is a magic number","commit_id":"dd72d79cbbf80c66ee590e9dac8941eb7f2e8631"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f3064c1509afef499bf100fa5ef7b516368dcf6c","unresolved":false,"context_lines":[{"line_number":1433,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":1434,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":1435,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":1436,"context_line":"        self._token_ttl \u003d retry_interval * 10"},{"line_number":1437,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1438,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"},{"line_number":1439,"context_line":"        self._cache_encoder \u003d cache_encoder"}],"source_content_type":"text/x-python","patch_set":31,"id":"621419b2_0b2321d6","line":1436,"in_reply_to":"378eebb7_71a41653","updated":"2024-07-09 14:28:54.000000000","message":"Added the below comment to the ``_sleep_and_retry_memcache`` function.\n``when ``token_ttl`` is 10 times of ``retry_interval`` and the exponential backoff doubles the retry interval after each retry, normally this function will only sleep and retry 3 times.``\n\nI picked 10, first I felt it\u0027s a reasonable ratio. 
for example, if backend latency is 200ms, then a total token session of 2 seconds seems about right; second, I want each request w/o token to retry only 3 times, and with the doubling exponential backoff, 10 is just about right.\n\nbut of course we can add more configs to replace those magic numbers later if needed.","commit_id":"dd72d79cbbf80c66ee590e9dac8941eb7f2e8631"},{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"7b315613f27a881219393fdb50c21e051627af79","unresolved":true,"context_lines":[{"line_number":1433,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":1434,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":1435,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":1436,"context_line":"        self._token_ttl \u003d retry_interval * 10"},{"line_number":1437,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1438,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"},{"line_number":1439,"context_line":"        self._cache_encoder \u003d cache_encoder"}],"source_content_type":"text/x-python","patch_set":31,"id":"378eebb7_71a41653","line":1436,"in_reply_to":"91a3228e_14c8cc9c","updated":"2024-07-08 07:46:50.000000000","message":"yeah, I was just wondering why 10 was picked here?","commit_id":"dd72d79cbbf80c66ee590e9dac8941eb7f2e8631"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"1531ae79327f5615c743507667d90fbd16588f07","unresolved":true,"context_lines":[{"line_number":1407,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":1408,"context_line":"        memcached."},{"line_number":1409,"context_line":"    :param do_fetch_backend: a callable object to be called to fetch data from"},{"line_number":1410,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1411,"context_line":"    :param cache_encoder: a callable object to be called to convert the data"},{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"}],"source_content_type":"text/x-python","patch_set":32,"id":"5b99a7bf_f9936fc3","line":1410,"range":{"start_line":1410,"start_character":58,"end_line":1410,"end_character":66},"updated":"2024-10-17 22:02:34.000000000","message":"Having a *response* as part of the API is interesting... 
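If I read the docstring right, the contract being discussed is roughly this (a sketch only; the two helpers are made-up stand-ins, not anything from the patch):

```python
def do_fetch_backend():
    # Hypothetical shape of the documented contract: return both the
    # parsed payload (what callers actually want) and the raw backend
    # response (only its status_int gets used, for metrics).
    resp = make_backend_request()        # stand-in for the real subrequest
    data = parse_namespaces(resp.body)   # stand-in for the real parsing
    return data, resp
```
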
It\u0027s not immediately obvious that it should *need* to be.\n\nI suppose I ought to see how it\u0027s used in the next patch.","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":35790,"name":"Shreeya Deshpande","email":"shreeyad@nvidia.com","username":"shreeyad"},"change_message_id":"ef6f61ef3e6e4337c5c7932be7b8720883945908","unresolved":true,"context_lines":[{"line_number":1407,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":1408,"context_line":"        memcached."},{"line_number":1409,"context_line":"    :param do_fetch_backend: a callable object to be called to fetch data from"},{"line_number":1410,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1411,"context_line":"    :param cache_encoder: a callable object to be called to convert the data"},{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"}],"source_content_type":"text/x-python","patch_set":32,"id":"c89fe430_cbceb1df","line":1410,"range":{"start_line":1410,"start_character":58,"end_line":1410,"end_character":66},"in_reply_to":"0acdb1d2_effbaaf1","updated":"2025-02-11 15:05:31.000000000","message":"I agree, it would be helpful later to emit metrics easily. I\u0027ve seen some places where the data we need is not exposed, and to add metrics there we have to go back into the code to dig it out somehow.","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":35790,"name":"Shreeya Deshpande","email":"shreeyad@nvidia.com","username":"shreeyad"},"change_message_id":"ef6f61ef3e6e4337c5c7932be7b8720883945908","unresolved":true,"context_lines":[{"line_number":1407,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":1408,"context_line":"        memcached."},{"line_number":1409,"context_line":"    :param do_fetch_backend: a callable object to be called to fetch data from"},{"line_number":1410,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1411,"context_line":"    :param cache_encoder: a callable object to be called to convert the data"},{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"}],"source_content_type":"text/x-python","patch_set":32,"id":"47c7b635_193332e2","line":1410,"range":{"start_line":1410,"start_character":58,"end_line":1410,"end_character":66},"in_reply_to":"0acdb1d2_effbaaf1","updated":"2025-02-11 15:05:31.000000000","message":"I agree, we definitely could use the entire response to emit metrics later.\nI found a few places where I don\u0027t get all the details of the request/response and we need to go back inside the code to get those. 
It would be great to have this ready for the next time we do \"emit_metrics_from_cache\" or \"emit_metrics_from_backend\" or something :)","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"df0123ddc6a5118de24bcab42bf97548be03393e","unresolved":true,"context_lines":[{"line_number":1407,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":1408,"context_line":"        memcached."},{"line_number":1409,"context_line":"    :param do_fetch_backend: a callable object to be called to fetch data from"},{"line_number":1410,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1411,"context_line":"    :param cache_encoder: a callable object to be called to convert the data"},{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"}],"source_content_type":"text/x-python","patch_set":32,"id":"0acdb1d2_effbaaf1","line":1410,"range":{"start_line":1410,"start_character":58,"end_line":1410,"end_character":66},"in_reply_to":"3dcd8750_88b9b572","updated":"2025-02-05 19:43:04.000000000","message":"Right, it\u0027s for metrics - the way this concrete implementation is designed it\u0027s agnostic to what the caller might want to emit in terms of telemetry.\n\n1) I think we might actually find once we have two use-cases for cooperative-token that we WANT them to emit consistent telemetry\n2) I think a better encapsulated design than \"leaking\" all the gory details of what this class is doing behind the scenes (so the caller can emit telemetry about what\u0027s happening behind this abstraction boundary) would be for the callers that need to \"change the default telemetry\" that comes with a cooperative-cache-token to use subclasses.  
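Sketching that shape (hedged; the base-class name echoes the later patch sets, and the constructor and hook names are illustrative, not code from this change):

```python
class CooperativeCachePopulator(object):
    # Base class: telemetry hooks are no-ops by default, so the
    # cache/token machinery stays encapsulated behind them.
    def __init__(self, logger):
        self.logger = logger

    def emit_metrics_from_cache(self):
        pass

    def emit_metrics_from_backend(self, status_int):
        pass


class NamespaceCachePopulator(CooperativeCachePopulator):
    # Hypothetical concrete subclass for the shard-range use-case,
    # overriding only the telemetry it cares about.
    def emit_metrics_from_backend(self, status_int):
        self.logger.increment('namespace.backend.%d' % status_int)
```
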
We could even have nice hooks with simple implementations like \"emit_metrics_from_cache\" or \"emit_metrics_from_backend\" or whatever we need.\n\nBut in truth OOP is as much a terrible disaster as any other software pattern; and just because \"all abstractions are leaky\" is an unavoidable truth, it doesn\u0027t mean we shouldn\u0027t try to make them as watertight as possible.","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49dd3537db5741f8d4531ffa1110fcd73b6f2811","unresolved":true,"context_lines":[{"line_number":1407,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":1408,"context_line":"        memcached."},{"line_number":1409,"context_line":"    :param do_fetch_backend: a callable object to be called to fetch data from"},{"line_number":1410,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1411,"context_line":"    :param cache_encoder: a callable object to be called to convert the data"},{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"}],"source_content_type":"text/x-python","patch_set":32,"id":"3dcd8750_88b9b572","line":1410,"range":{"start_line":1410,"start_character":58,"end_line":1410,"end_character":66},"in_reply_to":"5b99a7bf_f9936fc3","updated":"2024-10-24 01:03:53.000000000","message":"so far, only ``status_int`` within the returned ``response`` is used to emit metrics; we could just return the ``status_int``. I used the whole ``response`` just in case we may need something else in future.","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":false,"context_lines":[{"line_number":1407,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":1408,"context_line":"        memcached."},{"line_number":1409,"context_line":"    :param do_fetch_backend: a callable object to be called to fetch data from"},{"line_number":1410,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1411,"context_line":"    :param cache_encoder: a callable object to be called to convert the data"},{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"}],"source_content_type":"text/x-python","patch_set":32,"id":"0be6d211_1a28a4ac","line":1410,"range":{"start_line":1410,"start_character":58,"end_line":1410,"end_character":66},"in_reply_to":"6730d585_b0d21cc2","updated":"2025-05-05 21:32:09.000000000","message":"Done","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8bdc21d35e3e48c56a5ea5fa92dcad13ecb4881f","unresolved":true,"context_lines":[{"line_number":1407,"context_line":"    :param cache_ttl: time-to-live of the data fetched from backend to set into"},{"line_number":1408,"context_line":"        
memcached."},{"line_number":1409,"context_line":"    :param do_fetch_backend: a callable object to be called to fetch data from"},{"line_number":1410,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1411,"context_line":"    :param cache_encoder: a callable object to be called to convert the data"},{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"}],"source_content_type":"text/x-python","patch_set":32,"id":"6730d585_b0d21cc2","line":1410,"range":{"start_line":1410,"start_character":58,"end_line":1410,"end_character":66},"in_reply_to":"c89fe430_cbceb1df","updated":"2025-03-04 20:13:36.000000000","message":"okay, I implemented the sub-class design to be more OOP. \n\nFor the returned ``backend response``, we will convert it to just ``status_int``. But it requires refactoring of the API signature of existing ``record_cache_op_metrics`` and its usages, I left a ``TODO`` in the code, will do it with a follow-up.","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"7b315613f27a881219393fdb50c21e051627af79","unresolved":true,"context_lines":[{"line_number":1408,"context_line":"        memcached."},{"line_number":1409,"context_line":"    :param do_fetch_backend: a callable object to be called to fetch data from"},{"line_number":1410,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1411,"context_line":"    :param cache_encoder: a callable object to be called to convert the data"},{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"},{"line_number":1414,"context_line":"        retrieved from memcache to the same format as returned from backend."}],"source_content_type":"text/x-python","patch_set":32,"id":"2be7b2a7_fd41eb2b","line":1411,"range":{"start_line":1411,"start_character":11,"end_line":1411,"end_character":24},"updated":"2024-07-08 07:46:50.000000000","message":"Should we mention that this and cache_decoder is optional? I guess that doesn\u0027t matter. 
we can see they default to None.","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f3064c1509afef499bf100fa5ef7b516368dcf6c","unresolved":false,"context_lines":[{"line_number":1408,"context_line":"        memcached."},{"line_number":1409,"context_line":"    :param do_fetch_backend: a callable object to be called to fetch data from"},{"line_number":1410,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1411,"context_line":"    :param cache_encoder: a callable object to be called to convert the data"},{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"},{"line_number":1414,"context_line":"        retrieved from memcache to the same format as returned from backend."}],"source_content_type":"text/x-python","patch_set":32,"id":"21d49ebf_6f331287","line":1411,"range":{"start_line":1411,"start_character":11,"end_line":1411,"end_character":24},"in_reply_to":"2be7b2a7_fd41eb2b","updated":"2024-07-09 14:28:54.000000000","message":"Done","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"7b315613f27a881219393fdb50c21e051627af79","unresolved":true,"context_lines":[{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"},{"line_number":1414,"context_line":"        retrieved from memcache to the same format as returned from backend."},{"line_number":1415,"context_line":"    :param retry_interval: the basic interval to retry getting data from cache"},{"line_number":1416,"context_line":"        when waiting for other requests to populate the cache, suggest to be"},{"line_number":1417,"context_line":"        set as the average time spent on ``do_fetch_backend``."},{"line_number":1418,"context_line":"    :param num_tokens: the minimum limit of tokens per each usage sesssion,"}],"source_content_type":"text/x-python","patch_set":32,"id":"16498357_84e3b254","line":1415,"range":{"start_line":1415,"start_character":4,"end_line":1415,"end_character":26},"updated":"2024-07-08 07:46:50.000000000","message":"This should be moved up to before cache_encoder (line 1411). 
To maintain the call order of the function.","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f3064c1509afef499bf100fa5ef7b516368dcf6c","unresolved":false,"context_lines":[{"line_number":1412,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1413,"context_line":"    :param cache_decoder: a callable object to be called to convert the data"},{"line_number":1414,"context_line":"        retrieved from memcache to the same format as returned from backend."},{"line_number":1415,"context_line":"    :param retry_interval: the basic interval to retry getting data from cache"},{"line_number":1416,"context_line":"        when waiting for other requests to populate the cache, suggest to be"},{"line_number":1417,"context_line":"        set as the average time spent on ``do_fetch_backend``."},{"line_number":1418,"context_line":"    :param num_tokens: the minimum limit of tokens per each usage sesssion,"}],"source_content_type":"text/x-python","patch_set":32,"id":"9613bf18_77827289","line":1415,"range":{"start_line":1415,"start_character":4,"end_line":1415,"end_character":26},"in_reply_to":"16498357_84e3b254","updated":"2024-07-09 14:28:54.000000000","message":"Done","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"1531ae79327f5615c743507667d90fbd16588f07","unresolved":true,"context_lines":[{"line_number":1520,"context_line":"        \"\"\""},{"line_number":1521,"context_line":"        data \u003d None"},{"line_number":1522,"context_line":"        if not self._memcache:"},{"line_number":1523,"context_line":"            data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":1524,"context_line":"            return data"},{"line_number":1525,"context_line":""},{"line_number":1526,"context_line":"        # Try to get a cooperative token by using memcache increments."}],"source_content_type":"text/x-python","patch_set":36,"id":"eab9fa1b_f513caf6","line":1523,"updated":"2024-10-17 22:02:34.000000000","message":"We don\u0027t want to check `_infocache` at all? 
Nor do we *set it* in this case 😕","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49dd3537db5741f8d4531ffa1110fcd73b6f2811","unresolved":false,"context_lines":[{"line_number":1520,"context_line":"        \"\"\""},{"line_number":1521,"context_line":"        data \u003d None"},{"line_number":1522,"context_line":"        if not self._memcache:"},{"line_number":1523,"context_line":"            data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":1524,"context_line":"            return data"},{"line_number":1525,"context_line":""},{"line_number":1526,"context_line":"        # Try to get a cooperative token by using memcache increments."}],"source_content_type":"text/x-python","patch_set":36,"id":"680af3d1_84b845a5","line":1523,"in_reply_to":"eab9fa1b_f513caf6","updated":"2024-10-24 01:03:53.000000000","message":"nice catch, thanks!","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"1531ae79327f5615c743507667d90fbd16588f07","unresolved":true,"context_lines":[{"line_number":1527,"context_line":"        total_requests \u003d 0"},{"line_number":1528,"context_line":"        try:"},{"line_number":1529,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":1530,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":1531,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":1532,"context_line":"            self.set_cache_state \u003d \u0027inc_error\u0027"},{"line_number":1533,"context_line":""}],"source_content_type":"text/x-python","patch_set":36,"id":"d6953bdd_78e4b733","line":1530,"range":{"start_line":1530,"start_character":33,"end_line":1530,"end_character":41},"updated":"2024-10-17 22:02:34.000000000","message":"nit: Pretty sure this is the default; we could just leave it off.","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49dd3537db5741f8d4531ffa1110fcd73b6f2811","unresolved":false,"context_lines":[{"line_number":1527,"context_line":"        total_requests \u003d 0"},{"line_number":1528,"context_line":"        try:"},{"line_number":1529,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":1530,"context_line":"                self._token_key, delta\u003d1, time\u003dself._token_ttl)"},{"line_number":1531,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":1532,"context_line":"            self.set_cache_state \u003d \u0027inc_error\u0027"},{"line_number":1533,"context_line":""}],"source_content_type":"text/x-python","patch_set":36,"id":"762de318_be1adb18","line":1530,"range":{"start_line":1530,"start_character":33,"end_line":1530,"end_character":41},"in_reply_to":"d6953bdd_78e4b733","updated":"2024-10-24 01:03:53.000000000","message":"Acknowledged","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"1531ae79327f5615c743507667d90fbd16588f07","unresolved":true,"context_lines":[{"line_number":1533,"context_line":""},{"line_number":1534,"context_line":"        if not 
total_requests:"},{"line_number":1535,"context_line":"            # Couldn\u0027t connect to the memcache to increment the token key"},{"line_number":1536,"context_line":"            data \u003d self.query_backend_and_set_cache()"},{"line_number":1537,"context_line":"        elif total_requests \u003c\u003d self._num_tokens:"},{"line_number":1538,"context_line":"            # Acquired a cooperative token, go fetching data from backend and"},{"line_number":1539,"context_line":"            # set the data in memcache."}],"source_content_type":"text/x-python","patch_set":36,"id":"67ad9cb1_012aa0f8","line":1536,"updated":"2024-10-17 22:02:34.000000000","message":"I wonder if we might prefer to fail closed here -- if we\u0027re so overloaded that even memcache is having trouble, do we really want to be making more backend requests?","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49dd3537db5741f8d4531ffa1110fcd73b6f2811","unresolved":true,"context_lines":[{"line_number":1533,"context_line":""},{"line_number":1534,"context_line":"        if not total_requests:"},{"line_number":1535,"context_line":"            # Couldn\u0027t connect to the memcache to increment the token key"},{"line_number":1536,"context_line":"            data \u003d self.query_backend_and_set_cache()"},{"line_number":1537,"context_line":"        elif total_requests \u003c\u003d self._num_tokens:"},{"line_number":1538,"context_line":"            # Acquired a cooperative token, go fetching data from backend and"},{"line_number":1539,"context_line":"            # set the data in memcache."}],"source_content_type":"text/x-python","patch_set":36,"id":"b54489fa_b6e4c8d2","line":1536,"in_reply_to":"67ad9cb1_012aa0f8","updated":"2024-10-24 01:03:53.000000000","message":"it could be connection issues which cause the proxy couldn\u0027t access memcache, maybe backend is still okay, so backend requests still have chance to go through?","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"df0123ddc6a5118de24bcab42bf97548be03393e","unresolved":true,"context_lines":[{"line_number":1533,"context_line":""},{"line_number":1534,"context_line":"        if not total_requests:"},{"line_number":1535,"context_line":"            # Couldn\u0027t connect to the memcache to increment the token key"},{"line_number":1536,"context_line":"            data \u003d self.query_backend_and_set_cache()"},{"line_number":1537,"context_line":"        elif total_requests \u003c\u003d self._num_tokens:"},{"line_number":1538,"context_line":"            # Acquired a cooperative token, go fetching data from backend and"},{"line_number":1539,"context_line":"            # set the data in memcache."}],"source_content_type":"text/x-python","patch_set":36,"id":"f4f136dc_014ab795","line":1536,"in_reply_to":"b54489fa_b6e4c8d2","updated":"2025-02-05 19:43:04.000000000","message":"yeah I\u0027m not sure about fail open or fail closed - it seems like either might be reasonable and knowing which one is more robust in practice probably requires operational experience.\n\nIf anything we could argue that \"for the namespace-cache objects we\u0027ve seen open failure lead to backend storms we don\u0027t want\".  
But IIUC we\u0027re more likely to have memcache die than overloaded backend namespace-get containers these days - so maybe it turned out open failure is more robust in practice?\n\nFor the generic implementation you could also probably consider: if a future concrete implementation discovered the opposite case, how would it change this code to have the opposite behavior without breaking namespace caching?\n\nThere again, I think subclasses could more easily grow a \"if self._fail_open\" option that defaults to `True` for backwards compat.","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8bdc21d35e3e48c56a5ea5fa92dcad13ecb4881f","unresolved":false,"context_lines":[{"line_number":1533,"context_line":""},{"line_number":1534,"context_line":"        if not total_requests:"},{"line_number":1535,"context_line":"            # Couldn\u0027t connect to the memcache to increment the token key"},{"line_number":1536,"context_line":"            data \u003d self.query_backend_and_set_cache()"},{"line_number":1537,"context_line":"        elif total_requests \u003c\u003d self._num_tokens:"},{"line_number":1538,"context_line":"            # Acquired a cooperative token, go fetching data from backend and"},{"line_number":1539,"context_line":"            # set the data in memcache."}],"source_content_type":"text/x-python","patch_set":36,"id":"bd224a6b_c9266c8f","line":1536,"in_reply_to":"f4f136dc_014ab795","updated":"2025-03-04 20:13:36.000000000","message":"from recent operational experience and offline discussions with @clay.gerrard@gmail.com, ``fail closed`` is the way to go, since we\u0027re more likely to overload the backend namespace-get containers than to overload memcache. It\u0027ll be an improvement to better cope with some cases; I will implement it as a follow-up patch.","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"1531ae79327f5615c743507667d90fbd16588f07","unresolved":true,"context_lines":[{"line_number":1539,"context_line":"            # set the data in memcache."},{"line_number":1540,"context_line":"            self.token_acquired \u003d True"},{"line_number":1541,"context_line":"            data \u003d self.query_backend_and_set_cache()"},{"line_number":1542,"context_line":"            if self.set_cache_state \u003d\u003d \u0027set\u0027:"},{"line_number":1543,"context_line":"                # Since the successful finish of one whole cooperative token"},{"line_number":1544,"context_line":"                # session only depends on a single successful request. So when"},{"line_number":1545,"context_line":"                # any request with a token finishes both backend fetching and"}],"source_content_type":"text/x-python","patch_set":36,"id":"cface748_47a429d3","line":1542,"updated":"2024-10-17 22:02:34.000000000","message":"If this is ``set_error`` or `None`, do we ever release the token? I don\u0027t see any `decr`s... 
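(From a skim, the only release path looks like the delete-on-success one -- roughly this shape, paraphrasing the patch rather than quoting it:)

```python
            data = self.query_backend_and_set_cache()
            if self.set_cache_state == 'set':
                # One successful fetch+set ends the whole token
                # session, so drop the shared token key for everyone.
                self._memcache.delete(self._token_key)
            # On 'set_error'/None nothing deletes the key, so other
            # waiters hold off until its TTL expires.
```
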
I guess we just wait for the token key to time out?","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"49dd3537db5741f8d4531ffa1110fcd73b6f2811","unresolved":false,"context_lines":[{"line_number":1539,"context_line":"            # set the data in memcache."},{"line_number":1540,"context_line":"            self.token_acquired \u003d True"},{"line_number":1541,"context_line":"            data \u003d self.query_backend_and_set_cache()"},{"line_number":1542,"context_line":"            if self.set_cache_state \u003d\u003d \u0027set\u0027:"},{"line_number":1543,"context_line":"                # Since the successful finish of one whole cooperative token"},{"line_number":1544,"context_line":"                # session only depends on a single successful request. So when"},{"line_number":1545,"context_line":"                # any request with a token finishes both backend fetching and"}],"source_content_type":"text/x-python","patch_set":36,"id":"e98416a4_9fc39093","line":1542,"in_reply_to":"cface748_47a429d3","updated":"2024-10-24 01:03:53.000000000","message":"yes, the token key will be released only if a token request finishes both the backend fetch and the memcache set successfully; then it can remove the token for this token session. If this is ``set_error`` or ``None``, yeah, we will just wait for the token to time out.\n\n```\n        # Time-to-live of the cooperative token when set in memcached, default\n        # to be 10 times of the average time spent on ``do_fetch_backend``.\n        self._token_ttl \u003d retry_interval * 10\n```","commit_id":"67886f908de53e499ae9232b5cbb12f51682a9ba"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"df0123ddc6a5118de24bcab42bf97548be03393e","unresolved":true,"context_lines":[{"line_number":1528,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1529,"context_line":"    :param retry_interval: the basic interval to retry getting data from cache"},{"line_number":1530,"context_line":"        when waiting for other requests to populate the cache, suggest to be"},{"line_number":1531,"context_line":"        set as the average time spent on ``do_fetch_backend``."},{"line_number":1532,"context_line":"    :param cache_encoder: an optional callable object to convert the data"},{"line_number":1533,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1534,"context_line":"    :param cache_decoder: an optional callable object to convert the data"}],"source_content_type":"text/x-python","patch_set":38,"id":"2a1d31ef_0695cf67","line":1531,"updated":"2025-02-05 19:43:04.000000000","message":"In the next patch when we add shard-range-cache-object support using this object there are two configuration options:\n\nnamespace_cache_token_retry_interval \nnamespace_cache_tokens_per_session\n\nit\u0027s not exactly obvious from the names which of these kwargs map to which config names.\n\nI think, should we ever end up re-using this class in another context (which I hope we do!), 
it might be nice if various servers/daemons could use a single namespace_cache_* prefix w/ config_read_prefixed_options to store a `_cooperative_token_kwargs` dict that they pass in.\n\nhttps://github.com/NVIDIA/swift/blob/master/swift/common/utils/config.py#L160\n\nI guess honestly what you\u0027d want is\n\n```\nself.namespace_cache_populator \u003d ShardRangeCooperativeCacheFactory(\n    **config_read_prefixed_options(\u0027namespace_cache_\u0027, config))\n```\n\nthat would blow up during `__init__` and then later you:\n\n```\ncache_populator_resp \u003d self.namespace_cache_populator.fetch(req)\n```\n\nand then that `cache_populator_resp` has a `result` attribute (which is derived from `resp_from_cache` or `resp_from_backend` attributes) as well as any additional encapsulated state we need for metrics.","commit_id":"4249d3819d24c6e4ba0c440301a6ba935846f0a1"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8bdc21d35e3e48c56a5ea5fa92dcad13ecb4881f","unresolved":false,"context_lines":[{"line_number":1528,"context_line":"        the backend; it needs to return a tuple of (data, response)."},{"line_number":1529,"context_line":"    :param retry_interval: the basic interval to retry getting data from cache"},{"line_number":1530,"context_line":"        when waiting for other requests to populate the cache, suggest to be"},{"line_number":1531,"context_line":"        set as the average time spent on ``do_fetch_backend``."},{"line_number":1532,"context_line":"    :param cache_encoder: an optional callable object to convert the data"},{"line_number":1533,"context_line":"        retrieved from the backend to a different format to store in memcache."},{"line_number":1534,"context_line":"    :param cache_decoder: an optional callable object to convert the data"}],"source_content_type":"text/x-python","patch_set":38,"id":"b8994fd3_4f7bf7b2","line":1531,"in_reply_to":"2a1d31ef_0695cf67","updated":"2025-03-04 20:13:36.000000000","message":"I am refactoring the follow-up proxy patch (https://review.opendev.org/c/openstack/swift/+/908969) to use a new subclass inherited from the base class ``CooperativeCachePopulator``. The \"namespace_cache_*\" prefix of the new configs is only for the shard range namespace cache which the proxy server stores in memcache; other future usages probably would need a totally new prefix. Also, one server (e.g. 
proxy server) could have two or more usages of ``CooperativeCachePopulator``, so we do need to use a different prefix/name for each.","commit_id":"4249d3819d24c6e4ba0c440301a6ba935846f0a1"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"df0123ddc6a5118de24bcab42bf97548be03393e","unresolved":true,"context_lines":[{"line_number":1555,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1556,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"},{"line_number":1557,"context_line":"        self._cache_encoder \u003d cache_encoder if cache_encoder else (lambda x: x)"},{"line_number":1558,"context_line":"        self._cache_decoder \u003d cache_decoder if cache_decoder else (lambda x: x)"},{"line_number":1559,"context_line":"        # The status of cache set operations used internally."},{"line_number":1560,"context_line":"        self.set_cache_state \u003d None"},{"line_number":1561,"context_line":"        # Indicates if this request has acquired one token."}],"source_content_type":"text/x-python","patch_set":38,"id":"30e6450a_922dae9e","line":1558,"updated":"2025-02-05 19:43:04.000000000","message":"coming back to this after some time I\u0027m imagining that this would look better as a \"normal\" OOP-based subclass - the \"do_fetch_backend\" might be the only method that couldn\u0027t have a default implementation.\n\nI can understand the appeal of the single concrete class with dependency injection as well tho - so I\u0027m not sure I can make a strong argument that it \"should\" change as long as there\u0027s only one concrete implementation/use-case for shard-range-cache-objects.","commit_id":"4249d3819d24c6e4ba0c440301a6ba935846f0a1"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8bdc21d35e3e48c56a5ea5fa92dcad13ecb4881f","unresolved":false,"context_lines":[{"line_number":1555,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1556,"context_line":"        self._do_fetch_backend \u003d do_fetch_backend"},{"line_number":1557,"context_line":"        self._cache_encoder \u003d cache_encoder if cache_encoder else (lambda x: x)"},{"line_number":1558,"context_line":"        self._cache_decoder \u003d cache_decoder if cache_decoder else (lambda x: x)"},{"line_number":1559,"context_line":"        # The status of cache set operations used internally."},{"line_number":1560,"context_line":"        self.set_cache_state \u003d None"},{"line_number":1561,"context_line":"        # Indicates if this request has acquired one token."}],"source_content_type":"text/x-python","patch_set":38,"id":"59e39da9_e3fc51e2","line":1558,"in_reply_to":"30e6450a_922dae9e","updated":"2025-03-04 20:13:36.000000000","message":"Done","commit_id":"4249d3819d24c6e4ba0c440301a6ba935846f0a1"},{"author":{"_account_id":35790,"name":"Shreeya Deshpande","email":"shreeyad@nvidia.com","username":"shreeyad"},"change_message_id":"ef6f61ef3e6e4337c5c7932be7b8720883945908","unresolved":true,"context_lines":[{"line_number":1625,"context_line":"            # cache miss, retry again with exponential backoff"},{"line_number":1626,"context_line":"            retry_interval *\u003d 2"},{"line_number":1627,"context_line":"            cur_time \u003d time.time()"},{"line_number":1628,"context_line":"            continue"},{"line_number":1629,"context_line":"        return 
None"},{"line_number":1630,"context_line":""},{"line_number":1631,"context_line":"    def fetch_data(self):"}],"source_content_type":"text/x-python","patch_set":38,"id":"2918e609_cfffa232","line":1628,"range":{"start_line":1628,"start_character":12,"end_line":1628,"end_character":20},"updated":"2025-02-11 15:05:31.000000000","message":"Do we need a continue keyword here? I see that all tests do pass without it. Additionally if I put something after the keyword continue, it never executes (rightly because it continues to the next loop).","commit_id":"4249d3819d24c6e4ba0c440301a6ba935846f0a1"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"0806682f5503c59f52832926e105687e4337a8b4","unresolved":false,"context_lines":[{"line_number":1625,"context_line":"            # cache miss, retry again with exponential backoff"},{"line_number":1626,"context_line":"            retry_interval *\u003d 2"},{"line_number":1627,"context_line":"            cur_time \u003d time.time()"},{"line_number":1628,"context_line":"            continue"},{"line_number":1629,"context_line":"        return None"},{"line_number":1630,"context_line":""},{"line_number":1631,"context_line":"    def fetch_data(self):"}],"source_content_type":"text/x-python","patch_set":38,"id":"2557a604_643c7b0d","line":1628,"range":{"start_line":1628,"start_character":12,"end_line":1628,"end_character":20},"in_reply_to":"2918e609_cfffa232","updated":"2025-03-04 21:39:45.000000000","message":"Done","commit_id":"4249d3819d24c6e4ba0c440301a6ba935846f0a1"},{"author":{"_account_id":35790,"name":"Shreeya Deshpande","email":"shreeyad@nvidia.com","username":"shreeyad"},"change_message_id":"ef6f61ef3e6e4337c5c7932be7b8720883945908","unresolved":true,"context_lines":[{"line_number":1636,"context_line":"        :returns: value of the data fetched from backend or memcache; None if"},{"line_number":1637,"context_line":"            not exist."},{"line_number":1638,"context_line":"        \"\"\""},{"line_number":1639,"context_line":"        data \u003d None"},{"line_number":1640,"context_line":"        if not self._memcache:"},{"line_number":1641,"context_line":"            data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":1642,"context_line":"            if self._infocache is not None and data:"}],"source_content_type":"text/x-python","patch_set":38,"id":"d2697d6c_469d4043","line":1639,"range":{"start_line":1639,"start_character":8,"end_line":1639,"end_character":19},"updated":"2025-02-11 15:05:31.000000000","message":"Commenting this out passes all the tests. We don\u0027t have a test where we don\u0027t get data and it has to take this line\u0027s data\u003dNone! 
Or do we always get data no matter what?","commit_id":"4249d3819d24c6e4ba0c440301a6ba935846f0a1"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"0806682f5503c59f52832926e105687e4337a8b4","unresolved":false,"context_lines":[{"line_number":1636,"context_line":"        :returns: value of the data fetched from backend or memcache; None if"},{"line_number":1637,"context_line":"            not exist."},{"line_number":1638,"context_line":"        \"\"\""},{"line_number":1639,"context_line":"        data \u003d None"},{"line_number":1640,"context_line":"        if not self._memcache:"},{"line_number":1641,"context_line":"            data, self.backend_response \u003d self._do_fetch_backend()"},{"line_number":1642,"context_line":"            if self._infocache is not None and data:"}],"source_content_type":"text/x-python","patch_set":38,"id":"2d44628c_5187e8a0","line":1639,"range":{"start_line":1639,"start_character":8,"end_line":1639,"end_character":19},"in_reply_to":"d2697d6c_469d4043","updated":"2025-03-04 21:39:45.000000000","message":"thanks for pointing it out, I removed this line.\n\nWe do have a test case where we don\u0027t get data from the backend, see ``test_concurrent_requests_all_token_requests_fail``.","commit_id":"4249d3819d24c6e4ba0c440301a6ba935846f0a1"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"fd187de69854f2137151bab18c2f131b23d38789","unresolved":true,"context_lines":[{"line_number":1514,"context_line":"    without a token should wait for cache filling to finish, instead of all"},{"line_number":1515,"context_line":"    querying the backend servers at the same time. After those requests with"},{"line_number":1516,"context_line":"    token are done, they will release the token by deleting the internal cache"},{"line_number":1517,"context_line":"    key and finish this usage session."},{"line_number":1518,"context_line":""},{"line_number":1519,"context_line":"    Cooperative cache populator uses ``num_tokens`` to define the minimum limit"},{"line_number":1520,"context_line":"    of tokens during one usage session, default to be 3. This is used to"}],"source_content_type":"text/x-python","patch_set":43,"id":"4887b920_25614e71","line":1517,"range":{"start_line":1517,"start_character":30,"end_line":1517,"end_character":37},"updated":"2025-04-30 20:19:47.000000000","message":"What do we mean by \"session\"? Is this something local to a single request, or some time period shared by all requests?","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":false,"context_lines":[{"line_number":1514,"context_line":"    without a token should wait for cache filling to finish, instead of all"},{"line_number":1515,"context_line":"    querying the backend servers at the same time. After those requests with"},{"line_number":1516,"context_line":"    token are done, they will release the token by deleting the internal cache"},{"line_number":1517,"context_line":"    key and finish this usage session."},{"line_number":1518,"context_line":""},{"line_number":1519,"context_line":"    Cooperative cache populator uses ``num_tokens`` to define the minimum limit"},{"line_number":1520,"context_line":"    of tokens during one usage session, default to be 3. 
This is used to"}],"source_content_type":"text/x-python","patch_set":43,"id":"86ca73a2_b6c24125","line":1517,"range":{"start_line":1517,"start_character":30,"end_line":1517,"end_character":37},"in_reply_to":"4887b920_25614e71","updated":"2025-05-01 21:34:53.000000000","message":"I added more words to describe it.","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"fd187de69854f2137151bab18c2f131b23d38789","unresolved":true,"context_lines":[{"line_number":1516,"context_line":"    token are done, they will release the token by deleting the internal cache"},{"line_number":1517,"context_line":"    key and finish this usage session."},{"line_number":1518,"context_line":""},{"line_number":1519,"context_line":"    Cooperative cache populator uses ``num_tokens`` to define the minimum limit"},{"line_number":1520,"context_line":"    of tokens during one usage session, default to be 3. This is used to"},{"line_number":1521,"context_line":"    increase fault tolerance in the distributed environment, when one request"},{"line_number":1522,"context_line":"    with token hangs or exits, any other requests with token still can set new"}],"source_content_type":"text/x-python","patch_set":43,"id":"e4ce4b76_b7ec1e37","line":1519,"range":{"start_line":1519,"start_character":66,"end_line":1519,"end_character":79},"updated":"2025-04-30 20:19:47.000000000","message":"Should this be \"maximum number\"?","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":false,"context_lines":[{"line_number":1516,"context_line":"    token are done, they will release the token by deleting the internal cache"},{"line_number":1517,"context_line":"    key and finish this usage session."},{"line_number":1518,"context_line":""},{"line_number":1519,"context_line":"    Cooperative cache populator uses ``num_tokens`` to define the minimum limit"},{"line_number":1520,"context_line":"    of tokens during one usage session, default to be 3. 
This is used to"},{"line_number":1521,"context_line":"    increase fault tolerance in the distributed environment, when one request"},{"line_number":1522,"context_line":"    with token hangs or exits, any other requests with token still can set new"}],"source_content_type":"text/x-python","patch_set":43,"id":"08d1f54a_baceedc1","line":1519,"range":{"start_line":1519,"start_character":66,"end_line":1519,"end_character":79},"in_reply_to":"e4ce4b76_b7ec1e37","updated":"2025-05-01 21:34:53.000000000","message":"Acknowledged","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"8cb0bedc0a1b6930e475200ca55c5b594a829656","unresolved":true,"context_lines":[{"line_number":1566,"context_line":"        self._token_ttl \u003d retry_interval * 10"},{"line_number":1567,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1568,"context_line":"        # Indicates if this request is served out of Memcached."},{"line_number":1569,"context_line":"        self._req_served_from_cache \u003d False"},{"line_number":1570,"context_line":"        # The status of cache operation which sets backend data into Memcached."},{"line_number":1571,"context_line":"        self.set_cache_state \u003d None"},{"line_number":1572,"context_line":"        # Indicates if this request has acquired one token."}],"source_content_type":"text/x-python","patch_set":43,"id":"00d5653b_7f0b3735","line":1569,"updated":"2025-04-29 22:33:26.000000000","message":"I feel like this could be a local in `fetch_data` if we base it off whether the result of `_sleep_and_retry_memcache` is `None` or not.","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":false,"context_lines":[{"line_number":1566,"context_line":"        self._token_ttl \u003d retry_interval * 10"},{"line_number":1567,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1568,"context_line":"        # Indicates if this request is served out of Memcached."},{"line_number":1569,"context_line":"        self._req_served_from_cache \u003d False"},{"line_number":1570,"context_line":"        # The status of cache operation which sets backend data into Memcached."},{"line_number":1571,"context_line":"        self.set_cache_state \u003d None"},{"line_number":1572,"context_line":"        # Indicates if this request has acquired one token."}],"source_content_type":"text/x-python","patch_set":43,"id":"f7f4982a_3a280926","line":1569,"in_reply_to":"00d5653b_7f0b3735","updated":"2025-05-01 21:34:53.000000000","message":"Yes! 
actually it\u0027s also better to be local to the ``fetch_data`` function, since the below code in ``_sleep_and_retry_memcache`` could also return None even when ``cache_data`` is not None.\n\n```\n            if cache_data:\n                # cache hit.\n                decoded_data \u003d self.cache_decoder(cache_data)\n                return decoded_data\n```","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"8cb0bedc0a1b6930e475200ca55c5b594a829656","unresolved":true,"context_lines":[{"line_number":1580,"context_line":""},{"line_number":1581,"context_line":"        :returns: a tuple of (data, response)."},{"line_number":1582,"context_line":"        \"\"\""},{"line_number":1583,"context_line":"        pass"},{"line_number":1584,"context_line":""},{"line_number":1585,"context_line":"    def cache_encoder(self, data):"},{"line_number":1586,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":43,"id":"88af1137_d50c053b","line":1583,"updated":"2025-04-29 22:33:26.000000000","message":"Not `raise NotImplementedError`?","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":false,"context_lines":[{"line_number":1580,"context_line":""},{"line_number":1581,"context_line":"        :returns: a tuple of (data, response)."},{"line_number":1582,"context_line":"        \"\"\""},{"line_number":1583,"context_line":"        pass"},{"line_number":1584,"context_line":""},{"line_number":1585,"context_line":"    def cache_encoder(self, data):"},{"line_number":1586,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":43,"id":"b1373faf_4a78129c","line":1583,"in_reply_to":"88af1137_d50c053b","updated":"2025-05-01 21:34:53.000000000","message":"Acknowledged","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"8cb0bedc0a1b6930e475200ca55c5b594a829656","unresolved":true,"context_lines":[{"line_number":1589,"context_line":""},{"line_number":1590,"context_line":"        :returns: encoded data."},{"line_number":1591,"context_line":"        \"\"\""},{"line_number":1592,"context_line":"        return data"},{"line_number":1593,"context_line":""},{"line_number":1594,"context_line":"    def cache_decoder(self, data):"},{"line_number":1595,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":43,"id":"ef14caf5_ce10b8af","line":1592,"updated":"2025-04-29 22:33:26.000000000","message":"This seems like it _might_ be a reasonable default implementation, but then we should probably clarify the docstring.","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":false,"context_lines":[{"line_number":1589,"context_line":""},{"line_number":1590,"context_line":"        :returns: encoded data."},{"line_number":1591,"context_line":"        \"\"\""},{"line_number":1592,"context_line":"        return data"},{"line_number":1593,"context_line":""},{"line_number":1594,"context_line":"    def cache_decoder(self, data):"},{"line_number":1595,"context_line":"        
\"\"\""}],"source_content_type":"text/x-python","patch_set":43,"id":"33271ce6_6feb1b00","line":1592,"in_reply_to":"ef14caf5_ce10b8af","updated":"2025-05-01 21:34:53.000000000","message":"Done","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"8cb0bedc0a1b6930e475200ca55c5b594a829656","unresolved":true,"context_lines":[{"line_number":1653,"context_line":"                # cache hit."},{"line_number":1654,"context_line":"                self._req_served_from_cache \u003d True"},{"line_number":1655,"context_line":"                self._logger.increment("},{"line_number":1656,"context_line":"                    \u0027token.%s.cache_served_reqs\u0027 % self._op_type)"},{"line_number":1657,"context_line":"                decoded_data \u003d self.cache_decoder(cache_data)"},{"line_number":1658,"context_line":"                return decoded_data"},{"line_number":1659,"context_line":"            # cache miss, retry again with exponential backoff"}],"source_content_type":"text/x-python","patch_set":43,"id":"bf18a8bf_de7ca7f4","line":1656,"updated":"2025-04-29 22:33:26.000000000","message":"Might be better to do this in `fetch_data`, where most of the other increments occur -- _especially_ since that\u0027s where all the other increments happen to ensure that\n```\n            # Total number of requests equals to \u0027cache_served_reqs\u0027 plus\n            # \u0027backend_reqs.with_token\u0027 and \u0027backend_reqs.no_token\u0027.\n```","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":false,"context_lines":[{"line_number":1653,"context_line":"                # cache hit."},{"line_number":1654,"context_line":"                self._req_served_from_cache \u003d True"},{"line_number":1655,"context_line":"                self._logger.increment("},{"line_number":1656,"context_line":"                    \u0027token.%s.cache_served_reqs\u0027 % self._op_type)"},{"line_number":1657,"context_line":"                decoded_data \u003d self.cache_decoder(cache_data)"},{"line_number":1658,"context_line":"                return decoded_data"},{"line_number":1659,"context_line":"            # cache miss, retry again with exponential backoff"}],"source_content_type":"text/x-python","patch_set":43,"id":"9e70aab5_c9e6fc48","line":1656,"in_reply_to":"bf18a8bf_de7ca7f4","updated":"2025-05-01 21:34:53.000000000","message":"Acknowledged","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"8cb0bedc0a1b6930e475200ca55c5b594a829656","unresolved":true,"context_lines":[{"line_number":1697,"context_line":"            # set the data in memcache."},{"line_number":1698,"context_line":"            self.token_acquired \u003d True"},{"line_number":1699,"context_line":"            data \u003d self._query_backend_and_set_cache()"},{"line_number":1700,"context_line":"            if self.backend_resp:"},{"line_number":1701,"context_line":"                self._logger.increment("},{"line_number":1702,"context_line":"                    \u0027token.%s.backend_reqs.with_token.%d\u0027 %"},{"line_number":1703,"context_line":"                    (self._op_type, 
self.backend_resp.status_int)"}],"source_content_type":"text/x-python","patch_set":43,"id":"da5bcf66_6ce7be12","line":1700,"updated":"2025-04-29 22:33:26.000000000","message":"When will we _not_ have a response here?","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"a89ce01fc06197ae7d361499c8aefb52f52772c0","unresolved":true,"context_lines":[{"line_number":1697,"context_line":"            # set the data in memcache."},{"line_number":1698,"context_line":"            self.token_acquired \u003d True"},{"line_number":1699,"context_line":"            data \u003d self._query_backend_and_set_cache()"},{"line_number":1700,"context_line":"            if self.backend_resp:"},{"line_number":1701,"context_line":"                self._logger.increment("},{"line_number":1702,"context_line":"                    \u0027token.%s.backend_reqs.with_token.%d\u0027 %"},{"line_number":1703,"context_line":"                    (self._op_type, self.backend_resp.status_int)"}],"source_content_type":"text/x-python","patch_set":43,"id":"2c7a489d_a63f4064","line":1700,"in_reply_to":"15bd7e42_fd7213c6","updated":"2025-05-02 17:56:01.000000000","message":"I guess I\u0027m trying to make sure I understand what counters get incremented in what circumstances -- there\u0027s a lot of branches here, and a lot of tightly (but maybe not obviously) coupled variables! It\u0027s not immediately obvious that the comment about `Total number of requests equals ...` is true.\n\nIt seems like we only increment `token.*.backend_reqs.*` if we have a _response_ -- and it\u0027s not clear that covers all _requests_. What about when there\u0027s a timeout or socket error?","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1697,"context_line":"            # set the data in memcache."},{"line_number":1698,"context_line":"            self.token_acquired \u003d True"},{"line_number":1699,"context_line":"            data \u003d self._query_backend_and_set_cache()"},{"line_number":1700,"context_line":"            if self.backend_resp:"},{"line_number":1701,"context_line":"                self._logger.increment("},{"line_number":1702,"context_line":"                    \u0027token.%s.backend_reqs.with_token.%d\u0027 %"},{"line_number":1703,"context_line":"                    (self._op_type, self.backend_resp.status_int)"}],"source_content_type":"text/x-python","patch_set":43,"id":"d55c6ece_e2d336e2","line":1700,"in_reply_to":"2c7a489d_a63f4064","updated":"2025-05-05 21:32:09.000000000","message":"\u003e this will be when the query of backend return None\n\nif we\u0027re trying to handle `data \u003d None` then why are we testing `not self.backend_resp`\n\nI don\u0027t see how we can ever not have a backend_resp short of... I don\u0027t know... getting the response from cache or something?   Way down in base controller this is a call into the proxy app itself - if there\u0027s a backend error/timeout we should still generate a 503 resp:\n\n```\n        response \u003d self.app.handle_request(subreq)\n```\n\n... 
maybe an unhandled exception could throw a stack trace to who knows where, but I think that\u0027s out of scope.","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"3b5bb973ba22ccc90551db803b513941f4f5b925","unresolved":false,"context_lines":[{"line_number":1697,"context_line":"            # set the data in memcache."},{"line_number":1698,"context_line":"            self.token_acquired \u003d True"},{"line_number":1699,"context_line":"            data \u003d self._query_backend_and_set_cache()"},{"line_number":1700,"context_line":"            if self.backend_resp:"},{"line_number":1701,"context_line":"                self._logger.increment("},{"line_number":1702,"context_line":"                    \u0027token.%s.backend_reqs.with_token.%d\u0027 %"},{"line_number":1703,"context_line":"                    (self._op_type, self.backend_resp.status_int)"}],"source_content_type":"text/x-python","patch_set":43,"id":"d155b6f9_a957bc3f","line":1700,"in_reply_to":"d55c6ece_e2d336e2","updated":"2025-05-07 18:07:31.000000000","message":"I see, so ``self.backend_resp`` will still be a valid resp even with errors/timeouts; I got them fixed.","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":true,"context_lines":[{"line_number":1697,"context_line":"            # set the data in memcache."},{"line_number":1698,"context_line":"            self.token_acquired \u003d True"},{"line_number":1699,"context_line":"            data \u003d self._query_backend_and_set_cache()"},{"line_number":1700,"context_line":"            if self.backend_resp:"},{"line_number":1701,"context_line":"                self._logger.increment("},{"line_number":1702,"context_line":"                    \u0027token.%s.backend_reqs.with_token.%d\u0027 %"},{"line_number":1703,"context_line":"                    (self._op_type, self.backend_resp.status_int)"}],"source_content_type":"text/x-python","patch_set":43,"id":"15bd7e42_fd7213c6","line":1700,"in_reply_to":"da5bcf66_6ce7be12","updated":"2025-05-01 21:34:53.000000000","message":"this will be when the query of the backend returns ``None``, probably during some kind of failure where this proxy-server node can\u0027t connect to any backend servers?\n\n\n```\n        data, self.backend_resp \u003d self.do_fetch_backend()\n        if not data:\n            return None\n```","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"8cb0bedc0a1b6930e475200ca55c5b594a829656","unresolved":true,"context_lines":[{"line_number":1715,"context_line":"        else:"},{"line_number":1716,"context_line":"            # No token acquired, it means that there are requests in-flight"},{"line_number":1717,"context_line":"            # which will fetch data form the backend servers and update them in"},{"line_number":1718,"context_line":"            # cache, let\u0027s wait for them to finish with limited retires."},{"line_number":1719,"context_line":"            data \u003d self._sleep_and_retry_memcache()"},{"line_number":1720,"context_line":"            if not data:"},{"line_number":1721,"context_line":"                # Still no cache data 
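The invariant Clay points out above: the backend fetch bottoms out in ``self.app.handle_request(subreq)``, which synthesizes a response (e.g. a 503) even on backend errors or timeouts. Under that assumption, a status-labelled counter next to the fetch covers every backend request, roughly:

```
        data, self.backend_resp = self.do_fetch_backend()
        # even on timeouts the proxy app returns a response object
        # (a synthesized 503), so backend_resp should never be None and
        # counting by status_int covers every backend request
        self._logger.increment(
            'token.%s.backend_reqs.with_token.%d' %
            (self._op_type, self.backend_resp.status_int))
```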
fetched."}],"source_content_type":"text/x-python","patch_set":43,"id":"079197d6_bd66fa2d","line":1718,"range":{"start_line":1718,"start_character":64,"end_line":1718,"end_character":71},"updated":"2025-04-29 22:33:26.000000000","message":"\"retries\"","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":false,"context_lines":[{"line_number":1715,"context_line":"        else:"},{"line_number":1716,"context_line":"            # No token acquired, it means that there are requests in-flight"},{"line_number":1717,"context_line":"            # which will fetch data form the backend servers and update them in"},{"line_number":1718,"context_line":"            # cache, let\u0027s wait for them to finish with limited retires."},{"line_number":1719,"context_line":"            data \u003d self._sleep_and_retry_memcache()"},{"line_number":1720,"context_line":"            if not data:"},{"line_number":1721,"context_line":"                # Still no cache data fetched."}],"source_content_type":"text/x-python","patch_set":43,"id":"2994e570_85cbd9d9","line":1718,"range":{"start_line":1718,"start_character":64,"end_line":1718,"end_character":71},"in_reply_to":"079197d6_bd66fa2d","updated":"2025-05-01 21:34:53.000000000","message":"Acknowledged","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"8cb0bedc0a1b6930e475200ca55c5b594a829656","unresolved":true,"context_lines":[{"line_number":1731,"context_line":"            )"},{"line_number":1732,"context_line":"        return data"},{"line_number":1733,"context_line":""},{"line_number":1734,"context_line":"    def is_token_request_done(self):"},{"line_number":1735,"context_line":"        \"\"\""},{"line_number":1736,"context_line":"        Indicates if this request has acquired one token and finished all"},{"line_number":1737,"context_line":"        operations, both backend and memcache, successfully."}],"source_content_type":"text/x-python","patch_set":43,"id":"1410ced5_df3fa801","line":1734,"updated":"2025-04-29 22:33:26.000000000","message":"Does anything use this?","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"1e31abe36b5c77333acfe388107d315e43177622","unresolved":false,"context_lines":[{"line_number":1731,"context_line":"            )"},{"line_number":1732,"context_line":"        return data"},{"line_number":1733,"context_line":""},{"line_number":1734,"context_line":"    def is_token_request_done(self):"},{"line_number":1735,"context_line":"        \"\"\""},{"line_number":1736,"context_line":"        Indicates if this request has acquired one token and finished all"},{"line_number":1737,"context_line":"        operations, both backend and memcache, successfully."}],"source_content_type":"text/x-python","patch_set":43,"id":"f5cedb95_3f9118e2","line":1734,"in_reply_to":"1410ced5_df3fa801","updated":"2025-05-01 21:34:53.000000000","message":"good catch! 
we don\u0027t need it anymore after the refactoring.","commit_id":"13c72a81b1cf362615a4a1cfed7aa24d4848fc9e"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1591,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":1592,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":1593,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":1594,"context_line":"        self._token_ttl \u003d retry_interval * 10"},{"line_number":1595,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1596,"context_line":"        # The status of cache operation which sets backend data into Memcached."},{"line_number":1597,"context_line":"        self.set_cache_state \u003d None"}],"source_content_type":"text/x-python","patch_set":44,"id":"901bdc71_732535ed","line":1594,"updated":"2025-05-05 21:32:09.000000000","message":"we\u0027re relying on this memcache ttl doing a lot of work to free up for the next round of tokens, but only if all the winners fail (otherwise they\u0027ll del)...\n\nI strictly do not like token_ttl being coupled with `retry_interval` - they are orthogonal concerns with different trade-offs.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[{"line_number":1591,"context_line":"        self._retry_interval \u003d retry_interval"},{"line_number":1592,"context_line":"        # Time-to-live of the cooperative token when set in memcached, default"},{"line_number":1593,"context_line":"        # to be 10 times of the average time spent on ``do_fetch_backend``."},{"line_number":1594,"context_line":"        self._token_ttl \u003d retry_interval * 10"},{"line_number":1595,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1596,"context_line":"        # The status of cache operation which sets backend data into Memcached."},{"line_number":1597,"context_line":"        self.set_cache_state \u003d None"}],"source_content_type":"text/x-python","patch_set":44,"id":"8a57ad03_d1b105e9","line":1594,"in_reply_to":"901bdc71_732535ed","updated":"2025-05-07 05:08:33.000000000","message":"as discussed offline, will rename ``retry_interval`` to ``avg_backend_fetch_time``, and also explore new configs to tune retry algorithm later on.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1594,"context_line":"        self._token_ttl \u003d retry_interval * 10"},{"line_number":1595,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1596,"context_line":"        # The status of cache operation which sets backend data into Memcached."},{"line_number":1597,"context_line":"        self.set_cache_state \u003d None"},{"line_number":1598,"context_line":"        # Indicates if this request has acquired one token."},{"line_number":1599,"context_line":"        self.token_acquired \u003d 
False"},{"line_number":1600,"context_line":"        # The HttpResponse object returned by ``do_fetch_backend`` if called."}],"source_content_type":"text/x-python","patch_set":44,"id":"986c293f_276b9cae","line":1597,"updated":"2025-05-05 21:32:09.000000000","message":"this isn\u0027t mentioned at ALL in the doc-string?  But the `CooperativeNamespaceCachePopulator` absolutely requires it when calling `record_cache_op_metrics`\n\nI think we could reduce the burden on the caller if we took the responsibility for logging the memcache metrics into this class.\n\n```\ndef record_cache_op_metrics(\n        logger, server_type, op_type, cache_state, resp\u003dNone):\n```\n\nwe already have `op_type` - and the only concrete implementation passes in a ctrl so we could steal `server_type` from there.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[{"line_number":1594,"context_line":"        self._token_ttl \u003d retry_interval * 10"},{"line_number":1595,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1596,"context_line":"        # The status of cache operation which sets backend data into Memcached."},{"line_number":1597,"context_line":"        self.set_cache_state \u003d None"},{"line_number":1598,"context_line":"        # Indicates if this request has acquired one token."},{"line_number":1599,"context_line":"        self.token_acquired \u003d False"},{"line_number":1600,"context_line":"        # The HttpResponse object returned by ``do_fetch_backend`` if called."}],"source_content_type":"text/x-python","patch_set":44,"id":"fb2c027c_4343bc40","line":1597,"in_reply_to":"986c293f_276b9cae","updated":"2025-05-07 05:08:33.000000000","message":"Done","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1654,"context_line":"        limited number of sleeps. when ``token_ttl`` is 10 times of"},{"line_number":1655,"context_line":"        ``retry_interval`` and the exponential backoff doubles the retry"},{"line_number":1656,"context_line":"        interval after each retry, normally this function will only sleep and"},{"line_number":1657,"context_line":"        retry 3 times."},{"line_number":1658,"context_line":""},{"line_number":1659,"context_line":"        :returns: value of the data fetched from Memcached; None if not exist."},{"line_number":1660,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":44,"id":"d2f49720_156f98ae","line":1657,"updated":"2025-05-05 21:32:09.000000000","message":"what evidence do we have this is the correct memcache retry behavior?\n\nI could see ops wanting an explicit control over the number of retries, or possibly the ability to set `retry_interval \u003d 0` to mean \"retry memcache as fast as possible\"","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[{"line_number":1654,"context_line":"        limited number of sleeps. 
when ``token_ttl`` is 10 times of"},{"line_number":1655,"context_line":"        ``retry_interval`` and the exponential backoff doubles the retry"},{"line_number":1656,"context_line":"        interval after each retry, normally this function will only sleep and"},{"line_number":1657,"context_line":"        retry 3 times."},{"line_number":1658,"context_line":""},{"line_number":1659,"context_line":"        :returns: value of the data fetched from Memcached; None if not exist."},{"line_number":1660,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":44,"id":"5615f312_ef07eba5","line":1657,"in_reply_to":"d2f49720_156f98ae","updated":"2025-05-07 05:08:33.000000000","message":"as discussed offline, will explore new configs to tune retry algorithm later on.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1660,"context_line":"        \"\"\""},{"line_number":1661,"context_line":"        cur_time \u003d time.time()"},{"line_number":1662,"context_line":"        cutoff_time \u003d cur_time + self._token_ttl"},{"line_number":1663,"context_line":"        retry_interval \u003d self._retry_interval * 1.5"},{"line_number":1664,"context_line":"        num_waits \u003d 0"},{"line_number":1665,"context_line":"        while cur_time \u003c cutoff_time or num_waits \u003c 3:"},{"line_number":1666,"context_line":"            if cur_time \u003c cutoff_time:"}],"source_content_type":"text/x-python","patch_set":44,"id":"dea8c4aa_7d239273","line":1663,"updated":"2025-05-05 21:32:09.000000000","message":"why do we start with a 1.5x retry interval?  
Shouldn\u0027t at least the *first* sleep be equal to the configured value?","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[{"line_number":1660,"context_line":"        \"\"\""},{"line_number":1661,"context_line":"        cur_time \u003d time.time()"},{"line_number":1662,"context_line":"        cutoff_time \u003d cur_time + self._token_ttl"},{"line_number":1663,"context_line":"        retry_interval \u003d self._retry_interval * 1.5"},{"line_number":1664,"context_line":"        num_waits \u003d 0"},{"line_number":1665,"context_line":"        while cur_time \u003c cutoff_time or num_waits \u003c 3:"},{"line_number":1666,"context_line":"            if cur_time \u003c cutoff_time:"}],"source_content_type":"text/x-python","patch_set":44,"id":"71278845_ab7f339c","line":1663,"in_reply_to":"dea8c4aa_7d239273","updated":"2025-05-07 05:08:33.000000000","message":"will rename ``retry_interval`` to ``avg_backend_fetch_time``","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1668,"context_line":"                num_waits +\u003d 1"},{"line_number":1669,"context_line":"            else:"},{"line_number":1670,"context_line":"                # Request has no token and doesn\u0027t get enough retries."},{"line_number":1671,"context_line":"                self._logger.increment(\u0027token.%s.lack_retries\u0027 % self._op_type)"},{"line_number":1672,"context_line":"                # To have one last check, when eventlet scheduling didn\u0027t give"},{"line_number":1673,"context_line":"                # this greenthread enough cpu cycles and it didn\u0027t have enough"},{"line_number":1674,"context_line":"                # times of retries."}],"source_content_type":"text/x-python","patch_set":44,"id":"3cf4536a_ff3b3d70","line":1671,"updated":"2025-05-05 21:32:09.000000000","message":"now that we have labeled metrics we could probably make the case that these should be re-worked.  
I\u0027m not sure how well I understand the existing legacy metrics anyway, and fewer increment sites with some thoughtful labeled metrics might make them easier to reason about.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[{"line_number":1668,"context_line":"                num_waits +\u003d 1"},{"line_number":1669,"context_line":"            else:"},{"line_number":1670,"context_line":"                # Request has no token and doesn\u0027t get enough retries."},{"line_number":1671,"context_line":"                self._logger.increment(\u0027token.%s.lack_retries\u0027 % self._op_type)"},{"line_number":1672,"context_line":"                # To have one last check, when eventlet scheduling didn\u0027t give"},{"line_number":1673,"context_line":"                # this greenthread enough cpu cycles and it didn\u0027t have enough"},{"line_number":1674,"context_line":"                # times of retries."}],"source_content_type":"text/x-python","patch_set":44,"id":"69095599_225d6161","line":1671,"in_reply_to":"3cf4536a_ff3b3d70","updated":"2025-05-07 05:08:33.000000000","message":"Acknowledged","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1675,"context_line":"                num_waits \u003d 3"},{"line_number":1676,"context_line":"            cache_data \u003d self._memcache.get("},{"line_number":1677,"context_line":"                self._cache_key, raise_on_error\u003dFalse)"},{"line_number":1678,"context_line":"            if cache_data:"},{"line_number":1679,"context_line":"                # cache hit."},{"line_number":1680,"context_line":"                decoded_data \u003d self.cache_decoder(cache_data)"},{"line_number":1681,"context_line":"                return decoded_data"}],"source_content_type":"text/x-python","patch_set":44,"id":"4946bceb_7367ff6a","line":1678,"updated":"2025-05-05 21:32:09.000000000","message":"this is a little strange; over in the caller we\u0027d already fetched from memcache *once* and we\u0027re going to log a `get_cache_state \u003d miss|skip` but then in using the CooperativePopulator we may end up getting the value from memcache without any `record_cache_op_metrics`.\n\nI think if we moved the \"initial-fetch-from-memcache-or-skip\" behavior into the `CooperativePopulator` it\u0027d be easier to have consistent handling of `record_cache_op_metrics` and more control over what are the correct metrics to emit for the final `cache_op_metric{}","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":false,"context_lines":[{"line_number":1675,"context_line":"                num_waits \u003d 3"},{"line_number":1676,"context_line":"            cache_data \u003d self._memcache.get("},{"line_number":1677,"context_line":"                self._cache_key, raise_on_error\u003dFalse)"},{"line_number":1678,"context_line":"            if cache_data:"},{"line_number":1679,"context_line":"                # cache hit."},{"line_number":1680,"context_line":"      
          decoded_data \u003d self.cache_decoder(cache_data)"},{"line_number":1681,"context_line":"                return decoded_data"}],"source_content_type":"text/x-python","patch_set":44,"id":"bdaa73a4_d6792143","line":1678,"in_reply_to":"41c93806_59a44d13","updated":"2025-05-13 14:20:39.000000000","message":"I suppose it\u0027s possible we\u0027ll want a `CacheGetterWithSkips` that doesn\u0027t need `CooperativeCachePopulator` - but I think most of the reasoning that applies to updating-shard-ranges would also apply to get-info calls.  However, we can wait to combine them when there\u0027s more than one use-case that all want the same behavior.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"3b5bb973ba22ccc90551db803b513941f4f5b925","unresolved":true,"context_lines":[{"line_number":1675,"context_line":"                num_waits \u003d 3"},{"line_number":1676,"context_line":"            cache_data \u003d self._memcache.get("},{"line_number":1677,"context_line":"                self._cache_key, raise_on_error\u003dFalse)"},{"line_number":1678,"context_line":"            if cache_data:"},{"line_number":1679,"context_line":"                # cache hit."},{"line_number":1680,"context_line":"                decoded_data \u003d self.cache_decoder(cache_data)"},{"line_number":1681,"context_line":"                return decoded_data"}],"source_content_type":"text/x-python","patch_set":44,"id":"41c93806_59a44d13","line":1678,"in_reply_to":"4946bceb_7367ff6a","updated":"2025-05-07 18:07:31.000000000","message":"Ack, I have moved ``record_cache_op_metrics`` to be within ``CooperativeCachePopulator``.\n\nFor the \"initial-fetch-from-memcache-or-skip\" behavior, maybe that can go to a separate class like ``CacheGetterWithSkips``. Currently we only use random skip; we can probably use a different skipping strategy when needed. I am inclined to have the caller decide how to use ``CacheGetterWithSkips`` to get cached data, and use ``CooperativeCachePopulator`` to cooperatively fetch data from the backend.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1702,"context_line":"                    \u0027token.%s.backend_reqs.no_cache.%d\u0027 %"},{"line_number":1703,"context_line":"                    (self._op_type, self.backend_resp.status_int)"},{"line_number":1704,"context_line":"                )"},{"line_number":1705,"context_line":"            return data"},{"line_number":1706,"context_line":""},{"line_number":1707,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":1708,"context_line":"        total_requests \u003d 0"}],"source_content_type":"text/x-python","patch_set":44,"id":"ba25d7d2_3c9bb426","line":1705,"updated":"2025-05-05 21:32:09.000000000","message":"it seems a little awkward that this class has to deal with the `memcache \u003d None` case ... 
in the follow-on patch we have a `DirectNamespaceCachePopulator` that we use when `if self.app.namespace_cache_use_token \u003d\u003d False` ...\n\nif we expect to have to implement an alternative `DirectCachePopulator` for every consumer of a `CooperativeCachePopulator` maybe we remove the `memcache \u003d None` code here, or create a Factory method that will return a `DirectInfoCachePopulator` when `memcache \u003d\u003d None`","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"476f2dfe4612e7f396645c4a222e864af4780da2","unresolved":false,"context_lines":[{"line_number":1702,"context_line":"                    \u0027token.%s.backend_reqs.no_cache.%d\u0027 %"},{"line_number":1703,"context_line":"                    (self._op_type, self.backend_resp.status_int)"},{"line_number":1704,"context_line":"                )"},{"line_number":1705,"context_line":"            return data"},{"line_number":1706,"context_line":""},{"line_number":1707,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":1708,"context_line":"        total_requests \u003d 0"}],"source_content_type":"text/x-python","patch_set":44,"id":"444ad011_fb5da0b9","line":1705,"in_reply_to":"232c6491_fd63adaf","updated":"2025-05-08 05:29:56.000000000","message":"I also changed ``CooperativeCachePopulator`` to require a valid memcache instance, and have the proxy patch handle the case where ``memcache \u003d\u003d None``.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[{"line_number":1702,"context_line":"                    \u0027token.%s.backend_reqs.no_cache.%d\u0027 %"},{"line_number":1703,"context_line":"                    (self._op_type, self.backend_resp.status_int)"},{"line_number":1704,"context_line":"                )"},{"line_number":1705,"context_line":"            return data"},{"line_number":1706,"context_line":""},{"line_number":1707,"context_line":"        # Try to get a cooperative token by using memcache increments."},{"line_number":1708,"context_line":"        total_requests \u003d 0"}],"source_content_type":"text/x-python","patch_set":44,"id":"232c6491_fd63adaf","line":1705,"in_reply_to":"ba25d7d2_3c9bb426","updated":"2025-05-07 05:08:33.000000000","message":"will merge ``DirectNamespaceCachePopulator`` into ``CooperativeCachePopulator``.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1708,"context_line":"        total_requests \u003d 0"},{"line_number":1709,"context_line":"        try:"},{"line_number":1710,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":1711,"context_line":"                self._token_key, time\u003dself._token_ttl)"},{"line_number":1712,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":1713,"context_line":"            self.set_cache_state \u003d 
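One shape the factory idea could take, using the class names from the comment above (the function name and signature are illustrative only):

```
def make_cache_populator(app, memcache, *args, **kwargs):
    # sketch: the cooperative class then always gets a valid memcache
    # instance; deployments without memcache get a direct populator
    if memcache is None:
        return DirectInfoCachePopulator(app, *args, **kwargs)
    return CooperativeCachePopulator(app, memcache, *args, **kwargs)
```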
\u0027inc_error\u0027"},{"line_number":1714,"context_line":""}],"source_content_type":"text/x-python","patch_set":44,"id":"2cdf765c_abcf5912","line":1711,"updated":"2025-05-05 21:32:09.000000000","message":"oic, so it\u0027s up to the caller to check memcache once for the value before they use a CooperativeCachePopulator\n\nit seems like this class has all the information it needs to read the value from cache - so why not just start with the \"fetch/skip from cache\"","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":false,"context_lines":[{"line_number":1708,"context_line":"        total_requests \u003d 0"},{"line_number":1709,"context_line":"        try:"},{"line_number":1710,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":1711,"context_line":"                self._token_key, time\u003dself._token_ttl)"},{"line_number":1712,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":1713,"context_line":"            self.set_cache_state \u003d \u0027inc_error\u0027"},{"line_number":1714,"context_line":""}],"source_content_type":"text/x-python","patch_set":44,"id":"0067987c_b5504cef","line":1711,"in_reply_to":"2cdf765c_abcf5912","updated":"2025-05-13 14:20:39.000000000","message":"Acknowledged","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":15343,"name":"Tim Burke","email":"tburke@nvidia.com","username":"tburke"},"change_message_id":"a89ce01fc06197ae7d361499c8aefb52f52772c0","unresolved":true,"context_lines":[{"line_number":1715,"context_line":"        req_served_from_cache \u003d False"},{"line_number":1716,"context_line":"        if not total_requests:"},{"line_number":1717,"context_line":"            # Couldn\u0027t connect to the memcache to increment the token key"},{"line_number":1718,"context_line":"            data \u003d self._query_backend_and_set_cache()"},{"line_number":1719,"context_line":"        elif total_requests \u003c\u003d self._num_tokens:"},{"line_number":1720,"context_line":"            # Acquired a cooperative token, go fetching data from backend and"},{"line_number":1721,"context_line":"            # set the data in memcache."}],"source_content_type":"text/x-python","patch_set":44,"id":"8c90e430_b063c2e7","line":1718,"updated":"2025-05-02 17:56:01.000000000","message":"I was torn about just jumping straight to\n```\nif 0 \u003c total_requests \u003c\u003d self._num_tokens:\n```\nand letting this case get handled like the out-of-tokens case. I guess it comes down to whether we think the increment failure would happen because memcache is fully _down_ vs just overloaded or something -- i.e. 
whether there\u0027s any hope that some other request could be populating the cache if we wait.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":1715,"context_line":"        req_served_from_cache \u003d False"},{"line_number":1716,"context_line":"        if not total_requests:"},{"line_number":1717,"context_line":"            # Couldn\u0027t connect to the memcache to increment the token key"},{"line_number":1718,"context_line":"            data \u003d self._query_backend_and_set_cache()"},{"line_number":1719,"context_line":"        elif total_requests \u003c\u003d self._num_tokens:"},{"line_number":1720,"context_line":"            # Acquired a cooperative token, go fetching data from backend and"},{"line_number":1721,"context_line":"            # set the data in memcache."}],"source_content_type":"text/x-python","patch_set":44,"id":"a35b205f_81acd10d","line":1718,"in_reply_to":"8c90e430_b063c2e7","updated":"2025-05-05 21:32:09.000000000","message":"I think it\u0027s probably more likely a retry would eventually succeed.  Worst case should always be \"send PUT-obj-direct-to-async-sharded-with-no-update-shard-avail\" to the object-server.\n\nI think Al and I have complained before there\u0027s no \"decrement\" case where all the token winners eventually fail their backend request and everyone just waits for Ns for an update-to-memcache that will never come.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":false,"context_lines":[{"line_number":1715,"context_line":"        req_served_from_cache \u003d False"},{"line_number":1716,"context_line":"        if not total_requests:"},{"line_number":1717,"context_line":"            # Couldn\u0027t connect to the memcache to increment the token key"},{"line_number":1718,"context_line":"            data \u003d self._query_backend_and_set_cache()"},{"line_number":1719,"context_line":"        elif total_requests \u003c\u003d self._num_tokens:"},{"line_number":1720,"context_line":"            # Acquired a cooperative token, go fetching data from backend and"},{"line_number":1721,"context_line":"            # set the data in memcache."}],"source_content_type":"text/x-python","patch_set":44,"id":"f3b863ca_96ce2d96","line":1718,"in_reply_to":"a35b205f_81acd10d","updated":"2025-05-13 14:20:39.000000000","message":"\u003e everyone just waits for Ns for an update-to-memcache that will never come.\n\ni guess that\u0027s why you wouldn\u0027t run with num_tokens \u003d 1; although I think we actually DO because the increment race in a distributed cache is so lossy we still get 3-5 token winners anyway.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"60027608cb197b0cc45c774d34e6cfc10e69d564","unresolved":true,"context_lines":[{"line_number":1552,"context_line":"        # the cases of cache_state is memcache miss, error, skip, force_skip"},{"line_number":1553,"context_line":"        # or disabled."},{"line_number":1554,"context_line":"        if resp:"},{"line_number":1555,"context_line":"            
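Pulling the quoted context lines of these threads together, the token-acquisition flow under discussion is roughly (reconstructed from the patch, lightly abridged):

```
        total_requests = 0
        try:
            total_requests = self._memcache.incr(
                self._token_key, time=self._token_ttl)
        except swift.common.exceptions.MemcacheConnectionError:
            self.set_cache_state = 'inc_error'
        if not total_requests:
            # couldn't reach memcache to increment the token key; fall
            # back to querying the backend directly
            data = self._query_backend_and_set_cache()
        elif total_requests <= self._num_tokens:
            # acquired a cooperative token: fetch from the backend and
            # populate memcache for the waiting requests
            self.token_acquired = True
            data = self._query_backend_and_set_cache()
        else:
            # no token: wait for an in-flight winner to fill the cache
            data = self._sleep_and_retry_memcache()
```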
logger.increment(\u0027%s.%s.cache.%s.%d\u0027 % ("},{"line_number":1556,"context_line":"                server_type, op_type, cache_state, resp.status_int))"},{"line_number":1557,"context_line":"        else:"},{"line_number":1558,"context_line":"            # In some situation, we choose not to lookup backend after cache"}],"source_content_type":"text/x-python","patch_set":47,"id":"77f609d5_4d8ffc30","line":1555,"updated":"2025-05-08 04:22:07.000000000","message":"those are existing formatted stats which the proxy-server emits; some of them may be able to be combined/merged into token stats. I think we should keep those stats the same in this patch (no impact to prod) and take on those changes with the planned work to convert those metrics to labeled metrics.","commit_id":"37b4fa9c305ff233795bbaeaaf292f084477cac5"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"229a254027345cff6d07728e8348bff38e210b84","unresolved":false,"context_lines":[{"line_number":1552,"context_line":"        # the cases of cache_state is memcache miss, error, skip, force_skip"},{"line_number":1553,"context_line":"        # or disabled."},{"line_number":1554,"context_line":"        if resp:"},{"line_number":1555,"context_line":"            logger.increment(\u0027%s.%s.cache.%s.%d\u0027 % ("},{"line_number":1556,"context_line":"                server_type, op_type, cache_state, resp.status_int))"},{"line_number":1557,"context_line":"        else:"},{"line_number":1558,"context_line":"            # In some situation, we choose not to lookup backend after cache"}],"source_content_type":"text/x-python","patch_set":47,"id":"abfa12a2_fff8edc7","line":1555,"in_reply_to":"77f609d5_4d8ffc30","updated":"2025-05-08 04:51:36.000000000","message":"No, those are general cache get/set metrics which we monitor with different panels; they should still be kept separate and are better kept in ``proxy/controller/obj.py``.","commit_id":"37b4fa9c305ff233795bbaeaaf292f084477cac5"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":true,"context_lines":[{"line_number":1574,"context_line":"        serves as a base unit for calculating exponential backoff delays when"},{"line_number":1575,"context_line":"        awaiting cache population by other requests, and for determining the"},{"line_number":1576,"context_line":"        cooperative token\u0027s time-to-live (which is set to 10x this value); Must"},{"line_number":1577,"context_line":"        be greater than 0."},{"line_number":1578,"context_line":"    :param num_tokens: the minimum limit of tokens per each usage sesssion,"},{"line_number":1579,"context_line":"        also the the minimum limit of in-flight requests allowed to fetch data"},{"line_number":1580,"context_line":"        from backend; default to be 3, which give redundancy when any request"}],"source_content_type":"text/x-python","patch_set":51,"id":"7851bb7c_39aee991","line":1577,"updated":"2025-05-13 14:20:39.000000000","message":"this makes more sense.","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":1574,"context_line":"        serves as a base unit for calculating exponential backoff delays 
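For illustration, the legacy formatted stat quoted above expands to a dotted name; the concrete values below are assumed examples, not taken from the patch:

```
        # assumed example values, expanding the legacy stat format:
        server_type, op_type, cache_state = 'container', 'shard_listing', 'miss'
        # with a 200 backend response this emits:
        #   container.shard_listing.cache.miss.200
        logger.increment('%s.%s.cache.%s.%d' % (
            server_type, op_type, cache_state, 200))
```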
when"},{"line_number":1575,"context_line":"        awaiting cache population by other requests, and for determining the"},{"line_number":1576,"context_line":"        cooperative token\u0027s time-to-live (which is set to 10x this value); Must"},{"line_number":1577,"context_line":"        be greater than 0."},{"line_number":1578,"context_line":"    :param num_tokens: the minimum limit of tokens per each usage sesssion,"},{"line_number":1579,"context_line":"        also the the minimum limit of in-flight requests allowed to fetch data"},{"line_number":1580,"context_line":"        from backend; default to be 3, which give redundancy when any request"}],"source_content_type":"text/x-python","patch_set":51,"id":"a29892eb_a997f05a","line":1577,"in_reply_to":"7851bb7c_39aee991","updated":"2025-05-30 14:35:31.000000000","message":"Acknowledged","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":true,"context_lines":[{"line_number":1664,"context_line":"        The first retry is 1.5 times of the ``avg_backend_fetch_time``, the"},{"line_number":1665,"context_line":"        second is 3 times, and the third is 6 times of it, so total is 10.5"},{"line_number":1666,"context_line":"        times of the ``avg_backend_fetch_time``. This roughly equals to the"},{"line_number":1667,"context_line":"        ``token_ttl`` which is 10 times of the ``avg_backend_fetch_time``."},{"line_number":1668,"context_line":""},{"line_number":1669,"context_line":"        :returns: value of the data fetched from Memcached; None if not exist."},{"line_number":1670,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":51,"id":"9ce61ac8_f466ac55","line":1667,"updated":"2025-05-13 14:20:39.000000000","message":"this makes more sense.","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":1664,"context_line":"        The first retry is 1.5 times of the ``avg_backend_fetch_time``, the"},{"line_number":1665,"context_line":"        second is 3 times, and the third is 6 times of it, so total is 10.5"},{"line_number":1666,"context_line":"        times of the ``avg_backend_fetch_time``. 
This roughly equals to the"},{"line_number":1667,"context_line":"        ``token_ttl`` which is 10 times of the ``avg_backend_fetch_time``."},{"line_number":1668,"context_line":""},{"line_number":1669,"context_line":"        :returns: value of the data fetched from Memcached; None if not exist."},{"line_number":1670,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":51,"id":"afc26dd4_f53757ff","line":1667,"in_reply_to":"9ce61ac8_f466ac55","updated":"2025-05-30 14:35:31.000000000","message":"Acknowledged","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"6cb4c00b5d866f0adfc367d233fc084dadc6126f","unresolved":true,"context_lines":[{"line_number":1673,"context_line":"        retry_interval \u003d self._avg_backend_fetch_time * 1.5"},{"line_number":1674,"context_line":"        num_waits \u003d 0"},{"line_number":1675,"context_line":"        while cur_time \u003c cutoff_time or num_waits \u003c 3:"},{"line_number":1676,"context_line":"            if cur_time \u003c cutoff_time:"},{"line_number":1677,"context_line":"                eventlet.sleep(retry_interval)"},{"line_number":1678,"context_line":"                num_waits +\u003d 1"},{"line_number":1679,"context_line":"            else:"}],"source_content_type":"text/x-python","patch_set":51,"id":"747eed4f_c3e533b9","line":1676,"updated":"2025-05-13 22:09:40.000000000","message":"why do we even HAVE the cutoff_time if we\u0027re always going to do 3 retries\n\nwait... no maybe we only do 1 retry (two requests, num_waits\u003d3) sometimes...","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":1673,"context_line":"        retry_interval \u003d self._avg_backend_fetch_time * 1.5"},{"line_number":1674,"context_line":"        num_waits \u003d 0"},{"line_number":1675,"context_line":"        while cur_time \u003c cutoff_time or num_waits \u003c 3:"},{"line_number":1676,"context_line":"            if cur_time \u003c cutoff_time:"},{"line_number":1677,"context_line":"                eventlet.sleep(retry_interval)"},{"line_number":1678,"context_line":"                num_waits +\u003d 1"},{"line_number":1679,"context_line":"            else:"}],"source_content_type":"text/x-python","patch_set":51,"id":"e172aacc_1a5c6ceb","line":1676,"in_reply_to":"747eed4f_c3e533b9","updated":"2025-05-30 14:35:31.000000000","message":"Acknowledged","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"6cb4c00b5d866f0adfc367d233fc084dadc6126f","unresolved":true,"context_lines":[{"line_number":1682,"context_line":"                # To have one last check, when eventlet scheduling didn\u0027t give"},{"line_number":1683,"context_line":"                # this greenthread enough cpu cycles and it didn\u0027t have enough"},{"line_number":1684,"context_line":"                # times of retries."},{"line_number":1685,"context_line":"                num_waits \u003d 3"},{"line_number":1686,"context_line":"            cache_data \u003d self._memcache.get("},{"line_number":1687,"context_line":"                self._cache_key, 
raise_on_error\u003dFalse)"},{"line_number":1688,"context_line":"            if cache_data:"}],"source_content_type":"text/x-python","patch_set":51,"id":"58f44870_63d806af","line":1685,"updated":"2025-05-13 22:09:40.000000000","message":"wait; this seems like we do only get \"at least one\" retry!?","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":1682,"context_line":"                # To have one last check, when eventlet scheduling didn\u0027t give"},{"line_number":1683,"context_line":"                # this greenthread enough cpu cycles and it didn\u0027t have enough"},{"line_number":1684,"context_line":"                # times of retries."},{"line_number":1685,"context_line":"                num_waits \u003d 3"},{"line_number":1686,"context_line":"            cache_data \u003d self._memcache.get("},{"line_number":1687,"context_line":"                self._cache_key, raise_on_error\u003dFalse)"},{"line_number":1688,"context_line":"            if cache_data:"}],"source_content_type":"text/x-python","patch_set":51,"id":"e2a07176_00d1ef87","line":1685,"in_reply_to":"58f44870_63d806af","updated":"2025-05-30 14:35:31.000000000","message":"Acknowledged","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":true,"context_lines":[{"line_number":1704,"context_line":"        \"\"\""},{"line_number":1705,"context_line":"        if not self._num_tokens:"},{"line_number":1706,"context_line":"            # Cooperative token disabled, fetch from backend."},{"line_number":1707,"context_line":"            return self._query_backend_and_set_cache()"},{"line_number":1708,"context_line":""},{"line_number":1709,"context_line":"        total_requests \u003d 0"},{"line_number":1710,"context_line":"        try:"}],"source_content_type":"text/x-python","patch_set":51,"id":"e48f39fe_78d1b860","line":1707,"updated":"2025-05-13 14:20:39.000000000","message":"pretty straight forward; i wonder if it\u0027d be useful to incr a `backend_req{token\u003d\u0027disabled\u0027}` counter.","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":1704,"context_line":"        \"\"\""},{"line_number":1705,"context_line":"        if not self._num_tokens:"},{"line_number":1706,"context_line":"            # Cooperative token disabled, fetch from backend."},{"line_number":1707,"context_line":"            return self._query_backend_and_set_cache()"},{"line_number":1708,"context_line":""},{"line_number":1709,"context_line":"        total_requests \u003d 0"},{"line_number":1710,"context_line":"        try:"}],"source_content_type":"text/x-python","patch_set":51,"id":"81634657_2968c027","line":1707,"in_reply_to":"e48f39fe_78d1b860","updated":"2025-05-30 14:35:31.000000000","message":"Done","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":7847,"name":"Alistair 
Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":1525,"context_line":"    designed to be a back-off time window for retrying backend, it will work"},{"line_number":1526,"context_line":"    for the same purpose in this case as well. When one token session ends"},{"line_number":1527,"context_line":"    after ``token_ttl``, requests which see cache miss will start a new round"},{"line_number":1528,"context_line":"    of cooperation token session."},{"line_number":1529,"context_line":""},{"line_number":1530,"context_line":"    :param app: the application instance containing app.logger, app.statsd"},{"line_number":1531,"context_line":"    :param infocache: the infocache instance."}],"source_content_type":"text/x-python","patch_set":58,"id":"fa797df2_8e648f34","line":1528,"updated":"2025-09-05 17:20:22.000000000","message":"this doctstring is great! thank you for writing up the concept so thoroughly. I\u0027ve tried to polish it a bit and correct some English language here https://paste.openstack.org/show/bpnDmQL7aDzM8I7fWuGl/","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[{"line_number":1525,"context_line":"    designed to be a back-off time window for retrying backend, it will work"},{"line_number":1526,"context_line":"    for the same purpose in this case as well. When one token session ends"},{"line_number":1527,"context_line":"    after ``token_ttl``, requests which see cache miss will start a new round"},{"line_number":1528,"context_line":"    of cooperation token session."},{"line_number":1529,"context_line":""},{"line_number":1530,"context_line":"    :param app: the application instance containing app.logger, app.statsd"},{"line_number":1531,"context_line":"    :param infocache: the infocache instance."}],"source_content_type":"text/x-python","patch_set":58,"id":"94490f7f_1dbf29cd","line":1528,"in_reply_to":"fa797df2_8e648f34","updated":"2025-09-18 04:45:01.000000000","message":"thanks for the help, I have squashed your improvement.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":1537,"context_line":"        backend fetch operation ``do_fetch_backend`` to complete. 
This duration"},{"line_number":1538,"context_line":"        serves as a base unit for calculating exponential backoff delays when"},{"line_number":1539,"context_line":"        awaiting cache population by other requests, and for determining the"},{"line_number":1540,"context_line":"        cooperative token\u0027s time-to-live (which is set to 10x this value); Must"},{"line_number":1541,"context_line":"        be greater than 0."},{"line_number":1542,"context_line":"    :param num_tokens: the minimum limit of tokens per each usage sesssion,"},{"line_number":1543,"context_line":"        also the the minimum limit of in-flight requests allowed to fetch data"}],"source_content_type":"text/x-python","patch_set":58,"id":"46c1b5d5_a07c0b17","line":1540,"range":{"start_line":1540,"start_character":75,"end_line":1540,"end_character":79},"updated":"2025-09-05 17:20:22.000000000","message":"I don\u0027t see this ``\u003e0`` being enforced, so s/must/should/","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[{"line_number":1537,"context_line":"        backend fetch operation ``do_fetch_backend`` to complete. This duration"},{"line_number":1538,"context_line":"        serves as a base unit for calculating exponential backoff delays when"},{"line_number":1539,"context_line":"        awaiting cache population by other requests, and for determining the"},{"line_number":1540,"context_line":"        cooperative token\u0027s time-to-live (which is set to 10x this value); Must"},{"line_number":1541,"context_line":"        be greater than 0."},{"line_number":1542,"context_line":"    :param num_tokens: the minimum limit of tokens per each usage sesssion,"},{"line_number":1543,"context_line":"        also the the minimum limit of in-flight requests allowed to fetch data"}],"source_content_type":"text/x-python","patch_set":58,"id":"632b915a_9bb1ab82","line":1540,"range":{"start_line":1540,"start_character":75,"end_line":1540,"end_character":79},"in_reply_to":"46c1b5d5_a07c0b17","updated":"2025-09-18 04:45:01.000000000","message":"Acknowledged","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":1567,"context_line":"        # the ``avg_backend_fetch_time``."},{"line_number":1568,"context_line":"        self._token_ttl \u003d avg_backend_fetch_time * 10"},{"line_number":1569,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1570,"context_line":"        # The status of cache operation which sets backend data into Memcached."},{"line_number":1571,"context_line":"        self.set_cache_state \u003d None"},{"line_number":1572,"context_line":"        # Indicates if this request has acquired one token."},{"line_number":1573,"context_line":"        self.token_acquired \u003d False"}],"source_content_type":"text/x-python","patch_set":58,"id":"171aa758_65a40534","line":1570,"updated":"2025-09-05 17:20:22.000000000","message":"this comment isn\u0027t accurate: set_cache_state is also used to indicate an inc_error (when trying to get the token). 
IMHO that is confusing: I was expecting an inc error to be reflected in the token label:\n\n```\ntoken \u003d with_token | no_token | disabled | error\n                                           ^^^^^\n```","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[{"line_number":1567,"context_line":"        # the ``avg_backend_fetch_time``."},{"line_number":1568,"context_line":"        self._token_ttl \u003d avg_backend_fetch_time * 10"},{"line_number":1569,"context_line":"        self._num_tokens \u003d num_tokens"},{"line_number":1570,"context_line":"        # The status of cache operation which sets backend data into Memcached."},{"line_number":1571,"context_line":"        self.set_cache_state \u003d None"},{"line_number":1572,"context_line":"        # Indicates if this request has acquired one token."},{"line_number":1573,"context_line":"        self.token_acquired \u003d False"}],"source_content_type":"text/x-python","patch_set":58,"id":"1ff1f5d6_8af171ca","line":1570,"in_reply_to":"171aa758_65a40534","updated":"2025-09-18 04:45:01.000000000","message":"I have changed ``self._labels[\u0027token\u0027]`` to be \u0027error\u0027 in this case.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":1675,"context_line":"        \"\"\""},{"line_number":1676,"context_line":"        labels \u003d {"},{"line_number":1677,"context_line":"            **self._labels,"},{"line_number":1678,"context_line":"        }"},{"line_number":1679,"context_line":"        if not self._num_tokens:"},{"line_number":1680,"context_line":"            # Cooperative token disabled, fetch from backend."},{"line_number":1681,"context_line":"            data \u003d self._query_backend_and_set_cache()"}],"source_content_type":"text/x-python","patch_set":58,"id":"48fa800a_694dd24d","line":1678,"updated":"2025-09-08 15:33:27.000000000","message":"why is a ``copy`` of ``self._labels`` needed? It _suggests_ that the instance of CooperativeCachePopulator might be re-used (i.e. ``fetch_data`` called more than once) which raises a bigger concern: while there\u0027s nothing to stop repeated calls to fetch_data on the same instance, I don\u0027t think that would be a good idea! Most of the other instance state persists from one call to the next and isn\u0027t consistently cleared or re-set.\n\nThis test illustrates the problem:\nhttps://paste.openstack.org/show/b5MDnBg4BdeRBRhfkuM7/\n\nI think the class either needs to enforce a \u0027one-shot\u0027 property by raising an error if fetch_data is called more than once. As a minimum, document that property.\n\nOR, change *all* the per-call instance state to be local vars and make it \u0027stateless\u0027. 
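A sketch of the 'one-shot' option raised in this thread, with illustrative names; it shows only the guard, not the real per-call state.

```
class OneShotSketch(object):
    """Illustrates enforcing that fetch_data() is called at most once."""

    def __init__(self):
        self._fetch_called = False  # hypothetical guard flag

    def fetch_data(self):
        if self._fetch_called:
            # Per-call state (labels, token_acquired, set_cache_state, ...)
            # is not reset between calls, so re-entry would be unsafe.
            raise RuntimeError('fetch_data() may only be called once')
        self._fetch_called = True
        return self._do_fetch()

    def _do_fetch(self):
        return 'data'  # stand-in for the token/cache/backend logic
```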
This only seems necessary if there\u0027s a use case for a caller to hold on to an instance of a CooperativeCachePopulator and re-enter ``fetch_data`` (I\u0027ve not yet studied the follow on patch enough to know if that is a use-case).","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[{"line_number":1675,"context_line":"        \"\"\""},{"line_number":1676,"context_line":"        labels \u003d {"},{"line_number":1677,"context_line":"            **self._labels,"},{"line_number":1678,"context_line":"        }"},{"line_number":1679,"context_line":"        if not self._num_tokens:"},{"line_number":1680,"context_line":"            # Cooperative token disabled, fetch from backend."},{"line_number":1681,"context_line":"            data \u003d self._query_backend_and_set_cache()"}],"source_content_type":"text/x-python","patch_set":58,"id":"21ec8a73_11c3410d","line":1678,"in_reply_to":"48fa800a_694dd24d","updated":"2025-09-18 04:45:01.000000000","message":"good point on enforcing a \u0027one-shot\u0027 property by raising an error, I have made those changes and added a new test case for it.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":1691,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":1692,"context_line":"                self._token_key, time\u003dself._token_ttl)"},{"line_number":1693,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":1694,"context_line":"            self.set_cache_state \u003d \u0027inc_error\u0027"},{"line_number":1695,"context_line":""},{"line_number":1696,"context_line":"        req_served_from_cache \u003d False"},{"line_number":1697,"context_line":"        if not total_requests:"}],"source_content_type":"text/x-python","patch_set":58,"id":"feea3f51_77d957be","line":1694,"updated":"2025-09-08 15:33:27.000000000","message":"there\u0027s no test that assert labels[\u0027set_cache_state\u0027] \u003d\u003d \u0027inc_error\u0027 😞\n\nbut self.set_cache_state is going to be set again in _query_backend_and_set_cache() - this test actually proves it test_get_token_connection_error","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[{"line_number":1691,"context_line":"            total_requests \u003d self._memcache.incr("},{"line_number":1692,"context_line":"                self._token_key, time\u003dself._token_ttl)"},{"line_number":1693,"context_line":"        except swift.common.exceptions.MemcacheConnectionError:"},{"line_number":1694,"context_line":"            self.set_cache_state \u003d \u0027inc_error\u0027"},{"line_number":1695,"context_line":""},{"line_number":1696,"context_line":"        req_served_from_cache \u003d False"},{"line_number":1697,"context_line":"        if not total_requests:"}],"source_content_type":"text/x-python","patch_set":58,"id":"537e3b62_0f1b95a6","line":1694,"in_reply_to":"feea3f51_77d957be","updated":"2025-09-18 
04:45:01.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":1711,"context_line":"                # any request with a token finishes both backend fetching and"},{"line_number":1712,"context_line":"                # memcache set successful, it can remove all cooperative tokens"},{"line_number":1713,"context_line":"                #  of this token session."},{"line_number":1714,"context_line":"                self._memcache.delete(self._token_key)"},{"line_number":1715,"context_line":""},{"line_number":1716,"context_line":"        else:"},{"line_number":1717,"context_line":"            # No token acquired, it means that there are requests in-flight"}],"source_content_type":"text/x-python","patch_set":58,"id":"6c76b6c1_915606e2","line":1714,"updated":"2025-09-09 12:30:04.000000000","message":"So if all 3 backend requests fail immediately, then every subsequent request must wait for token_ttl before anything useful can happen? That seems like a flaw.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[{"line_number":1711,"context_line":"                # any request with a token finishes both backend fetching and"},{"line_number":1712,"context_line":"                # memcache set successful, it can remove all cooperative tokens"},{"line_number":1713,"context_line":"                #  of this token session."},{"line_number":1714,"context_line":"                self._memcache.delete(self._token_key)"},{"line_number":1715,"context_line":""},{"line_number":1716,"context_line":"        else:"},{"line_number":1717,"context_line":"            # No token acquired, it means that there are requests in-flight"}],"source_content_type":"text/x-python","patch_set":58,"id":"a3df45c9_5b4a60b4","line":1714,"in_reply_to":"6c76b6c1_915606e2","updated":"2025-09-18 04:45:01.000000000","message":"if all 3 backend requests fail, subsequent requests probably will fail too due to the overloaded container servers. In this situation, without the cooperative token, those failed subsequent requests will wait for 60 seconds; with the token, those requests will wait for 60+2 seconds. 
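The token-session lifecycle under discussion, condensed into a sketch: memcache.incr(key, time=...) and memcache.delete(key) are the calls quoted in the review context, while the helper names and the exact <= comparison are assumptions.

```
from swift.common.exceptions import MemcacheConnectionError

def try_acquire_token(memcache, token_key, num_tokens, token_ttl):
    """Return (acquired, error_state); the first num_tokens callers win."""
    try:
        # incr() creates the key (at 1) if missing; the TTL bounds how long
        # a stalled session can keep blocking later requests.
        total_requests = memcache.incr(token_key, time=token_ttl)
    except MemcacheConnectionError:
        return False, 'inc_error'
    return total_requests <= num_tokens, None

def end_token_session(memcache, token_key):
    # Any request with a token that finishes both the backend fetch and the
    # memcache set can delete the key, ending the whole session early
    # instead of letting it run out the token_ttl.
    memcache.delete(token_key)
```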
I think it\u0027s about the same.\n\n@clay.gerrard@gmail.com and I have discussed further optimizations to fail fast in this case (instead of bombing the backend), but that\u0027ll be another patch.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":1732,"context_line":"            # Total number of requests equals to \u0027cache_served\u0027 plus"},{"line_number":1733,"context_line":"            # \u0027backend_reqs\u0027 \u0027with_token\u0027 and \u0027backend_reqs\u0027 \u0027no_token\u0027."},{"line_number":1734,"context_line":"            labels[\u0027event\u0027] \u003d \u0027backend_reqs\u0027"},{"line_number":1735,"context_line":"            labels[\u0027status\u0027] \u003d self.backend_resp.status_int"},{"line_number":1736,"context_line":""},{"line_number":1737,"context_line":"        labels[\u0027token\u0027] \u003d \u0027with_token\u0027 if self.token_acquired else \u0027no_token\u0027"},{"line_number":1738,"context_line":"        if self.set_cache_state:"}],"source_content_type":"text/x-python","patch_set":58,"id":"1ea8470b_6320bccd","line":1735,"range":{"start_line":1735,"start_character":31,"end_line":1735,"end_character":59},"updated":"2025-09-08 15:33:27.000000000","message":"in order to convince myself that this was safe (i.e. that self.backend_resp has been set) I needed to convince myself that we only ever enter this clause if self._query_backend_and_set_cache() has been called.\n\nBut we actually want these labels set every time self._query_backend_and_set_cache() is called (see line 1705 for example), so I think the condition can be simpler and clearer:\n\n```\nif self.backend_resp:\n    labels[\u0027event\u0027] \u003d \u0027backend_reqs\u0027\n    labels[\u0027status\u0027] \u003d self.backend_resp.status_int\n```\n\nsee also https://paste.openstack.org/show/bPRiPxMBWmycDmIRafa7/","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[{"line_number":1732,"context_line":"            # Total number of requests equals to \u0027cache_served\u0027 plus"},{"line_number":1733,"context_line":"            # \u0027backend_reqs\u0027 \u0027with_token\u0027 and \u0027backend_reqs\u0027 \u0027no_token\u0027."},{"line_number":1734,"context_line":"            labels[\u0027event\u0027] \u003d \u0027backend_reqs\u0027"},{"line_number":1735,"context_line":"            labels[\u0027status\u0027] \u003d self.backend_resp.status_int"},{"line_number":1736,"context_line":""},{"line_number":1737,"context_line":"        labels[\u0027token\u0027] \u003d \u0027with_token\u0027 if self.token_acquired else \u0027no_token\u0027"},{"line_number":1738,"context_line":"        if self.set_cache_state:"}],"source_content_type":"text/x-python","patch_set":58,"id":"39c42e7a_e5bfeac1","line":1735,"range":{"start_line":1735,"start_character":31,"end_line":1735,"end_character":59},"in_reply_to":"1ea8470b_6320bccd","updated":"2025-09-18 04:45:01.000000000","message":"thanks for the refactoring changes, I have squashed them. 
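Following the simplification quoted just above, the whole label set-up can live in one place; the function wrapper and argument names here are illustrative, the label names and values are the ones enumerated in the review.

```
def emit_coop_cache_metric(statsd, base_labels, backend_resp,
                           token_acquired, set_cache_state):
    labels = dict(base_labels)
    if backend_resp:
        # True whenever _query_backend_and_set_cache() actually ran,
        # whichever path (disabled / with_token / no_token) triggered it.
        labels['event'] = 'backend_reqs'
        labels['status'] = backend_resp.status_int
    else:
        labels['event'] = 'cache_served'
    labels['token'] = 'with_token' if token_acquired else 'no_token'
    if set_cache_state:
        labels['set_cache_state'] = set_cache_state
    statsd.increment('swift_coop_cache', labels=labels)
```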
It\u0027s much clearer!","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":1736,"context_line":""},{"line_number":1737,"context_line":"        labels[\u0027token\u0027] \u003d \u0027with_token\u0027 if self.token_acquired else \u0027no_token\u0027"},{"line_number":1738,"context_line":"        if self.set_cache_state:"},{"line_number":1739,"context_line":"            labels[\u0027set_cache_state\u0027] \u003d self.set_cache_state"},{"line_number":1740,"context_line":"        self._statsd.increment(\u0027swift_token\u0027, labels\u003dlabels)"},{"line_number":1741,"context_line":"        return data"},{"line_number":1742,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"90998b6b_f8fd44a7","line":1739,"updated":"2025-09-05 17:20:22.000000000","message":"I started enumerating the various labels and values:\n```\n        # statsd labels can be:\n        #   token: with_token | no_token | disabled\n        #   event: backend_reqs | cache_served\n        #   status: an http status int\n        #   set_cache_state: inc_error | set | set_error\n        #   lack_retries: True | False \n\n```\nbut I need to look harder at which are always present and which not. I wonder if it might be clearer if the labels dict was populated in the various if/elif/else rather than setting flags that then act as conditions for labels.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":1736,"context_line":""},{"line_number":1737,"context_line":"        labels[\u0027token\u0027] \u003d \u0027with_token\u0027 if self.token_acquired else \u0027no_token\u0027"},{"line_number":1738,"context_line":"        if self.set_cache_state:"},{"line_number":1739,"context_line":"            labels[\u0027set_cache_state\u0027] \u003d self.set_cache_state"},{"line_number":1740,"context_line":"        self._statsd.increment(\u0027swift_token\u0027, labels\u003dlabels)"},{"line_number":1741,"context_line":"        return data"},{"line_number":1742,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"b73e27bb_b25c1e5a","line":1739,"in_reply_to":"90998b6b_f8fd44a7","updated":"2025-09-08 15:33:27.000000000","message":"I made an attempt to pull *almost* all the label setup into a single place so it is more obvious which labels are set when\nhttps://paste.openstack.org/show/bPRiPxMBWmycDmIRafa7/","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"4c1274f7efe1429f2f72426dec4e95e955fbee47","unresolved":false,"context_lines":[{"line_number":1736,"context_line":""},{"line_number":1737,"context_line":"        labels[\u0027token\u0027] \u003d \u0027with_token\u0027 if self.token_acquired else \u0027no_token\u0027"},{"line_number":1738,"context_line":"        if self.set_cache_state:"},{"line_number":1739,"context_line":"            labels[\u0027set_cache_state\u0027] \u003d self.set_cache_state"},{"line_number":1740,"context_line":"        self._statsd.increment(\u0027swift_token\u0027, 
labels\u003dlabels)"},{"line_number":1741,"context_line":"        return data"},{"line_number":1742,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"b5601678_6e3a98f5","line":1739,"in_reply_to":"b73e27bb_b25c1e5a","updated":"2025-09-18 04:45:01.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":1737,"context_line":"        labels[\u0027token\u0027] \u003d \u0027with_token\u0027 if self.token_acquired else \u0027no_token\u0027"},{"line_number":1738,"context_line":"        if self.set_cache_state:"},{"line_number":1739,"context_line":"            labels[\u0027set_cache_state\u0027] \u003d self.set_cache_state"},{"line_number":1740,"context_line":"        self._statsd.increment(\u0027swift_token\u0027, labels\u003dlabels)"},{"line_number":1741,"context_line":"        return data"},{"line_number":1742,"context_line":""},{"line_number":1743,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"e08c265b_2441ce04","line":1740,"range":{"start_line":1740,"start_character":32,"end_line":1740,"end_character":43},"updated":"2025-09-05 17:20:22.000000000","message":"nit: I can imagine this being confused with \u0027token\u0027 in the auth context. Perhaps \u0027swift_coop_token\u0027??","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"10f897d87faa2a035331daf6011424bc796d7cfc","unresolved":false,"context_lines":[{"line_number":1737,"context_line":"        labels[\u0027token\u0027] \u003d \u0027with_token\u0027 if self.token_acquired else \u0027no_token\u0027"},{"line_number":1738,"context_line":"        if self.set_cache_state:"},{"line_number":1739,"context_line":"            labels[\u0027set_cache_state\u0027] \u003d self.set_cache_state"},{"line_number":1740,"context_line":"        self._statsd.increment(\u0027swift_token\u0027, labels\u003dlabels)"},{"line_number":1741,"context_line":"        return data"},{"line_number":1742,"context_line":""},{"line_number":1743,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"b6473a1e_a5c5fd0b","line":1740,"range":{"start_line":1740,"start_character":32,"end_line":1740,"end_character":43},"in_reply_to":"e08c265b_2441ce04","updated":"2025-09-26 18:29:20.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"f3c176a0b2b9e61da8a19a7513ee2e08b18fc30b","unresolved":true,"context_lines":[{"line_number":1640,"context_line":"        The first retry is 1.5 times of the ``avg_backend_fetch_time``, the"},{"line_number":1641,"context_line":"        second is 3 times, and the third is 6 times of it, so total is 10.5"},{"line_number":1642,"context_line":"        times of the ``avg_backend_fetch_time``. 
This roughly equals to the"},{"line_number":1643,"context_line":"        ``token_ttl`` which is 10 times of the ``avg_backend_fetch_time``."},{"line_number":1644,"context_line":""},{"line_number":1645,"context_line":"        :returns: value of the data fetched from Memcached; None if not exist."},{"line_number":1646,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":60,"id":"114d5511_c4c825f8","line":1643,"updated":"2025-09-25 22:35:07.000000000","message":"I remember learning how this worked and then reviewing the updating cache tests it surprised me *again*\n\nI understand why the token_ttl should be related to the avg_backend_fetch_time - order of magnitude worst case buffer 10x - love it.\n\nBut why hard-coded 3 retries (and one extra) at these specific intervals is the \"only correct answer\" for all clusters is beyond me.","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"10f897d87faa2a035331daf6011424bc796d7cfc","unresolved":true,"context_lines":[{"line_number":1640,"context_line":"        The first retry is 1.5 times of the ``avg_backend_fetch_time``, the"},{"line_number":1641,"context_line":"        second is 3 times, and the third is 6 times of it, so total is 10.5"},{"line_number":1642,"context_line":"        times of the ``avg_backend_fetch_time``. This roughly equals to the"},{"line_number":1643,"context_line":"        ``token_ttl`` which is 10 times of the ``avg_backend_fetch_time``."},{"line_number":1644,"context_line":""},{"line_number":1645,"context_line":"        :returns: value of the data fetched from Memcached; None if not exist."},{"line_number":1646,"context_line":"        \"\"\""}],"source_content_type":"text/x-python","patch_set":60,"id":"1a2246bb_2d3b16a1","line":1643,"in_reply_to":"114d5511_c4c825f8","updated":"2025-09-26 18:29:20.000000000","message":"I know this topic is not ``new``, ;-)\nDuring the last review cycle, we had discussions on this; both agreed that the current retry algorithm is a simplified implementation for the base cooperative token patch that works in prod testing, and later we will try to make it more generic and add support for different retry algorithms if more cases come up (and we had this work planned in the roadmap doc).","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"f3c176a0b2b9e61da8a19a7513ee2e08b18fc30b","unresolved":true,"context_lines":[{"line_number":1650,"context_line":"        num_waits \u003d 0"},{"line_number":1651,"context_line":"        while cur_time \u003c cutoff_time or num_waits \u003c 3:"},{"line_number":1652,"context_line":"            if cur_time \u003c cutoff_time:"},{"line_number":1653,"context_line":"                eventlet.sleep(retry_interval)"},{"line_number":1654,"context_line":"                num_waits +\u003d 1"},{"line_number":1655,"context_line":"            else:"},{"line_number":1656,"context_line":"                # Request has no token and doesn\u0027t get enough retries."}],"source_content_type":"text/x-python","patch_set":60,"id":"b4b6bf5d_33b73b32","line":1653,"updated":"2025-09-25 22:35:07.000000000","message":"this module has both:\n\n```\nimport eventlet\nfrom eventlet import GreenPool, sleep, Timeout\n```\n\nIf you use the local name `sleep` here in this code path it\u0027s easier for tests to patch just 
this module w/o tracking calls to the eventlet module\u0027s sleep from other parts of the code (e.g. `test.unit.FakeStatus.__init__` calls `eventlet.sleep()`)","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"10f897d87faa2a035331daf6011424bc796d7cfc","unresolved":false,"context_lines":[{"line_number":1650,"context_line":"        num_waits \u003d 0"},{"line_number":1651,"context_line":"        while cur_time \u003c cutoff_time or num_waits \u003c 3:"},{"line_number":1652,"context_line":"            if cur_time \u003c cutoff_time:"},{"line_number":1653,"context_line":"                eventlet.sleep(retry_interval)"},{"line_number":1654,"context_line":"                num_waits +\u003d 1"},{"line_number":1655,"context_line":"            else:"},{"line_number":1656,"context_line":"                # Request has no token and doesn\u0027t get enough retries."}],"source_content_type":"text/x-python","patch_set":60,"id":"3f793aa5_6399e84f","line":1653,"in_reply_to":"b4b6bf5d_33b73b32","updated":"2025-09-26 18:29:20.000000000","message":"Done","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"f3c176a0b2b9e61da8a19a7513ee2e08b18fc30b","unresolved":true,"context_lines":[{"line_number":1736,"context_line":"            self._labels[\u0027event\u0027] \u003d \u0027cache_served\u0027"},{"line_number":1737,"context_line":"        if self.set_cache_state:"},{"line_number":1738,"context_line":"            self._labels[\u0027set_cache_state\u0027] \u003d self.set_cache_state"},{"line_number":1739,"context_line":"        self._statsd.increment(\u0027swift_token\u0027, labels\u003dself._labels)"},{"line_number":1740,"context_line":"        return data"},{"line_number":1741,"context_line":""},{"line_number":1742,"context_line":""}],"source_content_type":"text/x-python","patch_set":60,"id":"07c24bca_8b86cd8d","line":1739,"updated":"2025-09-25 22:35:07.000000000","message":"`swift_coop_cache` would be a significantly better name than \"token\"","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"10f897d87faa2a035331daf6011424bc796d7cfc","unresolved":false,"context_lines":[{"line_number":1736,"context_line":"            self._labels[\u0027event\u0027] \u003d \u0027cache_served\u0027"},{"line_number":1737,"context_line":"        if self.set_cache_state:"},{"line_number":1738,"context_line":"            self._labels[\u0027set_cache_state\u0027] \u003d self.set_cache_state"},{"line_number":1739,"context_line":"        self._statsd.increment(\u0027swift_token\u0027, labels\u003dself._labels)"},{"line_number":1740,"context_line":"        return data"},{"line_number":1741,"context_line":""},{"line_number":1742,"context_line":""}],"source_content_type":"text/x-python","patch_set":60,"id":"deabd479_52c1b340","line":1739,"in_reply_to":"07c24bca_8b86cd8d","updated":"2025-09-26 18:29:20.000000000","message":"Done","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"}],"swift/proxy/controllers/base.py":[{"author":{"_account_id":1179,"name":"Clay 
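The import pattern recommended in that comment, sketched; the module path in the test comment is illustrative, not the patch's actual location.

```
# Module under test binds sleep locally ...
from eventlet import sleep

def backoff_wait(retry_interval):
    sleep(retry_interval)  # tests patch <module>.sleep, not eventlet.sleep

# ... so a test can patch only this module's name, leaving calls to
# eventlet.sleep() from unrelated helpers (e.g. test.unit.FakeStatus)
# untouched:
#
#   with mock.patch('swift.proxy.controllers.base.sleep') as fake_sleep:
#       backoff_wait(0.3)
#       fake_sleep.assert_called_once_with(0.3)
```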
Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"a6a59d8e442cb63d30a6673c398ae6801807a646","unresolved":true,"context_lines":[{"line_number":69,"context_line":"DEFAULT_RECHECK_CONTAINER_EXISTENCE \u003d 60  # seconds"},{"line_number":70,"context_line":"DEFAULT_RECHECK_UPDATING_SHARD_RANGES \u003d 3600  # seconds"},{"line_number":71,"context_line":"DEFAULT_RECHECK_LISTING_SHARD_RANGES \u003d 600  # seconds"},{"line_number":72,"context_line":"DEFAULT_SHARD_RANGES_CACHE_MISS_LOCK_TTL \u003d 3  # seconds"},{"line_number":73,"context_line":"DEFAULT_SHARD_RANGES_CACHE_MISS_SLEEP_INTERVAL \u003d 0.05  # seconds"},{"line_number":74,"context_line":""},{"line_number":75,"context_line":""}],"source_content_type":"text/x-python","patch_set":1,"id":"12658c9a_537b8f2a","line":72,"updated":"2023-08-01 14:18:22.000000000","message":"this seems like a reasonably short default","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":69,"context_line":"DEFAULT_RECHECK_CONTAINER_EXISTENCE \u003d 60  # seconds"},{"line_number":70,"context_line":"DEFAULT_RECHECK_UPDATING_SHARD_RANGES \u003d 3600  # seconds"},{"line_number":71,"context_line":"DEFAULT_RECHECK_LISTING_SHARD_RANGES \u003d 600  # seconds"},{"line_number":72,"context_line":"DEFAULT_SHARD_RANGES_CACHE_MISS_LOCK_TTL \u003d 3  # seconds"},{"line_number":73,"context_line":"DEFAULT_SHARD_RANGES_CACHE_MISS_SLEEP_INTERVAL \u003d 0.05  # seconds"},{"line_number":74,"context_line":""},{"line_number":75,"context_line":""}],"source_content_type":"text/x-python","patch_set":1,"id":"68be4a15_1374d505","line":72,"in_reply_to":"12658c9a_537b8f2a","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":70,"context_line":"DEFAULT_RECHECK_UPDATING_SHARD_RANGES \u003d 3600  # seconds"},{"line_number":71,"context_line":"DEFAULT_RECHECK_LISTING_SHARD_RANGES \u003d 600  # seconds"},{"line_number":72,"context_line":"DEFAULT_SHARD_RANGES_CACHE_MISS_LOCK_TTL \u003d 3  # seconds"},{"line_number":73,"context_line":"DEFAULT_SHARD_RANGES_CACHE_MISS_SLEEP_INTERVAL \u003d 0.05  # seconds"},{"line_number":74,"context_line":""},{"line_number":75,"context_line":""},{"line_number":76,"context_line":"def update_headers(response, headers):"}],"source_content_type":"text/x-python","patch_set":2,"id":"5422514a_75f6a471","line":73,"updated":"2023-08-25 00:02:59.000000000","message":"50ms might be reasonable, for get our p99 mean is ~11ms and max is 47.8ms","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":70,"context_line":"DEFAULT_RECHECK_UPDATING_SHARD_RANGES \u003d 3600  # seconds"},{"line_number":71,"context_line":"DEFAULT_RECHECK_LISTING_SHARD_RANGES \u003d 600  # seconds"},{"line_number":72,"context_line":"DEFAULT_SHARD_RANGES_CACHE_MISS_LOCK_TTL \u003d 3  # 
seconds"},{"line_number":73,"context_line":"DEFAULT_SHARD_RANGES_CACHE_MISS_SLEEP_INTERVAL \u003d 0.05  # seconds"},{"line_number":74,"context_line":""},{"line_number":75,"context_line":""},{"line_number":76,"context_line":"def update_headers(response, headers):"}],"source_content_type":"text/x-python","patch_set":2,"id":"aa8e8502_199859f6","line":73,"in_reply_to":"5422514a_75f6a471","updated":"2024-01-16 19:52:53.000000000","message":"for those containers which trigger thundering herds, the average of the backend shard range query time is ~50ms.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"}],"swift/proxy/controllers/obj.py":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"a6a59d8e442cb63d30a6673c398ae6801807a646","unresolved":true,"context_lines":[{"line_number":328,"context_line":"        miss_lock_key \u003d \u0027miss_lock/%s\u0027 % cache_key"},{"line_number":329,"context_line":"        miss_lock \u003d None"},{"line_number":330,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":331,"context_line":"            namespaces \u003d memcache.get(cache_key, raise_on_error\u003dTrue)"},{"line_number":332,"context_line":"            if namespaces:"},{"line_number":333,"context_line":"                # cache hit."},{"line_number":334,"context_line":"                return namespaces"}],"source_content_type":"text/x-python","patch_set":1,"id":"90d18a43_f8127e75","line":331,"updated":"2023-08-01 14:18:22.000000000","message":"when reading the commit message I had assumed the lock would be per worker, but I can see now this is a bit more ... ambitious!","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f30c62ac0176bd7a4d71ed142aa2769a91d48f87","unresolved":false,"context_lines":[{"line_number":328,"context_line":"        miss_lock_key \u003d \u0027miss_lock/%s\u0027 % cache_key"},{"line_number":329,"context_line":"        miss_lock \u003d None"},{"line_number":330,"context_line":"        while retries_time_window \u003e 0:"},{"line_number":331,"context_line":"            namespaces \u003d memcache.get(cache_key, raise_on_error\u003dTrue)"},{"line_number":332,"context_line":"            if namespaces:"},{"line_number":333,"context_line":"                # cache hit."},{"line_number":334,"context_line":"                return namespaces"}],"source_content_type":"text/x-python","patch_set":1,"id":"c1688679_16634cba","line":331,"in_reply_to":"90d18a43_f8127e75","updated":"2023-08-02 06:12:29.000000000","message":"thanks for bringing this up, yeah, this is a global memcache lock, I have modified the title and commit message.","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"a6a59d8e442cb63d30a6673c398ae6801807a646","unresolved":true,"context_lines":[{"line_number":334,"context_line":"                return namespaces"},{"line_number":335,"context_line":"            # cache miss."},{"line_number":336,"context_line":"            if not miss_lock:"},{"line_number":337,"context_line":"                miss_lock \u003d memcache.get(miss_lock_key, raise_on_error\u003dTrue)"},{"line_number":338,"context_line":"            if not miss_lock:"},{"line_number":339,"context_line":"                # first cache miss request, set 
miss_lock, return None"},{"line_number":340,"context_line":"                # and caller will fetch updating shard ranges from backend."}],"source_content_type":"text/x-python","patch_set":1,"id":"8d10d93b_0b3df838","line":337,"updated":"2023-08-01 14:18:22.000000000","message":"does raise-on-error here break us out of this loop?","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f30c62ac0176bd7a4d71ed142aa2769a91d48f87","unresolved":false,"context_lines":[{"line_number":334,"context_line":"                return namespaces"},{"line_number":335,"context_line":"            # cache miss."},{"line_number":336,"context_line":"            if not miss_lock:"},{"line_number":337,"context_line":"                miss_lock \u003d memcache.get(miss_lock_key, raise_on_error\u003dTrue)"},{"line_number":338,"context_line":"            if not miss_lock:"},{"line_number":339,"context_line":"                # first cache miss request, set miss_lock, return None"},{"line_number":340,"context_line":"                # and caller will fetch updating shard ranges from backend."}],"source_content_type":"text/x-python","patch_set":1,"id":"50c67342_8c5ff912","line":337,"in_reply_to":"8d10d93b_0b3df838","updated":"2023-08-02 06:12:29.000000000","message":"yes, any connection exception happened within this function (two memcache.get and one memcache.set) will break it out of this loop. And the current caller of _get_memcached_namespaces_with_lock() already catches \"MemcacheConnectionError\", and will query backend for updating shard ranges since connection error means memcache probably is not accessible anymore.","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"a6a59d8e442cb63d30a6673c398ae6801807a646","unresolved":true,"context_lines":[{"line_number":345,"context_line":"                # there are requests in-flight which will fetch updating shard"},{"line_number":346,"context_line":"                # ranges from backend and update them in cache, let\u0027s wait for"},{"line_number":347,"context_line":"                # them to finish with limited retires."},{"line_number":348,"context_line":"                sleep(self.shard_ranges_cache_miss_sleep_interval)"},{"line_number":349,"context_line":"                retries_time_window \u003d retries_time_window - \\"},{"line_number":350,"context_line":"                    self.shard_ranges_cache_miss_sleep_interval"},{"line_number":351,"context_line":"                continue"}],"source_content_type":"text/x-python","patch_set":1,"id":"e141a8b7_b50d7917","line":348,"updated":"2023-08-01 14:18:22.000000000","message":"exponential backoff would probably be better.","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":false,"context_lines":[{"line_number":345,"context_line":"                # there are requests in-flight which will fetch updating shard"},{"line_number":346,"context_line":"                # ranges from backend and update them in cache, let\u0027s wait for"},{"line_number":347,"context_line":"                # them to finish with limited retires."},{"line_number":348,"context_line":"                
sleep(self.shard_ranges_cache_miss_sleep_interval)"},{"line_number":349,"context_line":"                retries_time_window \u003d retries_time_window - \\"},{"line_number":350,"context_line":"                    self.shard_ranges_cache_miss_sleep_interval"},{"line_number":351,"context_line":"                continue"}],"source_content_type":"text/x-python","patch_set":1,"id":"c20c90e5_51ec4a8d","line":348,"in_reply_to":"86953cb0_51e64a91","updated":"2023-08-25 00:02:59.000000000","message":"Done","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f30c62ac0176bd7a4d71ed142aa2769a91d48f87","unresolved":true,"context_lines":[{"line_number":345,"context_line":"                # there are requests in-flight which will fetch updating shard"},{"line_number":346,"context_line":"                # ranges from backend and update them in cache, let\u0027s wait for"},{"line_number":347,"context_line":"                # them to finish with limited retires."},{"line_number":348,"context_line":"                sleep(self.shard_ranges_cache_miss_sleep_interval)"},{"line_number":349,"context_line":"                retries_time_window \u003d retries_time_window - \\"},{"line_number":350,"context_line":"                    self.shard_ranges_cache_miss_sleep_interval"},{"line_number":351,"context_line":"                continue"}],"source_content_type":"text/x-python","patch_set":1,"id":"86953cb0_51e64a91","line":348,"in_reply_to":"e141a8b7_b50d7917","updated":"2023-08-02 06:12:29.000000000","message":"good question. In production, I see a full updating-shard-range GET on a large container will typically take 0.8 seconds. Since the rate of those backend requests will be high (200 req/s or so) when cache misses, I feel exponential backoff will have less retries but could incur longer delays after first request caches the updating-shard-range? 
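For scale, the fixed-interval wait loop from this earlier version of the patch, using the defaults quoted above (3s lock TTL, 0.05s sleep); the function name and standalone shape are a sketch assembled from the quoted fragments.

```
from eventlet import sleep

def wait_for_cache_fill(memcache, cache_key, window=3.0, interval=0.05):
    # Poll until another in-flight request populates the cache or the
    # lock-TTL-sized window runs out: at most 3.0 / 0.05 = 60 polls.
    while window > 0:
        namespaces = memcache.get(cache_key, raise_on_error=True)
        if namespaces:
            return namespaces        # cache hit
        sleep(interval)
        window -= interval
    return None                      # caller falls back to the backend
```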
ideally, event notification would be great, but memcache doesn\u0027t provide that.","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"a6a59d8e442cb63d30a6673c398ae6801807a646","unresolved":true,"context_lines":[{"line_number":428,"context_line":"                        cache_key, cached_namespaces.bounds,"},{"line_number":429,"context_line":"                        time\u003dself.app.recheck_updating_shard_ranges)"},{"line_number":430,"context_line":"                    self._clear_memcached_namespaces_miss_lock("},{"line_number":431,"context_line":"                        memcache, cache_key)"},{"line_number":432,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"},{"line_number":433,"context_line":"        record_cache_op_metrics("},{"line_number":434,"context_line":"            self.logger, \u0027shard_updating\u0027, cache_state, response)"}],"source_content_type":"text/x-python","patch_set":1,"id":"1b5e0c6f_3887e3fb","line":431,"updated":"2023-08-01 14:18:22.000000000","message":"i\u0027d rather see the clear lock in a finally - even with the short ttl we don\u0027t want to block other callers if we\u0027re not trying to fetch","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f30c62ac0176bd7a4d71ed142aa2769a91d48f87","unresolved":true,"context_lines":[{"line_number":428,"context_line":"                        cache_key, cached_namespaces.bounds,"},{"line_number":429,"context_line":"                        time\u003dself.app.recheck_updating_shard_ranges)"},{"line_number":430,"context_line":"                    self._clear_memcached_namespaces_miss_lock("},{"line_number":431,"context_line":"                        memcache, cache_key)"},{"line_number":432,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"},{"line_number":433,"context_line":"        record_cache_op_metrics("},{"line_number":434,"context_line":"            self.logger, \u0027shard_updating\u0027, cache_state, response)"}],"source_content_type":"text/x-python","patch_set":1,"id":"25ea0be9_e60e6c2c","line":431,"in_reply_to":"1b5e0c6f_3887e3fb","updated":"2023-08-02 06:12:29.000000000","message":"good point, if the first request takes the lock and is very slow to fetch data from backend, all other requests would have to wait until TTL. but it\u0027s hard to clear lock in a finally, since set lock and clear lock are in different functions. 
maybe we can modify the design to allow a number of requests to fetch the backend, then the chance of this blocking issue happening will be very much reduced.","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":false,"context_lines":[{"line_number":428,"context_line":"                        cache_key, cached_namespaces.bounds,"},{"line_number":429,"context_line":"                        time\u003dself.app.recheck_updating_shard_ranges)"},{"line_number":430,"context_line":"                    self._clear_memcached_namespaces_miss_lock("},{"line_number":431,"context_line":"                        memcache, cache_key)"},{"line_number":432,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"},{"line_number":433,"context_line":"        record_cache_op_metrics("},{"line_number":434,"context_line":"            self.logger, \u0027shard_updating\u0027, cache_state, response)"}],"source_content_type":"text/x-python","patch_set":1,"id":"ffb983bf_601042aa","line":431,"in_reply_to":"25ea0be9_e60e6c2c","updated":"2023-08-25 00:02:59.000000000","message":"Done","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":360,"context_line":"        :return: an instance of :class:`swift.common.utils.ShardRange`,"},{"line_number":361,"context_line":"            or None if the update should go back to the root"},{"line_number":362,"context_line":"        \"\"\""},{"line_number":363,"context_line":"        if not self.app.recheck_updating_shard_ranges:"},{"line_number":364,"context_line":"            # caching is disabled"},{"line_number":365,"context_line":"            return self._get_update_shard_caching_disabled("},{"line_number":366,"context_line":"                req, account, container, obj)"}],"source_content_type":"text/x-python","patch_set":2,"id":"207d3a3a_d7b7d13f","line":363,"updated":"2023-08-25 00:02:59.000000000","message":"we should also fall into this guard return if memcache is None (makes no sense to fetch whole shard list if we can\u0027t store in memcache, infocache hit is un-reliable).  
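The widened guard suggested in that comment, sketched against the quoted method; cache_from_env is the real helper imported elsewhere in the patch, while the rest of the body is elided.

```
from swift.common.utils import cache_from_env

def _get_update_shard(self, req, account, container, obj):
    memcache = cache_from_env(req.environ, True)  # may return None
    if not self.app.recheck_updating_shard_ranges or memcache is None:
        # caching disabled or memcache unusable: fetching the whole shard
        # list would be wasted work (infocache hits alone are unreliable)
        return self._get_update_shard_caching_disabled(
            req, account, container, obj)
    ...  # cached-namespace lookup path elided
```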
That would allow us to drop all the `if memcache` conditions littered in the functions from here on out.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":360,"context_line":"        :return: an instance of :class:`swift.common.utils.ShardRange`,"},{"line_number":361,"context_line":"            or None if the update should go back to the root"},{"line_number":362,"context_line":"        \"\"\""},{"line_number":363,"context_line":"        if not self.app.recheck_updating_shard_ranges:"},{"line_number":364,"context_line":"            # caching is disabled"},{"line_number":365,"context_line":"            return self._get_update_shard_caching_disabled("},{"line_number":366,"context_line":"                req, account, container, obj)"}],"source_content_type":"text/x-python","patch_set":2,"id":"250b47d0_6b7e1dba","line":363,"in_reply_to":"207d3a3a_d7b7d13f","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":378,"context_line":"            namespace \u003d cached_namespaces.get_namespace(obj)"},{"line_number":379,"context_line":"            update_shard \u003d ShardRange("},{"line_number":380,"context_line":"                name\u003dnamespace.name, timestamp\u003d0, lower\u003dnamespace.lower,"},{"line_number":381,"context_line":"                upper\u003dnamespace.upper)"},{"line_number":382,"context_line":"        else:"},{"line_number":383,"context_line":"            # pull full set of updating shard ranges from backend"},{"line_number":384,"context_line":"            shard_ranges, response \u003d self._get_shard_ranges("}],"source_content_type":"text/x-python","patch_set":2,"id":"01373c11_a8befdce","line":381,"updated":"2023-08-25 00:02:59.000000000","message":"this code path doesn\u0027t use find_namespace, it creates a ShardRange object from a namespace it found in the NamespaceBoundList\n\nis this significantly different from using find_namespace from shard_ranges list?","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":378,"context_line":"            namespace \u003d cached_namespaces.get_namespace(obj)"},{"line_number":379,"context_line":"            update_shard \u003d ShardRange("},{"line_number":380,"context_line":"                name\u003dnamespace.name, timestamp\u003d0, lower\u003dnamespace.lower,"},{"line_number":381,"context_line":"                upper\u003dnamespace.upper)"},{"line_number":382,"context_line":"        else:"},{"line_number":383,"context_line":"            # pull full set of updating shard ranges from backend"},{"line_number":384,"context_line":"            shard_ranges, response \u003d self._get_shard_ranges("}],"source_content_type":"text/x-python","patch_set":2,"id":"2ae05deb_d6b4f015","line":381,"in_reply_to":"01373c11_a8befdce","updated":"2024-01-12 06:04:07.000000000","message":"not related to new patch 
anymore.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":396,"context_line":"                    memcache.set("},{"line_number":397,"context_line":"                        cache_key, cached_namespaces.bounds,"},{"line_number":398,"context_line":"                        time\u003dself.app.recheck_updating_shard_ranges)"},{"line_number":399,"context_line":"                    clear_memcached_get_global_lock(memcache, cache_key)"},{"line_number":400,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"},{"line_number":401,"context_line":"        record_cache_op_metrics("},{"line_number":402,"context_line":"            self.logger, \u0027shard_updating\u0027, cache_state, response)"}],"source_content_type":"text/x-python","patch_set":2,"id":"c3440edc_46b9efed","line":399,"updated":"2023-08-25 00:02:59.000000000","message":"oh my, this is a whold different function from where we take the cooperative token, yuk.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":396,"context_line":"                    memcache.set("},{"line_number":397,"context_line":"                        cache_key, cached_namespaces.bounds,"},{"line_number":398,"context_line":"                        time\u003dself.app.recheck_updating_shard_ranges)"},{"line_number":399,"context_line":"                    clear_memcached_get_global_lock(memcache, cache_key)"},{"line_number":400,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"},{"line_number":401,"context_line":"        record_cache_op_metrics("},{"line_number":402,"context_line":"            self.logger, \u0027shard_updating\u0027, cache_state, response)"}],"source_content_type":"text/x-python","patch_set":2,"id":"d2e8250b_8dc0fed3","line":399,"in_reply_to":"c3440edc_46b9efed","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"4f502421d065f04e818773f9ffbcde51e080125f","unresolved":true,"context_lines":[{"line_number":397,"context_line":"                        cache_key, cached_namespaces.bounds,"},{"line_number":398,"context_line":"                        time\u003dself.app.recheck_updating_shard_ranges)"},{"line_number":399,"context_line":"                    clear_memcached_get_global_lock(memcache, cache_key)"},{"line_number":400,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"},{"line_number":401,"context_line":"        record_cache_op_metrics("},{"line_number":402,"context_line":"            self.logger, \u0027shard_updating\u0027, cache_state, response)"},{"line_number":403,"context_line":"        return update_shard"}],"source_content_type":"text/x-python","patch_set":2,"id":"820d655d_6e0fd89b","line":400,"updated":"2023-08-25 00:02:59.000000000","message":"here we use the (potentially empty) shard_ranges list even tho we potentially have a cached_namespaces object?  
Does it even make sense to call find_namespace?  We either HAVE cached_namespaces from the backend response or we\u0027re going to return None on backend error.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":397,"context_line":"                        cache_key, cached_namespaces.bounds,"},{"line_number":398,"context_line":"                        time\u003dself.app.recheck_updating_shard_ranges)"},{"line_number":399,"context_line":"                    clear_memcached_get_global_lock(memcache, cache_key)"},{"line_number":400,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"},{"line_number":401,"context_line":"        record_cache_op_metrics("},{"line_number":402,"context_line":"            self.logger, \u0027shard_updating\u0027, cache_state, response)"},{"line_number":403,"context_line":"        return update_shard"}],"source_content_type":"text/x-python","patch_set":2,"id":"11e57a21_7e317dde","line":400,"in_reply_to":"820d655d_6e0fd89b","updated":"2024-01-12 06:04:07.000000000","message":"it\u0027s fixed in the proxy namespace patch: https://review.opendev.org/c/openstack/swift/+/895602/46/swift/proxy/controllers/obj.py#380\n\nI will rebase, since that patch is just merged.","commit_id":"0cd20beea81bdcbbdc207c75467f253703d57175"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9af500b9230a35de9ed57d87146aa69ad63444f4","unresolved":true,"context_lines":[{"line_number":50,"context_line":"    quorum_size, reiterate, close_if_possible, safe_json_loads, md5,"},{"line_number":51,"context_line":"    ShardRange, cache_from_env, NamespaceBoundList,"},{"line_number":52,"context_line":"    CooperativeIterator, memcache_get_with_global_lock,"},{"line_number":53,"context_line":"    clear_memcached_get_global_lock, get_from_layered_stable_cache)"},{"line_number":54,"context_line":"from swift.common.bufferedhttp import http_connect"},{"line_number":55,"context_line":"from swift.common.constraints import check_metadata, check_object_creation"},{"line_number":56,"context_line":"from swift.common import constraints"}],"source_content_type":"text/x-python","patch_set":3,"id":"ce21c767_d05a2937","line":53,"updated":"2023-08-28 16:15:03.000000000","message":"It looks like I lost the call to clear_memcached_get_global_lock","commit_id":"931b802516555363c91c0443eb73a1bef69f0e72"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":50,"context_line":"    quorum_size, reiterate, close_if_possible, safe_json_loads, md5,"},{"line_number":51,"context_line":"    ShardRange, cache_from_env, NamespaceBoundList,"},{"line_number":52,"context_line":"    CooperativeIterator, memcache_get_with_global_lock,"},{"line_number":53,"context_line":"    clear_memcached_get_global_lock, get_from_layered_stable_cache)"},{"line_number":54,"context_line":"from swift.common.bufferedhttp import http_connect"},{"line_number":55,"context_line":"from swift.common.constraints import check_metadata, check_object_creation"},{"line_number":56,"context_line":"from swift.common import 
constraints"}],"source_content_type":"text/x-python","patch_set":3,"id":"9c83e342_d7dfc2d0","line":53,"in_reply_to":"ce21c767_d05a2937","updated":"2024-01-12 06:04:07.000000000","message":"not related to new patch anymore.","commit_id":"931b802516555363c91c0443eb73a1bef69f0e72"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9af500b9230a35de9ed57d87146aa69ad63444f4","unresolved":true,"context_lines":[{"line_number":307,"context_line":"            namespaces \u003d memcache_get_with_global_lock("},{"line_number":308,"context_line":"                memcache, cache_key,"},{"line_number":309,"context_line":"                self.app.shard_ranges_cache_miss_lock_ttl,"},{"line_number":310,"context_line":"                self.app.shard_ranges_cache_miss_sleep_interval)"},{"line_number":311,"context_line":"            cache_state \u003d \u0027hit\u0027 if namespaces else \u0027miss\u0027"},{"line_number":312,"context_line":"        except MemcacheConnectionError:"},{"line_number":313,"context_line":"            namespaces \u003d None"}],"source_content_type":"text/x-python","patch_set":3,"id":"58ce170b_e38ebc36","line":310,"updated":"2023-08-28 16:15:03.000000000","message":"I don\u0027t think this method is actually used anymore","commit_id":"931b802516555363c91c0443eb73a1bef69f0e72"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":307,"context_line":"            namespaces \u003d memcache_get_with_global_lock("},{"line_number":308,"context_line":"                memcache, cache_key,"},{"line_number":309,"context_line":"                self.app.shard_ranges_cache_miss_lock_ttl,"},{"line_number":310,"context_line":"                self.app.shard_ranges_cache_miss_sleep_interval)"},{"line_number":311,"context_line":"            cache_state \u003d \u0027hit\u0027 if namespaces else \u0027miss\u0027"},{"line_number":312,"context_line":"        except MemcacheConnectionError:"},{"line_number":313,"context_line":"            namespaces \u003d None"}],"source_content_type":"text/x-python","patch_set":3,"id":"196c61af_e019db3c","line":310,"in_reply_to":"58ce170b_e38ebc36","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"931b802516555363c91c0443eb73a1bef69f0e72"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9af500b9230a35de9ed57d87146aa69ad63444f4","unresolved":true,"context_lines":[{"line_number":405,"context_line":"            nonlocal response"},{"line_number":406,"context_line":"            shard_ranges, response \u003d self._get_shard_ranges("},{"line_number":407,"context_line":"                req, account, container, states\u003d\u0027updating\u0027)"},{"line_number":408,"context_line":"            return NamespaceBoundList.parse(shard_ranges)"},{"line_number":409,"context_line":""},{"line_number":410,"context_line":"        cached_namespaces, cache_state \u003d get_from_layered_stable_cache("},{"line_number":411,"context_line":"            req, cache_key, get_from_memcache, set_in_memcache,"}],"source_content_type":"text/x-python","patch_set":3,"id":"bd8bd508_e2c0bc88","line":408,"updated":"2023-08-28 16:15:03.000000000","message":"I think the ratelimiting lookup will actually end up in this function - we\u0027re not trying to slow down 
memcache lookups; we\u0027re trying to slowdown the backend","commit_id":"931b802516555363c91c0443eb73a1bef69f0e72"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":405,"context_line":"            nonlocal response"},{"line_number":406,"context_line":"            shard_ranges, response \u003d self._get_shard_ranges("},{"line_number":407,"context_line":"                req, account, container, states\u003d\u0027updating\u0027)"},{"line_number":408,"context_line":"            return NamespaceBoundList.parse(shard_ranges)"},{"line_number":409,"context_line":""},{"line_number":410,"context_line":"        cached_namespaces, cache_state \u003d get_from_layered_stable_cache("},{"line_number":411,"context_line":"            req, cache_key, get_from_memcache, set_in_memcache,"}],"source_content_type":"text/x-python","patch_set":3,"id":"8dc1c70d_61bd5d47","line":408,"in_reply_to":"bd8bd508_e2c0bc88","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"931b802516555363c91c0443eb73a1bef69f0e72"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":329,"context_line":"        # there will be only one shard range in the list if any"},{"line_number":330,"context_line":"        return shard_ranges[0] if shard_ranges else None"},{"line_number":331,"context_line":""},{"line_number":332,"context_line":"    class UpdatingShardCacheToken(CacheCooperativeToken):"},{"line_number":333,"context_line":"        def do_fetch_backend(self, controller, req, account, container):"},{"line_number":334,"context_line":"            # pull full set of updating shard ranges from backend"},{"line_number":335,"context_line":"            ("}],"source_content_type":"text/x-python","patch_set":4,"id":"7a81a95a_6e97aa82","line":332,"updated":"2024-01-12 20:27:10.000000000","message":"is this BaseObjectController.UpdatingShardCacheToken - this class definition could move out from under this class, yeah?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":329,"context_line":"        # there will be only one shard range in the list if any"},{"line_number":330,"context_line":"        return shard_ranges[0] if shard_ranges else None"},{"line_number":331,"context_line":""},{"line_number":332,"context_line":"    class UpdatingShardCacheToken(CacheCooperativeToken):"},{"line_number":333,"context_line":"        def do_fetch_backend(self, controller, req, account, container):"},{"line_number":334,"context_line":"            # pull full set of updating shard ranges from backend"},{"line_number":335,"context_line":"            ("}],"source_content_type":"text/x-python","patch_set":4,"id":"2e5f9b2d_e18c89a4","line":332,"in_reply_to":"7a81a95a_6e97aa82","updated":"2024-01-16 19:52:53.000000000","message":"this class is removed after I switch to functools.partial()","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay 
Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":335,"context_line":"            ("},{"line_number":336,"context_line":"                shard_ranges,"},{"line_number":337,"context_line":"                self.backend_response,"},{"line_number":338,"context_line":"            ) \u003d controller._get_updating_shard_ranges(req, account, container)"},{"line_number":339,"context_line":"            if shard_ranges:"},{"line_number":340,"context_line":"                # only store the list of namespace lower bounds and names into"},{"line_number":341,"context_line":"                # infocache and memcache."}],"source_content_type":"text/x-python","patch_set":4,"id":"cd213814_7fb9393d","line":338,"updated":"2024-01-12 20:27:10.000000000","message":"idk, I might just pass in the controller._get_updating_shard_ranges *method* at this point?  it\u0027s the only thing I see we need from the controller object...\n\n... and hey, maybe at that point - why not a functools.partial()!?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":335,"context_line":"            ("},{"line_number":336,"context_line":"                shard_ranges,"},{"line_number":337,"context_line":"                self.backend_response,"},{"line_number":338,"context_line":"            ) \u003d controller._get_updating_shard_ranges(req, account, container)"},{"line_number":339,"context_line":"            if shard_ranges:"},{"line_number":340,"context_line":"                # only store the list of namespace lower bounds and names into"},{"line_number":341,"context_line":"                # infocache and memcache."}],"source_content_type":"text/x-python","patch_set":4,"id":"08541bc9_9290ed93","line":338,"in_reply_to":"cd213814_7fb9393d","updated":"2024-01-16 19:52:53.000000000","message":"Thank you very much for the pointer, functools.partial() is exactly what I tried to find in the first place! 
I will go switch to it.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":338,"context_line":"            ) \u003d controller._get_updating_shard_ranges(req, account, container)"},{"line_number":339,"context_line":"            if shard_ranges:"},{"line_number":340,"context_line":"                # only store the list of namespace lower bounds and names into"},{"line_number":341,"context_line":"                # infocache and memcache."},{"line_number":342,"context_line":"                ns_bound_list \u003d NamespaceBoundList.parse(shard_ranges)"},{"line_number":343,"context_line":"                return ns_bound_list.bounds"},{"line_number":344,"context_line":"            return None"}],"source_content_type":"text/x-python","patch_set":4,"id":"7bc1ee8e_bba67662","line":341,"updated":"2024-01-12 20:27:10.000000000","message":"boo; isn\u0027t an infocache hit going to have to turn this back into a NamespaceBoundList?!","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":338,"context_line":"            ) \u003d controller._get_updating_shard_ranges(req, account, container)"},{"line_number":339,"context_line":"            if shard_ranges:"},{"line_number":340,"context_line":"                # only store the list of namespace lower bounds and names into"},{"line_number":341,"context_line":"                # infocache and memcache."},{"line_number":342,"context_line":"                ns_bound_list \u003d NamespaceBoundList.parse(shard_ranges)"},{"line_number":343,"context_line":"                return ns_bound_list.bounds"},{"line_number":344,"context_line":"            return None"}],"source_content_type":"text/x-python","patch_set":4,"id":"33752e10_73c94da7","line":341,"in_reply_to":"7bc1ee8e_bba67662","updated":"2024-01-16 19:52:53.000000000","message":"Done","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":366,"context_line":"        # caching is enabled, try to get from caches"},{"line_number":367,"context_line":"        response \u003d None"},{"line_number":368,"context_line":"        cache_key \u003d get_cache_key(account, container, shard\u003d\u0027updating\u0027)"},{"line_number":369,"context_line":"        skip_chance \u003d self.app.container_updating_shard_ranges_skip_cache"},{"line_number":370,"context_line":"        ns_bound_list, get_cache_state \u003d get_namespaces_from_cache("},{"line_number":371,"context_line":"            req, cache_key, skip_chance)"},{"line_number":372,"context_line":"        if ns_bound_list:"}],"source_content_type":"text/x-python","patch_set":4,"id":"412b4c1a_05c1c74a","line":369,"updated":"2024-01-12 20:27:10.000000000","message":"I think this cache_skip_cache pattern has been really successful and could similarly benefit from a class-level encapsulation.  
Once we have a SuperAwesomeCacheMagic() that basically only requires you implement the do_fetch_from_backend and maybe a couple of normalization methods for infocache/memcache - but then your \"get_from_cache_or_maybe_backend_but_like_SO_awesome()\" just works - we\u0027ll want to use it everywhere!","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":true,"context_lines":[{"line_number":366,"context_line":"        # caching is enabled, try to get from caches"},{"line_number":367,"context_line":"        response \u003d None"},{"line_number":368,"context_line":"        cache_key \u003d get_cache_key(account, container, shard\u003d\u0027updating\u0027)"},{"line_number":369,"context_line":"        skip_chance \u003d self.app.container_updating_shard_ranges_skip_cache"},{"line_number":370,"context_line":"        ns_bound_list, get_cache_state \u003d get_namespaces_from_cache("},{"line_number":371,"context_line":"            req, cache_key, skip_chance)"},{"line_number":372,"context_line":"        if ns_bound_list:"}],"source_content_type":"text/x-python","patch_set":4,"id":"943f1473_060feb8c","line":369,"in_reply_to":"412b4c1a_05c1c74a","updated":"2024-01-16 19:52:53.000000000","message":"I wonder if some use cases won\u0027t need either infocache or memcache. E.g. for the updating shard range cache, even if we store it in infocache, it won\u0027t be reused. Maybe there will be other cases which need infocache but don\u0027t need memcache.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"50192330b0eaa55928bcecef326b623b9faae22f","unresolved":false,"context_lines":[{"line_number":366,"context_line":"        # caching is enabled, try to get from caches"},{"line_number":367,"context_line":"        response \u003d None"},{"line_number":368,"context_line":"        cache_key \u003d get_cache_key(account, container, shard\u003d\u0027updating\u0027)"},{"line_number":369,"context_line":"        skip_chance \u003d self.app.container_updating_shard_ranges_skip_cache"},{"line_number":370,"context_line":"        ns_bound_list, get_cache_state \u003d get_namespaces_from_cache("},{"line_number":371,"context_line":"            req, cache_key, skip_chance)"},{"line_number":372,"context_line":"        if ns_bound_list:"}],"source_content_type":"text/x-python","patch_set":4,"id":"237f2d07_c83e7c98","line":369,"in_reply_to":"943f1473_060feb8c","updated":"2024-09-25 16:09:59.000000000","message":"Done","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":384,"context_line":"                infocache, memcache,"},{"line_number":385,"context_line":"                cache_key, self.app.recheck_updating_shard_ranges,"},{"line_number":386,"context_line":"                self.app.shard_ranges_cache_token_ttl,"},{"line_number":387,"context_line":"                self.app.shard_ranges_cache_token_sleep_interval)"},{"line_number":388,"context_line":"            shard_ranges \u003d cache_token.fetch_backend_with_token("},{"line_number":389,"context_line":"                self, req, account, 
container)"},{"line_number":390,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"}],"source_content_type":"text/x-python","patch_set":4,"id":"721e07bb_fcd8a903","line":387,"updated":"2024-01-12 20:27:10.000000000","message":"six params isn\u0027t TOO bad; I could see a super generic version of this taking a self.app.conf and a \"config prefix\" - so that ops could play with tuning the \"shard_updating__num_token\" and \"shard_updating__token_ttl\" etc for different keys and we don\u0027t have to write as much plumbing.  Maybe that\u0027s too dynamic; I guess it\u0027s a trade-off on how much we want to leverage generic cooperative caching.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":false,"context_lines":[{"line_number":384,"context_line":"                infocache, memcache,"},{"line_number":385,"context_line":"                cache_key, self.app.recheck_updating_shard_ranges,"},{"line_number":386,"context_line":"                self.app.shard_ranges_cache_token_ttl,"},{"line_number":387,"context_line":"                self.app.shard_ranges_cache_token_sleep_interval)"},{"line_number":388,"context_line":"            shard_ranges \u003d cache_token.fetch_backend_with_token("},{"line_number":389,"context_line":"                self, req, account, container)"},{"line_number":390,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"}],"source_content_type":"text/x-python","patch_set":4,"id":"5aecb14f_055bbc93","line":387,"in_reply_to":"721e07bb_fcd8a903","updated":"2024-09-25 21:54:01.000000000","message":"comment on previous old implementation.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":386,"context_line":"                self.app.shard_ranges_cache_token_ttl,"},{"line_number":387,"context_line":"                self.app.shard_ranges_cache_token_sleep_interval)"},{"line_number":388,"context_line":"            shard_ranges \u003d cache_token.fetch_backend_with_token("},{"line_number":389,"context_line":"                self, req, account, container)"},{"line_number":390,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"},{"line_number":391,"context_line":"            if cache_token.set_cache_state:"},{"line_number":392,"context_line":"                record_cache_op_metrics("}],"source_content_type":"text/x-python","patch_set":4,"id":"9007e22d_04b011d8","line":389,"updated":"2024-01-12 20:27:10.000000000","message":"ok, so here shard_ranges is actually NamespaceBoundList - oh, or no - it\u0027s a list of tuples?","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":386,"context_line":"                self.app.shard_ranges_cache_token_ttl,"},{"line_number":387,"context_line":"                self.app.shard_ranges_cache_token_sleep_interval)"},{"line_number":388,"context_line":"            shard_ranges \u003d 
cache_token.fetch_backend_with_token("},{"line_number":389,"context_line":"                self, req, account, container)"},{"line_number":390,"context_line":"            update_shard \u003d find_namespace(obj, shard_ranges or [])"},{"line_number":391,"context_line":"            if cache_token.set_cache_state:"},{"line_number":392,"context_line":"                record_cache_op_metrics("}],"source_content_type":"text/x-python","patch_set":4,"id":"c531b5c5_cc2a51f3","line":389,"in_reply_to":"9007e22d_04b011d8","updated":"2024-01-16 19:52:53.000000000","message":"it\u0027s a list of tuples to be set into memcache. we need this format since ``cache_token.fetch_backend_with_token`` doesn\u0027t know how to do the actual format conversion.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"7f012afef4cab2f37542d8e140c1cd0313fb38b4","unresolved":true,"context_lines":[{"line_number":401,"context_line":"                    \u0027Retrieved updating shards (%d shards) from cache instead \u0027"},{"line_number":402,"context_line":"                    \u0027of backend due to request coalescing by cooperative \u0027"},{"line_number":403,"context_line":"                    \u0027token for %s\u0027, len(shard_ranges), cache_key)"},{"line_number":404,"context_line":"            response \u003d cache_token.backend_response"},{"line_number":405,"context_line":"        record_cache_op_metrics("},{"line_number":406,"context_line":"            self.logger, self.server_type.lower(), \u0027shard_updating\u0027,"},{"line_number":407,"context_line":"            get_cache_state, response)"}],"source_content_type":"text/x-python","patch_set":4,"id":"8e588325_9e2c1b0b","line":404,"updated":"2024-01-12 20:27:10.000000000","message":"and we use the cache_token.backend_response for stats\n\nI think in the follow-on get namespaces patches we did some work to make all this cache resp logging and record_cache_op_metrics usage more consistent; extending the common base class hierarchy to encapsulate this might help us maintain that consistency.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"bae818be4cef99896893eb4754a796ca00def573","unresolved":false,"context_lines":[{"line_number":401,"context_line":"                    \u0027Retrieved updating shards (%d shards) from cache instead \u0027"},{"line_number":402,"context_line":"                    \u0027of backend due to request coalescing by cooperative \u0027"},{"line_number":403,"context_line":"                    \u0027token for %s\u0027, len(shard_ranges), cache_key)"},{"line_number":404,"context_line":"            response \u003d cache_token.backend_response"},{"line_number":405,"context_line":"        record_cache_op_metrics("},{"line_number":406,"context_line":"            self.logger, self.server_type.lower(), \u0027shard_updating\u0027,"},{"line_number":407,"context_line":"            get_cache_state, response)"}],"source_content_type":"text/x-python","patch_set":4,"id":"c3ed1364_de113923","line":404,"in_reply_to":"8e588325_9e2c1b0b","updated":"2024-01-16 19:52:53.000000000","message":"yes, I will rebase on the latest master, which has the get namespaces patches merged.","commit_id":"af8fa1c521e3d850349dc3a1bb9b6a158f70feb2"},{"author":{"_account_id":34930,"name":"Jianjian 
Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8c3edc7ef38309ffa5ba0a73112d3322f152bf98","unresolved":true,"context_lines":[{"line_number":405,"context_line":"                    self.logger.info("},{"line_number":406,"context_line":"                        \u0027Cached updating shards for %s (%d shards)\u0027,"},{"line_number":407,"context_line":"                        cache_key, len(bounds))"},{"line_number":408,"context_line":"            if cache_token.req_served_from_cache:"},{"line_number":409,"context_line":"                self.logger.info("},{"line_number":410,"context_line":"                    \u0027Retrieved updating shards (%d shards) from cache instead \u0027"},{"line_number":411,"context_line":"                    \u0027of backend due to request coalescing by cooperative \u0027"}],"source_content_type":"text/x-python","patch_set":5,"id":"8324f5b0_af5771f7","line":408,"updated":"2024-01-16 19:58:47.000000000","message":"I feel logging outside out CacheCooperativeToken is better, since we can add more context info into the log messages.","commit_id":"b8d41c9360dbd0604a5143f9175631ff25781d09"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"8007916b6068566286569c73cf26d26c3d40b414","unresolved":false,"context_lines":[{"line_number":405,"context_line":"                    self.logger.info("},{"line_number":406,"context_line":"                        \u0027Cached updating shards for %s (%d shards)\u0027,"},{"line_number":407,"context_line":"                        cache_key, len(bounds))"},{"line_number":408,"context_line":"            if cache_token.req_served_from_cache:"},{"line_number":409,"context_line":"                self.logger.info("},{"line_number":410,"context_line":"                    \u0027Retrieved updating shards (%d shards) from cache instead \u0027"},{"line_number":411,"context_line":"                    \u0027of backend due to request coalescing by cooperative \u0027"}],"source_content_type":"text/x-python","patch_set":5,"id":"65aecd88_dbb17884","line":408,"in_reply_to":"765deee7_2afb1828","updated":"2024-02-14 05:07:15.000000000","message":"Acknowledged","commit_id":"b8d41c9360dbd0604a5143f9175631ff25781d09"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"fad23fcc9a8765b6d96b3b325398b7ba3746e17a","unresolved":true,"context_lines":[{"line_number":405,"context_line":"                    self.logger.info("},{"line_number":406,"context_line":"                        \u0027Cached updating shards for %s (%d shards)\u0027,"},{"line_number":407,"context_line":"                        cache_key, len(bounds))"},{"line_number":408,"context_line":"            if cache_token.req_served_from_cache:"},{"line_number":409,"context_line":"                self.logger.info("},{"line_number":410,"context_line":"                    \u0027Retrieved updating shards (%d shards) from cache instead \u0027"},{"line_number":411,"context_line":"                    \u0027of backend due to request coalescing by cooperative \u0027"}],"source_content_type":"text/x-python","patch_set":5,"id":"765deee7_2afb1828","line":408,"in_reply_to":"8324f5b0_af5771f7","updated":"2024-01-22 16:38:52.000000000","message":"It doesn\u0027t seem right to me that CooperativeToken knows anything at all about how its caller want to track cache hits/misses for the application data. 
CooperativeToken should just take care of (not) granting the token.","commit_id":"b8d41c9360dbd0604a5143f9175631ff25781d09"}],"swift/proxy/server.py":[{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"a6a59d8e442cb63d30a6673c398ae6801807a646","unresolved":true,"context_lines":[{"line_number":251,"context_line":"            conf.get(\u0027account_existence_skip_cache_pct\u0027, 0))"},{"line_number":252,"context_line":"        self.shard_ranges_cache_miss_lock_ttl \u003d \\"},{"line_number":253,"context_line":"            int(conf.get(\u0027shard_ranges_cache_miss_lock_ttl\u0027,"},{"line_number":254,"context_line":"                         DEFAULT_SHARD_RANGES_CACHE_MISS_LOCK_TTL))"},{"line_number":255,"context_line":"        self.shard_ranges_cache_miss_sleep_interval \u003d \\"},{"line_number":256,"context_line":"            float(conf.get(\u0027shard_ranges_cache_miss_sleep_interval\u0027,"},{"line_number":257,"context_line":"                  DEFAULT_SHARD_RANGES_CACHE_MISS_SLEEP_INTERVAL))"}],"source_content_type":"text/x-python","patch_set":1,"id":"c84f0d5d_5c4ef597","line":254,"updated":"2023-08-01 14:18:22.000000000","message":"configurability is normally a reasonable strategy for new unproven ideas - kudos","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"42b5baeed9f180e4c4e353e09d64a55207fded93","unresolved":false,"context_lines":[{"line_number":251,"context_line":"            conf.get(\u0027account_existence_skip_cache_pct\u0027, 0))"},{"line_number":252,"context_line":"        self.shard_ranges_cache_miss_lock_ttl \u003d \\"},{"line_number":253,"context_line":"            int(conf.get(\u0027shard_ranges_cache_miss_lock_ttl\u0027,"},{"line_number":254,"context_line":"                         DEFAULT_SHARD_RANGES_CACHE_MISS_LOCK_TTL))"},{"line_number":255,"context_line":"        self.shard_ranges_cache_miss_sleep_interval \u003d \\"},{"line_number":256,"context_line":"            float(conf.get(\u0027shard_ranges_cache_miss_sleep_interval\u0027,"},{"line_number":257,"context_line":"                  DEFAULT_SHARD_RANGES_CACHE_MISS_SLEEP_INTERVAL))"}],"source_content_type":"text/x-python","patch_set":1,"id":"d0e3454c_35b2d525","line":254,"in_reply_to":"c84f0d5d_5c4ef597","updated":"2024-01-12 06:04:07.000000000","message":"Acknowledged","commit_id":"c2ded7e6c723660bd6175fda1180d97d8674f2cc"}],"test/unit/common/test_memcached.py":[{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":219,"context_line":"        pass"},{"line_number":220,"context_line":""},{"line_number":221,"context_line":""},{"line_number":222,"context_line":"class TestableMemcacheRing(memcached.MemcacheRing):"},{"line_number":223,"context_line":""},{"line_number":224,"context_line":"    def __init__(self, servers, **kwargs):"},{"line_number":225,"context_line":"        self.inject_incr_error \u003d kwargs.pop(\u0027inject_incr_error\u0027, False)"}],"source_content_type":"text/x-python","patch_set":58,"id":"b7115129_8ae30579","line":222,"updated":"2025-09-05 17:20:22.000000000","message":"this seems to duplicate test.unit.FakeMemcache\n\nThe key difference is that this class extends MemcacheRing and forwards calls to it...but I\u0027m not sure 
that it *needs* to? AFAICT every time this class is instantiated the caller then fixes up its ``_client_cache`` with a ``MockedMemcachePool`` - which is good because we don\u0027t want unit tests trying to send to an actual network address. But I can\u0027t find anywhere that uses this TestableMemcacheRing class and makes any assertion about what is passed to the memcached\u0027s.\n\n*I think I\u0027d prefer to see test.unit.FakeMemcache re-used, and improved as required, rather than this new helper class added.*\n\nHowever, if there\u0027s a good reason for this new class, then:\n\n1. I don\u0027t think it belongs in this module because it is not testing memcached; it is a helper for other test modules and so would be better in test.unit.__init__ adjacent to FakeMemcache (with some comment as to how it differs)\n\n2. the overriding of the memcached servers should be encapsulated and not left to the callers to do after instantiation, e.g.:\n\n```\ndiff --git a/test/unit/common/test_memcached.py b/test/unit/common/test_memcached.py\nindex 7721c15c9..f4f594175 100644\n--- a/test/unit/common/test_memcached.py\n+++ b/test/unit/common/test_memcached.py\n@@ -226,7 +226,10 @@ class TestableMemcacheRing(memcached.MemcacheRing):\n         self.inject_set_error \u003d kwargs.pop(\u0027inject_set_error\u0027, False)\n         self.inject_get_error \u003d kwargs.pop(\u0027inject_get_error\u0027, False)\n         self.inject_del_error \u003d kwargs.pop(\u0027inject_del_error\u0027, False)\n-        super(TestableMemcacheRing, self).__init__(servers, **kwargs)\n+        super(TestableMemcacheRing, self).__init__([\u00271.2.3.4:11211\u0027], **kwargs)\n+        mock_cache \u003d MockMemcached()\n+        self._client_cache[\u00271.2.3.4:11211\u0027] \u003d MockedMemcachePool(\n+            [(mock_cache, mock_cache)] * 2)\n         self.set_calls \u003d []\n         self.incr_calls \u003d []\n         self.get_calls \u003d []\ndiff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py\nindex 2a13403f4..8f4f2a1b4 100644\n--- a/test/unit/common/test_utils.py\n+++ b/test/unit/common/test_utils.py\n@@ -2981,9 +2981,6 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n         self.statsd \u003d debug_labeled_statsd_client(conf)\n         self.memcache \u003d TestableMemcacheRing(\n             [\u00271.2.3.4:11211\u0027], logger\u003dself.logger)\n-        mock_cache \u003d MockMemcached()\n-        self.memcache._client_cache[\u00271.2.3.4:11211\u0027] \u003d MockedMemcachePool(\n-            [(mock_cache, mock_cache)] * 2)\n         self.infocache \u003d {}\n         self.cache_key \u003d \"test_key\"\n         self.token_key \u003d \"_cache_token/%s\" % self.cache_key\n```","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":219,"context_line":"        pass"},{"line_number":220,"context_line":""},{"line_number":221,"context_line":""},{"line_number":222,"context_line":"class TestableMemcacheRing(memcached.MemcacheRing):"},{"line_number":223,"context_line":""},{"line_number":224,"context_line":"    def __init__(self, servers, **kwargs):"},{"line_number":225,"context_line":"        self.inject_incr_error \u003d kwargs.pop(\u0027inject_incr_error\u0027, 
False)"}],"source_content_type":"text/x-python","patch_set":58,"id":"06c2bf96_02c74b77","line":222,"in_reply_to":"b7115129_8ae30579","updated":"2025-09-22 05:47:05.000000000","message":"``test.unit.FakeMemcache`` is good and simple test helper when you just need basic memory based GET/SET, but it doesn\u0027t implement features like key/value TTL, and also it bypasses the internal handling of ``MemcacheRing`` which has implementation for ``incr`` to guarantee atomicity. IMHO, if we are going to write some concurrent test cases to test components who reply on those features, we need to use ``TestableMemcacheRing`` which uses real ``MemcacheRing`` object (and connected to ``MockMemcached`` who follows the memcache protocols). For example, ``TestCooperativeCachePopulator`` has several test cases which spin up green threads and issue requests concurrently.\n\nokay, I will move it to ``test.unit.__init__`` and make sure it\u0027s encapsulated well.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":222,"context_line":"class TestableMemcacheRing(memcached.MemcacheRing):"},{"line_number":223,"context_line":""},{"line_number":224,"context_line":"    def __init__(self, servers, **kwargs):"},{"line_number":225,"context_line":"        self.inject_incr_error \u003d kwargs.pop(\u0027inject_incr_error\u0027, False)"},{"line_number":226,"context_line":"        self.inject_set_error \u003d kwargs.pop(\u0027inject_set_error\u0027, False)"},{"line_number":227,"context_line":"        self.inject_get_error \u003d kwargs.pop(\u0027inject_get_error\u0027, False)"},{"line_number":228,"context_line":"        self.inject_del_error \u003d kwargs.pop(\u0027inject_del_error\u0027, False)"}],"source_content_type":"text/x-python","patch_set":58,"id":"57863b05_f4a6ca1f","line":225,"updated":"2025-09-05 17:20:22.000000000","message":"it\u0027s unusual to see the ``kwargs`` handled this way rather than written in the method signature, and I can\u0027t see any reason for it. Although it works, it means that method preview in IDEs is lacking the documentation of the args. 
Can\u0027t we just have:\n\n```\ndiff --git a/test/unit/common/test_memcached.py b/test/unit/common/test_memcached.py\nindex 7721c15c9..0f6dedfcc 100644\n--- a/test/unit/common/test_memcached.py\n+++ b/test/unit/common/test_memcached.py\n@@ -221,11 +221,12 @@ class MockMemcached(object):\n\n class TestableMemcacheRing(memcached.MemcacheRing):\n\n-    def __init__(self, servers, **kwargs):\n-        self.inject_incr_error \u003d kwargs.pop(\u0027inject_incr_error\u0027, False)\n-        self.inject_set_error \u003d kwargs.pop(\u0027inject_set_error\u0027, False)\n-        self.inject_get_error \u003d kwargs.pop(\u0027inject_get_error\u0027, False)\n-        self.inject_del_error \u003d kwargs.pop(\u0027inject_del_error\u0027, False)\n+    def __init__(self, servers, inject_incr_error\u003dNone, inject_set_error\u003dNone,\n+                 inject_get_error\u003dNone, inject_delete_error\u003dNone, **kwargs):\n+        self.inject_incr_error \u003d inject_incr_error\n+        self.inject_set_error \u003d inject_set_error\n+        self.inject_get_error \u003d inject_get_error\n+        self.inject_del_error \u003d inject_delete_error\n         super(TestableMemcacheRing, self).__init__(servers, **kwargs)\n         self.set_calls \u003d []\n         self.incr_calls \u003d []\n```","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":222,"context_line":"class TestableMemcacheRing(memcached.MemcacheRing):"},{"line_number":223,"context_line":""},{"line_number":224,"context_line":"    def __init__(self, servers, **kwargs):"},{"line_number":225,"context_line":"        self.inject_incr_error \u003d kwargs.pop(\u0027inject_incr_error\u0027, False)"},{"line_number":226,"context_line":"        self.inject_set_error \u003d kwargs.pop(\u0027inject_set_error\u0027, False)"},{"line_number":227,"context_line":"        self.inject_get_error \u003d kwargs.pop(\u0027inject_get_error\u0027, False)"},{"line_number":228,"context_line":"        self.inject_del_error \u003d kwargs.pop(\u0027inject_del_error\u0027, False)"}],"source_content_type":"text/x-python","patch_set":58,"id":"636a658b_933cd30c","line":225,"in_reply_to":"57863b05_f4a6ca1f","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":226,"context_line":"        self.inject_set_error \u003d kwargs.pop(\u0027inject_set_error\u0027, False)"},{"line_number":227,"context_line":"        self.inject_get_error \u003d kwargs.pop(\u0027inject_get_error\u0027, False)"},{"line_number":228,"context_line":"        self.inject_del_error \u003d kwargs.pop(\u0027inject_del_error\u0027, False)"},{"line_number":229,"context_line":"        super(TestableMemcacheRing, self).__init__(servers, **kwargs)"},{"line_number":230,"context_line":"        self.set_calls \u003d []"},{"line_number":231,"context_line":"        self.incr_calls \u003d []"},{"line_number":232,"context_line":"        self.get_calls \u003d []"}],"source_content_type":"text/x-python","patch_set":58,"id":"79cc9988_e8bab53c","line":229,"updated":"2025-09-05 17:20:22.000000000","message":"the args for super() are not 
necessary in python3 (same comment at lines 239, 246, 256, 262 below)\nhttps://peps.python.org/pep-3135/","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":226,"context_line":"        self.inject_set_error \u003d kwargs.pop(\u0027inject_set_error\u0027, False)"},{"line_number":227,"context_line":"        self.inject_get_error \u003d kwargs.pop(\u0027inject_get_error\u0027, False)"},{"line_number":228,"context_line":"        self.inject_del_error \u003d kwargs.pop(\u0027inject_del_error\u0027, False)"},{"line_number":229,"context_line":"        super(TestableMemcacheRing, self).__init__(servers, **kwargs)"},{"line_number":230,"context_line":"        self.set_calls \u003d []"},{"line_number":231,"context_line":"        self.incr_calls \u003d []"},{"line_number":232,"context_line":"        self.get_calls \u003d []"}],"source_content_type":"text/x-python","patch_set":58,"id":"2f1abbe7_bd71bc01","line":229,"in_reply_to":"79cc9988_e8bab53c","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":242,"context_line":"            min_compress_len\u003d0, raise_on_error\u003dFalse):"},{"line_number":243,"context_line":"        self.set_calls.append((key, value, time))"},{"line_number":244,"context_line":"        if self.inject_set_error:"},{"line_number":245,"context_line":"            raise MemcacheConnectionError"},{"line_number":246,"context_line":"        super(TestableMemcacheRing, self).set("},{"line_number":247,"context_line":"            key, value, serialize, time, min_compress_len, raise_on_error)"},{"line_number":248,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"040058dd_376f3947","line":245,"updated":"2025-09-05 17:20:22.000000000","message":"shouldn\u0027t this also be guarded by ``if raise_on_error``, same as line 252?","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":242,"context_line":"            min_compress_len\u003d0, raise_on_error\u003dFalse):"},{"line_number":243,"context_line":"        self.set_calls.append((key, value, time))"},{"line_number":244,"context_line":"        if self.inject_set_error:"},{"line_number":245,"context_line":"            raise MemcacheConnectionError"},{"line_number":246,"context_line":"        super(TestableMemcacheRing, self).set("},{"line_number":247,"context_line":"            key, value, serialize, time, min_compress_len, raise_on_error)"},{"line_number":248,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"ffb107a8_5013dc6d","line":245,"in_reply_to":"040058dd_376f3947","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair 
Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":247,"context_line":"            key, value, serialize, time, min_compress_len, raise_on_error)"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"    def get(self, key, raise_on_error\u003dFalse):"},{"line_number":250,"context_line":"        self.get_calls.append((key))"},{"line_number":251,"context_line":"        if self.inject_get_error:"},{"line_number":252,"context_line":"            if raise_on_error:"},{"line_number":253,"context_line":"                raise MemcacheConnectionError"}],"source_content_type":"text/x-python","patch_set":58,"id":"a7881d5f_e3adc541","line":250,"range":{"start_line":250,"start_character":30,"end_line":250,"end_character":35},"updated":"2025-09-05 17:20:22.000000000","message":"these parentheses are redundant (same at line 259). If you want the value appended to the list to be a tuple then you need ``append((key,))``, but that doesn\u0027t seem necessary.\n\nThere\u0027s a bunch of places that assert get_calls and del_calls that similarly have redundant parentheses around the list items.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":247,"context_line":"            key, value, serialize, time, min_compress_len, raise_on_error)"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"    def get(self, key, raise_on_error\u003dFalse):"},{"line_number":250,"context_line":"        self.get_calls.append((key))"},{"line_number":251,"context_line":"        if self.inject_get_error:"},{"line_number":252,"context_line":"            if raise_on_error:"},{"line_number":253,"context_line":"                raise MemcacheConnectionError"}],"source_content_type":"text/x-python","patch_set":58,"id":"4e6044b7_60eb42a3","line":250,"range":{"start_line":250,"start_character":30,"end_line":250,"end_character":35},"in_reply_to":"a7881d5f_e3adc541","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":255,"context_line":"                return None"},{"line_number":256,"context_line":"        return super(TestableMemcacheRing, self).get(key, raise_on_error)"},{"line_number":257,"context_line":""},{"line_number":258,"context_line":"    def delete(self, key):"},{"line_number":259,"context_line":"        self.del_calls.append((key))"},{"line_number":260,"context_line":"        if self.inject_del_error:"},{"line_number":261,"context_line":"            raise MemcacheConnectionError"}],"source_content_type":"text/x-python","patch_set":58,"id":"8dec3a1e_7d9dfcf7","line":258,"updated":"2025-09-05 17:20:22.000000000","message":"nit: the superclass method also has a ``server_key`` arg","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":255,"context_line":"           
     return None"},{"line_number":256,"context_line":"        return super(TestableMemcacheRing, self).get(key, raise_on_error)"},{"line_number":257,"context_line":""},{"line_number":258,"context_line":"    def delete(self, key):"},{"line_number":259,"context_line":"        self.del_calls.append((key))"},{"line_number":260,"context_line":"        if self.inject_del_error:"},{"line_number":261,"context_line":"            raise MemcacheConnectionError"}],"source_content_type":"text/x-python","patch_set":58,"id":"334436cd_b8df9ac2","line":258,"in_reply_to":"8dec3a1e_7d9dfcf7","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"}],"test/unit/common/test_utils.py":[{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"3046ae999f7df493351b99a76f26e260bbe7815d","unresolved":true,"context_lines":[{"line_number":4670,"context_line":"            self.memcache.get_calls[0], self.cache_key)"},{"line_number":4671,"context_line":"        self.assertEqual(self.memcache.del_calls, [])"},{"line_number":4672,"context_line":""},{"line_number":4673,"context_line":"    def test_concurrent_requests(self):"},{"line_number":4674,"context_line":"        # Simulate \"num_processes\" concurrent threads, each of them issues a"},{"line_number":4675,"context_line":"        # \"fetch_data\" request cooperatively."},{"line_number":4676,"context_line":"        self.retry_interval \u003d 0.01"}],"source_content_type":"text/x-python","patch_set":17,"id":"99d3a134_477fa1ab","line":4673,"updated":"2024-03-28 19:44:16.000000000","message":"I am adding more test_concurrent_requests with regard to token ttl. will upload after addressing new comments.","commit_id":"8454ca5f2f38d73d4f3b19e264e8567abff9cf23"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"619e2c2898764382962905311bc930e42956e463","unresolved":false,"context_lines":[{"line_number":4670,"context_line":"            self.memcache.get_calls[0], self.cache_key)"},{"line_number":4671,"context_line":"        self.assertEqual(self.memcache.del_calls, [])"},{"line_number":4672,"context_line":""},{"line_number":4673,"context_line":"    def test_concurrent_requests(self):"},{"line_number":4674,"context_line":"        # Simulate \"num_processes\" concurrent threads, each of them issues a"},{"line_number":4675,"context_line":"        # \"fetch_data\" request cooperatively."},{"line_number":4676,"context_line":"        self.retry_interval \u003d 0.01"}],"source_content_type":"text/x-python","patch_set":17,"id":"4f1ef4d6_bf32346b","line":4673,"in_reply_to":"99d3a134_477fa1ab","updated":"2024-04-02 03:21:47.000000000","message":"Acknowledged","commit_id":"8454ca5f2f38d73d4f3b19e264e8567abff9cf23"},{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"7b315613f27a881219393fdb50c21e051627af79","unresolved":true,"context_lines":[{"line_number":3804,"context_line":"        self.token_key \u003d \"_cache_token/%s\" % self.cache_key"},{"line_number":3805,"context_line":"        self.cache_ttl \u003d 60"},{"line_number":3806,"context_line":"        self.do_fetch_backend \u003d mock.Mock("},{"line_number":3807,"context_line":"            return_value\u003d(\"backend data\", \"response\"))"},{"line_number":3808,"context_line":"        self.retry_interval \u003d 
0.001"},{"line_number":3809,"context_line":""},{"line_number":3810,"context_line":"    def test_populator_constructor(self):"}],"source_content_type":"text/x-python","patch_set":32,"id":"2e708e4d_3521d220","line":3807,"range":{"start_line":3807,"start_character":42,"end_line":3807,"end_character":52},"updated":"2024-07-08 07:46:50.000000000","message":"I thought this had to be more of a response like object. If it should, maybe we should have a response object or mock one here?","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"9816d414da03efe9e797d1c07e9da26512571715","unresolved":false,"context_lines":[{"line_number":3804,"context_line":"        self.token_key \u003d \"_cache_token/%s\" % self.cache_key"},{"line_number":3805,"context_line":"        self.cache_ttl \u003d 60"},{"line_number":3806,"context_line":"        self.do_fetch_backend \u003d mock.Mock("},{"line_number":3807,"context_line":"            return_value\u003d(\"backend data\", \"response\"))"},{"line_number":3808,"context_line":"        self.retry_interval \u003d 0.001"},{"line_number":3809,"context_line":""},{"line_number":3810,"context_line":"    def test_populator_constructor(self):"}],"source_content_type":"text/x-python","patch_set":32,"id":"76693f8d_1a2852fc","line":3807,"range":{"start_line":3807,"start_character":42,"end_line":3807,"end_character":52},"in_reply_to":"2e708e4d_3521d220","updated":"2024-09-25 21:54:01.000000000","message":"Done","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":7233,"name":"Matthew Oliver","email":"matt@oliver.net.au","username":"mattoliverau"},"change_message_id":"7b315613f27a881219393fdb50c21e051627af79","unresolved":true,"context_lines":[{"line_number":3892,"context_line":"        )"},{"line_number":3893,"context_line":"        self.assertEqual("},{"line_number":3894,"context_line":"            self.memcache.set_calls,"},{"line_number":3895,"context_line":"            [(self.cache_key, \"BACKEND DATA\", self.cache_ttl)]"},{"line_number":3896,"context_line":"        )"},{"line_number":3897,"context_line":"        self.assertEqual(self.memcache.del_calls, [(self.token_key)])"},{"line_number":3898,"context_line":""}],"source_content_type":"text/x-python","patch_set":32,"id":"713e65cc_1be3a68a","line":3895,"updated":"2024-07-08 07:46:50.000000000","message":"Nice!","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f3064c1509afef499bf100fa5ef7b516368dcf6c","unresolved":false,"context_lines":[{"line_number":3892,"context_line":"        )"},{"line_number":3893,"context_line":"        self.assertEqual("},{"line_number":3894,"context_line":"            self.memcache.set_calls,"},{"line_number":3895,"context_line":"            [(self.cache_key, \"BACKEND DATA\", self.cache_ttl)]"},{"line_number":3896,"context_line":"        )"},{"line_number":3897,"context_line":"        self.assertEqual(self.memcache.del_calls, [(self.token_key)])"},{"line_number":3898,"context_line":""}],"source_content_type":"text/x-python","patch_set":32,"id":"6b1d3659_dbf9243d","line":3895,"in_reply_to":"713e65cc_1be3a68a","updated":"2024-07-09 14:28:54.000000000","message":"Acknowledged","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian 
Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"ffa3f065a7442ba639c1224c06eb0896f862e82e","unresolved":false,"context_lines":[{"line_number":4273,"context_line":"        )"},{"line_number":4274,"context_line":"        self.assertEqual(self.memcache.del_calls, [(self.token_key)] * 3)"},{"line_number":4275,"context_line":""},{"line_number":4276,"context_line":"    def test_concurrent_requests_all_token_requests_fail(self):"},{"line_number":4277,"context_line":"        # Simulate multiple concurrent threads issued into a cooperative token"},{"line_number":4278,"context_line":"        # session, each thread will issue a \"fetch_data\" request cooperatively."},{"line_number":4279,"context_line":"        # And the first three requests will acquire the token, but fail to get"}],"source_content_type":"text/x-python","patch_set":32,"id":"bc18ca01_34c60509","line":4276,"updated":"2024-07-03 17:57:11.000000000","message":"this test case was a little flaky, if multiple patches were submitted and all were running pipelines together, because it was sensitive to timings. Optimized to be less sensitive.","commit_id":"a2d4b75fb2e361d8ecdadbbc525129602760ec25"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"2b1e63489af1912155414850c1da6fd8497653a2","unresolved":false,"context_lines":[{"line_number":3992,"context_line":"            self.retry_interval,"},{"line_number":3993,"context_line":"            None,"},{"line_number":3994,"context_line":"            None,"},{"line_number":3995,"context_line":"            num_tokens_per_session"},{"line_number":3996,"context_line":"        )"},{"line_number":3997,"context_line":"        populator._token_ttl \u003d 10"},{"line_number":3998,"context_line":"        data \u003d populator.fetch_data()"}],"source_content_type":"text/x-python","patch_set":34,"id":"45ef39b0_2535819e","line":3995,"updated":"2024-08-06 04:37:45.000000000","message":"added test for random value of ``num_tokens_per_session``, test passed with 200 runs.","commit_id":"3bd9fdb84152e89d758fbc6c934e7eef38203cd1"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"2b1e63489af1912155414850c1da6fd8497653a2","unresolved":false,"context_lines":[{"line_number":4052,"context_line":"            self.retry_interval,"},{"line_number":4053,"context_line":"            None,"},{"line_number":4054,"context_line":"            None,"},{"line_number":4055,"context_line":"            num_tokens_per_session"},{"line_number":4056,"context_line":"        )"},{"line_number":4057,"context_line":"        data \u003d populator.fetch_data()"},{"line_number":4058,"context_line":"        self.assertEqual(data, \"backend data\")"}],"source_content_type":"text/x-python","patch_set":34,"id":"feaccefc_11d8e915","line":4055,"updated":"2024-08-06 04:37:45.000000000","message":"added test for random value of ``num_tokens_per_session``, test passed with 200 runs.","commit_id":"3bd9fdb84152e89d758fbc6c934e7eef38203cd1"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"2b1e63489af1912155414850c1da6fd8497653a2","unresolved":false,"context_lines":[{"line_number":4419,"context_line":"        for i in range(16):"},{"line_number":4420,"context_line":"            pool.spawn("},{"line_number":4421,"context_line":"                worker_process,"},{"line_number":4422,"context_line":"                
random.uniform(self.retry_interval * 3,"},{"line_number":4423,"context_line":"                               self.retry_interval * 6),"},{"line_number":4424,"context_line":"                self.retry_interval"},{"line_number":4425,"context_line":"            )"}],"source_content_type":"text/x-python","patch_set":34,"id":"ac37343e_139e384f","line":4422,"updated":"2024-08-06 04:37:45.000000000","message":"this is fix for the test flaky. test passed with 400 runs.","commit_id":"3bd9fdb84152e89d758fbc6c934e7eef38203cd1"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":2956,"context_line":"        self.cache_key \u003d \"test_key\""},{"line_number":2957,"context_line":"        self.token_key \u003d \"_cache_token/%s\" % self.cache_key"},{"line_number":2958,"context_line":"        self.cache_ttl \u003d 60"},{"line_number":2959,"context_line":"        self.retry_interval \u003d 0.001"},{"line_number":2960,"context_line":""},{"line_number":2961,"context_line":"    class MockCachePopulator(CooperativeCachePopulator):"},{"line_number":2962,"context_line":"        def do_fetch_backend(self):"}],"source_content_type":"text/x-python","patch_set":44,"id":"0a37e765_86c8a3bc","line":2959,"updated":"2025-05-05 21:32:09.000000000","message":"I could see some benefit to mocking time.sleep - mostly in making assertions about our exponential backoff and testing w/ larger token_ttl","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[{"line_number":2956,"context_line":"        self.cache_key \u003d \"test_key\""},{"line_number":2957,"context_line":"        self.token_key \u003d \"_cache_token/%s\" % self.cache_key"},{"line_number":2958,"context_line":"        self.cache_ttl \u003d 60"},{"line_number":2959,"context_line":"        self.retry_interval \u003d 0.001"},{"line_number":2960,"context_line":""},{"line_number":2961,"context_line":"    class MockCachePopulator(CooperativeCachePopulator):"},{"line_number":2962,"context_line":"        def do_fetch_backend(self):"}],"source_content_type":"text/x-python","patch_set":44,"id":"d4bd8cdd_f842223e","line":2959,"in_reply_to":"0a37e765_86c8a3bc","updated":"2025-05-07 05:08:33.000000000","message":"Acknowledged","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"9b5f9df7ef8e676e7130812d1a8131291f80b2a1","unresolved":true,"context_lines":[{"line_number":3145,"context_line":"        # and trying to fetch data from the Memcached until it succeeds."},{"line_number":3146,"context_line":"        num_tokens_per_session \u003d random.randint(1, 3)"},{"line_number":3147,"context_line":"        total_retries \u003d random.randint(1, 10)"},{"line_number":3148,"context_line":"        retries \u003d [0]"},{"line_number":3149,"context_line":""},{"line_number":3150,"context_line":"        class CustomizedCache(TestableMemcacheRing):"},{"line_number":3151,"context_line":"            def get(self, key, raise_on_error\u003dFalse):"}],"source_content_type":"text/x-python","patch_set":44,"id":"cfea1456_712ae5d2","line":3148,"updated":"2025-05-05 
21:32:09.000000000","message":"please use `nonlocal` instead of this ugly legacy-python cruft.","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"fd079a543129d979551147dd455b511d4d553c6e","unresolved":false,"context_lines":[{"line_number":3145,"context_line":"        # and trying to fetch data from the Memcached until it succeeds."},{"line_number":3146,"context_line":"        num_tokens_per_session \u003d random.randint(1, 3)"},{"line_number":3147,"context_line":"        total_retries \u003d random.randint(1, 10)"},{"line_number":3148,"context_line":"        retries \u003d [0]"},{"line_number":3149,"context_line":""},{"line_number":3150,"context_line":"        class CustomizedCache(TestableMemcacheRing):"},{"line_number":3151,"context_line":"            def get(self, key, raise_on_error\u003dFalse):"}],"source_content_type":"text/x-python","patch_set":44,"id":"b72bf345_c684af41","line":3148,"in_reply_to":"cfea1456_712ae5d2","updated":"2025-05-07 05:08:33.000000000","message":"Done","commit_id":"16d6894d66acef49f21b5783e22a1d545e24f7fd"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":true,"context_lines":[{"line_number":3033,"context_line":"        self.assertEqual(data, \"backend data\")"},{"line_number":3034,"context_line":"        self.assertEqual(populator.backend_resp, self.backend_resp)"},{"line_number":3035,"context_line":"        self.assertEqual(populator.set_cache_state, \"set\")"},{"line_number":3036,"context_line":"        self.assertFalse(populator.token_acquired)"},{"line_number":3037,"context_line":"        self.assertEqual(self.memcache.get_calls, [])"},{"line_number":3038,"context_line":"        self.assertEqual(self.infocache[self.cache_key], \"backend data\")"},{"line_number":3039,"context_line":"        self.assertEqual(self.memcache.incr_calls, [])"}],"source_content_type":"text/x-python","patch_set":51,"id":"3f905bbf_ae745d66","line":3036,"updated":"2025-05-13 14:20:39.000000000","message":"this is good to know; so it\u0027s better to think of this as a derivative of the \"waited 0s on memcache and fell out of the loop\" case than the \"every one is a token winner\" case.","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":3033,"context_line":"        self.assertEqual(data, \"backend data\")"},{"line_number":3034,"context_line":"        self.assertEqual(populator.backend_resp, self.backend_resp)"},{"line_number":3035,"context_line":"        self.assertEqual(populator.set_cache_state, \"set\")"},{"line_number":3036,"context_line":"        self.assertFalse(populator.token_acquired)"},{"line_number":3037,"context_line":"        self.assertEqual(self.memcache.get_calls, [])"},{"line_number":3038,"context_line":"        self.assertEqual(self.infocache[self.cache_key], \"backend data\")"},{"line_number":3039,"context_line":"        self.assertEqual(self.memcache.incr_calls, [])"}],"source_content_type":"text/x-python","patch_set":51,"id":"ca9eba93_54107677","line":3036,"in_reply_to":"3f905bbf_ae745d66","updated":"2025-05-30 
14:35:31.000000000","message":"Acknowledged","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":true,"context_lines":[{"line_number":3036,"context_line":"        self.assertFalse(populator.token_acquired)"},{"line_number":3037,"context_line":"        self.assertEqual(self.memcache.get_calls, [])"},{"line_number":3038,"context_line":"        self.assertEqual(self.infocache[self.cache_key], \"backend data\")"},{"line_number":3039,"context_line":"        self.assertEqual(self.memcache.incr_calls, [])"},{"line_number":3040,"context_line":"        self.assertEqual("},{"line_number":3041,"context_line":"            self.memcache.set_calls,"},{"line_number":3042,"context_line":"            [(self.cache_key, \"backend data\", self.cache_ttl)]"}],"source_content_type":"text/x-python","patch_set":51,"id":"8f2faf24_5e0c5ef0","line":3039,"updated":"2025-05-13 14:20:39.000000000","message":"weird for me that this made a backend request and set the value in memcache but didn\u0027t increment any stats...","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":3036,"context_line":"        self.assertFalse(populator.token_acquired)"},{"line_number":3037,"context_line":"        self.assertEqual(self.memcache.get_calls, [])"},{"line_number":3038,"context_line":"        self.assertEqual(self.infocache[self.cache_key], \"backend data\")"},{"line_number":3039,"context_line":"        self.assertEqual(self.memcache.incr_calls, [])"},{"line_number":3040,"context_line":"        self.assertEqual("},{"line_number":3041,"context_line":"            self.memcache.set_calls,"},{"line_number":3042,"context_line":"            [(self.cache_key, \"backend data\", self.cache_ttl)]"}],"source_content_type":"text/x-python","patch_set":51,"id":"02cad0b9_8d6d8233","line":3039,"in_reply_to":"8f2faf24_5e0c5ef0","updated":"2025-05-30 14:35:31.000000000","message":"Done","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":true,"context_lines":[{"line_number":3218,"context_line":"        #           list_of_expected_mock_sleep_calls"},{"line_number":3219,"context_line":"        expected \u003d {"},{"line_number":3220,"context_line":"            (1.0, 1): [mock.call(1.5), mock.call(3.0)],"},{"line_number":3221,"context_line":"            (1.0, 2): [mock.call(1.5), mock.call(3.0), mock.call(6.0)],"},{"line_number":3222,"context_line":"            (2.0, 1): [mock.call(3.0), mock.call(6.0)],"},{"line_number":3223,"context_line":"            (2.0, 2): [mock.call(3.0), mock.call(6.0), mock.call(12.0)],"},{"line_number":3224,"context_line":"        }"}],"source_content_type":"text/x-python","patch_set":51,"id":"9b437e94_64652335","line":3221,"updated":"2025-05-13 14:20:39.000000000","message":"so in this case the final retry will be after the final 10s ttl; but it doesn\u0027t matter b/c we don\u0027t look at the token counter after the initial increment - in fact if the first window failed it\u0027s possible our memcache retries will catch the set from a 
second window.","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":3218,"context_line":"        #           list_of_expected_mock_sleep_calls"},{"line_number":3219,"context_line":"        expected \u003d {"},{"line_number":3220,"context_line":"            (1.0, 1): [mock.call(1.5), mock.call(3.0)],"},{"line_number":3221,"context_line":"            (1.0, 2): [mock.call(1.5), mock.call(3.0), mock.call(6.0)],"},{"line_number":3222,"context_line":"            (2.0, 1): [mock.call(3.0), mock.call(6.0)],"},{"line_number":3223,"context_line":"            (2.0, 2): [mock.call(3.0), mock.call(6.0), mock.call(12.0)],"},{"line_number":3224,"context_line":"        }"}],"source_content_type":"text/x-python","patch_set":51,"id":"1b6a9433_8f39e591","line":3221,"in_reply_to":"9b437e94_64652335","updated":"2025-05-30 14:35:31.000000000","message":"Acknowledged","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"bc33ca00177d71d1674f2f9caec1a06429b7cb19","unresolved":true,"context_lines":[{"line_number":3220,"context_line":"            (1.0, 1): [mock.call(1.5), mock.call(3.0)],"},{"line_number":3221,"context_line":"            (1.0, 2): [mock.call(1.5), mock.call(3.0), mock.call(6.0)],"},{"line_number":3222,"context_line":"            (2.0, 1): [mock.call(3.0), mock.call(6.0)],"},{"line_number":3223,"context_line":"            (2.0, 2): [mock.call(3.0), mock.call(6.0), mock.call(12.0)],"},{"line_number":3224,"context_line":"        }"},{"line_number":3225,"context_line":""},{"line_number":3226,"context_line":"        for (avg_fetch_time, total_miss_retries), expected_sleep_calls in \\"}],"source_content_type":"text/x-python","patch_set":51,"id":"8823fb7d_6aff8ca6","line":3223,"updated":"2025-05-13 14:20:39.000000000","message":"these numbers all make sense given the 1.5 start and 2x backoff.","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":3220,"context_line":"            (1.0, 1): [mock.call(1.5), mock.call(3.0)],"},{"line_number":3221,"context_line":"            (1.0, 2): [mock.call(1.5), mock.call(3.0), mock.call(6.0)],"},{"line_number":3222,"context_line":"            (2.0, 1): [mock.call(3.0), mock.call(6.0)],"},{"line_number":3223,"context_line":"            (2.0, 2): [mock.call(3.0), mock.call(6.0), mock.call(12.0)],"},{"line_number":3224,"context_line":"        }"},{"line_number":3225,"context_line":""},{"line_number":3226,"context_line":"        for (avg_fetch_time, total_miss_retries), expected_sleep_calls in \\"}],"source_content_type":"text/x-python","patch_set":51,"id":"c870770b_e8107467","line":3223,"in_reply_to":"8823fb7d_6aff8ca6","updated":"2025-05-30 14:35:31.000000000","message":"Acknowledged","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"6cb4c00b5d866f0adfc367d233fc084dadc6126f","unresolved":true,"context_lines":[{"line_number":3331,"context_line":"            
self.memcache.set_calls,"},{"line_number":3332,"context_line":"            [(self.cache_key, \"backend data\", self.cache_ttl)]"},{"line_number":3333,"context_line":"        )"},{"line_number":3334,"context_line":"        self.assertGreater(retries, 1)"},{"line_number":3335,"context_line":"        self.assertEqual("},{"line_number":3336,"context_line":"            self.memcache.get_calls, [(\u0027NOT_EXISTED_YET\u0027)] * retries)"},{"line_number":3337,"context_line":"        self.assertEqual(self.memcache.del_calls, [])"}],"source_content_type":"text/x-python","patch_set":51,"id":"03f05145_e6456df8","line":3334,"updated":"2025-05-13 22:09:40.000000000","message":"why only \"greater\" than 1?  Do we not know exactly how many it will be?\n\n```\ndiff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py\nindex 97dd233ac..08b29a105 100644\n--- a/test/unit/common/test_utils.py\n+++ b/test/unit/common/test_utils.py\n@@ -3277,7 +3277,7 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n             self.memcache.set_calls,\n             [(self.cache_key, \"backend data\", self.cache_ttl)]\n         )\n-        self.assertGreater(retries, 1)\n+        self.assertEqual(retries, 3)\n         self.assertEqual(\n             self.memcache.get_calls, [(\u0027NOT_EXISTED_YET\u0027)] * retries)\n         self.assertEqual(self.memcache.del_calls, [])\n@@ -3331,7 +3331,7 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n             self.memcache.set_calls,\n             [(self.cache_key, \"backend data\", self.cache_ttl)]\n         )\n-        self.assertGreater(retries, 1)\n+        self.assertEqual(retries, 2)\n         self.assertEqual(\n             self.memcache.get_calls, [(\u0027NOT_EXISTED_YET\u0027)] * retries)\n         self.assertEqual(self.memcache.del_calls, [])\n```\n\n^ does this work for you - I think it\u0027d be a good bit better.","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":3331,"context_line":"            self.memcache.set_calls,"},{"line_number":3332,"context_line":"            [(self.cache_key, \"backend data\", self.cache_ttl)]"},{"line_number":3333,"context_line":"        )"},{"line_number":3334,"context_line":"        self.assertGreater(retries, 1)"},{"line_number":3335,"context_line":"        self.assertEqual("},{"line_number":3336,"context_line":"            self.memcache.get_calls, [(\u0027NOT_EXISTED_YET\u0027)] * retries)"},{"line_number":3337,"context_line":"        self.assertEqual(self.memcache.del_calls, [])"}],"source_content_type":"text/x-python","patch_set":51,"id":"127ca844_fba564dd","line":3334,"in_reply_to":"03f05145_e6456df8","updated":"2025-05-30 14:35:31.000000000","message":"Done","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"6cb4c00b5d866f0adfc367d233fc084dadc6126f","unresolved":true,"context_lines":[{"line_number":3333,"context_line":"        )"},{"line_number":3334,"context_line":"        self.assertGreater(retries, 1)"},{"line_number":3335,"context_line":"        self.assertEqual("},{"line_number":3336,"context_line":"            self.memcache.get_calls, [(\u0027NOT_EXISTED_YET\u0027)] * retries)"},{"line_number":3337,"context_line":"        
self.assertEqual(self.memcache.del_calls, [])"},{"line_number":3338,"context_line":"        stats \u003d self.logger.statsd_client.get_stats_counts()"},{"line_number":3339,"context_line":"        self.assertEqual({\u0027token.test.lack_retries\u0027: 1,"}],"source_content_type":"text/x-python","patch_set":51,"id":"963d45fc_0dc5538b","line":3336,"updated":"2025-05-13 22:09:40.000000000","message":"is this just asserting that our mock was called however many times our mock was called?\n\nShouldn\u0027t we `assertEqual(expected_retries, retries)`","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"f48506f0c3b126f38254dce975af8aa00c0e085c","unresolved":false,"context_lines":[{"line_number":3333,"context_line":"        )"},{"line_number":3334,"context_line":"        self.assertGreater(retries, 1)"},{"line_number":3335,"context_line":"        self.assertEqual("},{"line_number":3336,"context_line":"            self.memcache.get_calls, [(\u0027NOT_EXISTED_YET\u0027)] * retries)"},{"line_number":3337,"context_line":"        self.assertEqual(self.memcache.del_calls, [])"},{"line_number":3338,"context_line":"        stats \u003d self.logger.statsd_client.get_stats_counts()"},{"line_number":3339,"context_line":"        self.assertEqual({\u0027token.test.lack_retries\u0027: 1,"}],"source_content_type":"text/x-python","patch_set":51,"id":"81b1ea99_90534a1d","line":3336,"in_reply_to":"963d45fc_0dc5538b","updated":"2025-05-30 14:35:31.000000000","message":"changed to \n```\n        self.assertEqual(\n            self.memcache.get_calls, [(\u0027NOT_EXISTED_YET\u0027)] * 2)\n```","commit_id":"b5fd2a25492ff3421e6110948bff8a3c005deda9"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"f0bb1bfd8815909cded335c731656367e5ef3f80","unresolved":true,"context_lines":[{"line_number":2982,"context_line":"        self.memcache \u003d TestableMemcacheRing("},{"line_number":2983,"context_line":"            [\u00271.2.3.4:11211\u0027], logger\u003dself.logger)"},{"line_number":2984,"context_line":"        mock_cache \u003d MockMemcached()"},{"line_number":2985,"context_line":"        self.memcache._client_cache[\u00271.2.3.4:11211\u0027] \u003d MockedMemcachePool("},{"line_number":2986,"context_line":"            [(mock_cache, mock_cache)] * 2)"},{"line_number":2987,"context_line":"        self.infocache \u003d {}"},{"line_number":2988,"context_line":"        self.cache_key \u003d \"test_key\""}],"source_content_type":"text/x-python","patch_set":58,"id":"6c713741_7ea2e9a3","line":2985,"updated":"2025-09-05 17:20:22.000000000","message":"this monkey patching *must* happen *every time* TestableMemcacheRing is used, so it should happen in ``TestableMemcacheRing.__init__``\n\nBut, see also my comment in test_memcached.py: I think we should be re-using/enhancing test.unit.FakeMemcache","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":2982,"context_line":"        self.memcache \u003d TestableMemcacheRing("},{"line_number":2983,"context_line":"            [\u00271.2.3.4:11211\u0027], logger\u003dself.logger)"},{"line_number":2984,"context_line":"        mock_cache \u003d 
MockMemcached()"},{"line_number":2985,"context_line":"        self.memcache._client_cache[\u00271.2.3.4:11211\u0027] \u003d MockedMemcachePool("},{"line_number":2986,"context_line":"            [(mock_cache, mock_cache)] * 2)"},{"line_number":2987,"context_line":"        self.infocache \u003d {}"},{"line_number":2988,"context_line":"        self.cache_key \u003d \"test_key\""}],"source_content_type":"text/x-python","patch_set":58,"id":"b7dc7633_24fb6324","line":2985,"in_reply_to":"6c713741_7ea2e9a3","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3028,"context_line":""},{"line_number":3029,"context_line":"    def test_populator_constructor(self):"},{"line_number":3030,"context_line":"        # Test the first request will acquire the token, fetch data from"},{"line_number":3031,"context_line":"        # the backend and set it into Memcached."},{"line_number":3032,"context_line":"        obj \u003d self.MockCachePopulator("},{"line_number":3033,"context_line":"            MockApp(self.logger, self.statsd),"},{"line_number":3034,"context_line":"            self.infocache, self.memcache,"}],"source_content_type":"text/x-python","patch_set":58,"id":"ba751337_a422866d","line":3031,"updated":"2025-09-08 15:33:27.000000000","message":"this comment doesn\u0027t apply to this test","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3028,"context_line":""},{"line_number":3029,"context_line":"    def test_populator_constructor(self):"},{"line_number":3030,"context_line":"        # Test the first request will acquire the token, fetch data from"},{"line_number":3031,"context_line":"        # the backend and set it into Memcached."},{"line_number":3032,"context_line":"        obj \u003d self.MockCachePopulator("},{"line_number":3033,"context_line":"            MockApp(self.logger, self.statsd),"},{"line_number":3034,"context_line":"            self.infocache, self.memcache,"}],"source_content_type":"text/x-python","patch_set":58,"id":"2a9d73cc_b9077a90","line":3031,"in_reply_to":"ba751337_a422866d","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3055,"context_line":"        self.assertEqual(obj.cache_decoder(42), 42)"},{"line_number":3056,"context_line":"        self.assertIsNone(obj.set_cache_state)"},{"line_number":3057,"context_line":"        self.assertFalse(obj.token_acquired)"},{"line_number":3058,"context_line":"        self.assertIsNone(obj.backend_resp)"},{"line_number":3059,"context_line":""},{"line_number":3060,"context_line":"    def test_populator_num_tokens_zero(self):"},{"line_number":3061,"context_line":"        populator \u003d self.MockCachePopulator("}],"source_content_type":"text/x-python","patch_set":58,"id":"17556ca9_1dcd2eb6","line":3058,"updated":"2025-09-08 15:33:27.000000000","message":"is it 
deliberate that these last 3 attributes are part of the \"public\" interface of the class (i.e. not under-score prefixed)? So far it seems that they are merely implementation details, not used by any caller other than these tests.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3055,"context_line":"        self.assertEqual(obj.cache_decoder(42), 42)"},{"line_number":3056,"context_line":"        self.assertIsNone(obj.set_cache_state)"},{"line_number":3057,"context_line":"        self.assertFalse(obj.token_acquired)"},{"line_number":3058,"context_line":"        self.assertIsNone(obj.backend_resp)"},{"line_number":3059,"context_line":""},{"line_number":3060,"context_line":"    def test_populator_num_tokens_zero(self):"},{"line_number":3061,"context_line":"        populator \u003d self.MockCachePopulator("}],"source_content_type":"text/x-python","patch_set":58,"id":"fc729e21_d3a6f342","line":3058,"in_reply_to":"17556ca9_1dcd2eb6","updated":"2025-09-22 05:47:05.000000000","message":"yes, those 3 attributes are part of the \"public\" interface of the class and used by the callers, see: https://review.opendev.org/c/openstack/swift/+/908969","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3069,"context_line":"        )"},{"line_number":3070,"context_line":"        data \u003d populator.fetch_data()"},{"line_number":3071,"context_line":"        self.assertEqual(data, \"backend data\")"},{"line_number":3072,"context_line":"        self.assertEqual(populator.backend_resp, self.backend_resp)"},{"line_number":3073,"context_line":"        self.assertEqual(populator.set_cache_state, \"set\")"},{"line_number":3074,"context_line":"        self.assertFalse(populator.token_acquired)"},{"line_number":3075,"context_line":"        self.assertEqual(self.memcache.get_calls, [])"}],"source_content_type":"text/x-python","patch_set":58,"id":"611215eb_16727e8f","line":3072,"updated":"2025-09-08 15:33:27.000000000","message":"again, this seems to be suggesting that backend_resp is part of the public interface, but the key thing to assert is that MockPopulator.do_fetch_backend is called and we get that data back without trying memcache","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3069,"context_line":"        )"},{"line_number":3070,"context_line":"        data \u003d populator.fetch_data()"},{"line_number":3071,"context_line":"        self.assertEqual(data, \"backend data\")"},{"line_number":3072,"context_line":"        self.assertEqual(populator.backend_resp, self.backend_resp)"},{"line_number":3073,"context_line":"        self.assertEqual(populator.set_cache_state, \"set\")"},{"line_number":3074,"context_line":"        self.assertFalse(populator.token_acquired)"},{"line_number":3075,"context_line":"        self.assertEqual(self.memcache.get_calls, 
[])"}],"source_content_type":"text/x-python","patch_set":58,"id":"dbcabc0e_853524b9","line":3072,"in_reply_to":"611215eb_16727e8f","updated":"2025-09-22 05:47:05.000000000","message":"Acknowledged","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3072,"context_line":"        self.assertEqual(populator.backend_resp, self.backend_resp)"},{"line_number":3073,"context_line":"        self.assertEqual(populator.set_cache_state, \"set\")"},{"line_number":3074,"context_line":"        self.assertFalse(populator.token_acquired)"},{"line_number":3075,"context_line":"        self.assertEqual(self.memcache.get_calls, [])"},{"line_number":3076,"context_line":"        self.assertEqual(self.infocache[self.cache_key], \"backend data\")"},{"line_number":3077,"context_line":"        self.assertEqual(self.memcache.incr_calls, [])"},{"line_number":3078,"context_line":"        self.assertEqual("}],"source_content_type":"text/x-python","patch_set":58,"id":"b2395351_4a6980af","line":3075,"range":{"start_line":3075,"start_character":25,"end_line":3075,"end_character":52},"updated":"2025-09-08 15:33:27.000000000","message":"oh! so this highlights that if num_tokens is zero then we don\u0027t ever try to get data from memcache: is that a configuration that we *ever* want in real life? Or is the pattern that the caller should have tried memcache for data before even instantiating a CoperativeCachePopulator? If so we should make sure the docstring makes that clear","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3072,"context_line":"        self.assertEqual(populator.backend_resp, self.backend_resp)"},{"line_number":3073,"context_line":"        self.assertEqual(populator.set_cache_state, \"set\")"},{"line_number":3074,"context_line":"        self.assertFalse(populator.token_acquired)"},{"line_number":3075,"context_line":"        self.assertEqual(self.memcache.get_calls, [])"},{"line_number":3076,"context_line":"        self.assertEqual(self.infocache[self.cache_key], \"backend data\")"},{"line_number":3077,"context_line":"        self.assertEqual(self.memcache.incr_calls, [])"},{"line_number":3078,"context_line":"        self.assertEqual("}],"source_content_type":"text/x-python","patch_set":58,"id":"a13e8715_1edecb14","line":3075,"range":{"start_line":3075,"start_character":25,"end_line":3075,"end_character":52},"in_reply_to":"b2395351_4a6980af","updated":"2025-09-22 05:47:05.000000000","message":"yes, it\u0027s in the docstring: https://review.opendev.org/c/openstack/swift/+/890174/58/swift/common/utils/__init__.py#1546\n\n```\n0 means no cooperative token is used.\n```","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3086,"context_line":"                \u0027resource\u0027: \u0027updating_shard\u0027,"},{"line_number":3087,"context_line":"                \u0027event\u0027: \u0027backend_reqs\u0027,"},{"line_number":3088,"context_line":"            
    \u0027token\u0027: \u0027disabled\u0027,"},{"line_number":3089,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3090,"context_line":"            }.items())): 1,"},{"line_number":3091,"context_line":"        }, stats)"},{"line_number":3092,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"18558bd7_272c437e","line":3089,"updated":"2025-09-08 15:33:27.000000000","message":"no ``set_cache_state`` label ?","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3086,"context_line":"                \u0027resource\u0027: \u0027updating_shard\u0027,"},{"line_number":3087,"context_line":"                \u0027event\u0027: \u0027backend_reqs\u0027,"},{"line_number":3088,"context_line":"                \u0027token\u0027: \u0027disabled\u0027,"},{"line_number":3089,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3090,"context_line":"            }.items())): 1,"},{"line_number":3091,"context_line":"        }, stats)"},{"line_number":3092,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"5b5b6c85_8b5c60cb","line":3089,"in_reply_to":"18558bd7_272c437e","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3087,"context_line":"                \u0027event\u0027: \u0027backend_reqs\u0027,"},{"line_number":3088,"context_line":"                \u0027token\u0027: \u0027disabled\u0027,"},{"line_number":3089,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3090,"context_line":"            }.items())): 1,"},{"line_number":3091,"context_line":"        }, stats)"},{"line_number":3092,"context_line":""},{"line_number":3093,"context_line":"    def test_first_request_with_token(self):"}],"source_content_type":"text/x-python","patch_set":58,"id":"21104952_48fe92f5","line":3090,"range":{"start_line":3090,"start_character":13,"end_line":3090,"end_character":21},"updated":"2025-09-08 15:33:27.000000000","message":"isn\u0027t it unnecessarily verbose to write this as a dict and then transform to a set?\nvs\n\n```\n        self.assertEqual({\n            (\u0027swift_token\u0027, frozenset((\n                (\u0027resource\u0027, \u0027updating_shard\u0027),\n                (\u0027event\u0027, \u0027backend_reqs\u0027),\n                (\u0027token\u0027, \u0027disabled\u0027),\n                (\u0027status\u0027, 200)),\n            )): 1,\n        }, stats)\n\n```","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3087,"context_line":"                \u0027event\u0027: \u0027backend_reqs\u0027,"},{"line_number":3088,"context_line":"                \u0027token\u0027: \u0027disabled\u0027,"},{"line_number":3089,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3090,"context_line":"            }.items())): 1,"},{"line_number":3091,"context_line":"        }, 
stats)"},{"line_number":3092,"context_line":""},{"line_number":3093,"context_line":"    def test_first_request_with_token(self):"}],"source_content_type":"text/x-python","patch_set":58,"id":"4733674f_23871cf8","line":3090,"range":{"start_line":3090,"start_character":13,"end_line":3090,"end_character":21},"in_reply_to":"21104952_48fe92f5","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3234,"context_line":""},{"line_number":3235,"context_line":"            class CustomizedCache(TestableMemcacheRing):"},{"line_number":3236,"context_line":"                def get(self, key, raise_on_error\u003dFalse):"},{"line_number":3237,"context_line":"                    nonlocal retries"},{"line_number":3238,"context_line":"                    retries +\u003d 1"},{"line_number":3239,"context_line":"                    if retries \u003c\u003d total_miss_retries:"},{"line_number":3240,"context_line":"                        value \u003d super(CustomizedCache, self).get("}],"source_content_type":"text/x-python","patch_set":58,"id":"dbc10f3e_eb3db76d","line":3237,"range":{"start_line":3237,"start_character":20,"end_line":3237,"end_character":36},"updated":"2025-09-08 15:33:27.000000000","message":"this could be an attribute of CustomizedCache, BUT it also seems to duplicate the count of get calls that TestableMemcacheRing is maintaining\n\n```\ndiff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py\nindex 2a13403f4..e128a6b4f 100644\n--- a/test/unit/common/test_utils.py\n+++ b/test/unit/common/test_utils.py\n@@ -3230,13 +3230,10 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n             self.logger.statsd_client.clear()\n             self.statsd.clear()\n             num_tokens_per_session \u003d random.randint(1, 3)\n-            retries \u003d 0\n \n             class CustomizedCache(TestableMemcacheRing):\n                 def get(self, key, raise_on_error\u003dFalse):\n-                    nonlocal retries\n-                    retries +\u003d 1\n-                    if retries \u003c\u003d total_miss_retries:\n+                    if len(self.get_calls) \u003c\u003d total_miss_retries -1:\n                         value \u003d super(CustomizedCache, self).get(\n                             \"NOT_EXISTED_YET\")\n                         return value\n@@ -3282,6 +3279,7 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n                 [(self.token_key, 1, populator._token_ttl)]\n             )\n             self.assertEqual(self.memcache.set_calls, [])\n+            retries \u003d len(self.memcache.get_calls)\n             self.assertEqual(retries, total_miss_retries + 1)\n             self.assertEqual(\n                 self.memcache.get_calls,\n\n```","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3234,"context_line":""},{"line_number":3235,"context_line":"            class CustomizedCache(TestableMemcacheRing):"},{"line_number":3236,"context_line":"                def get(self, key, raise_on_error\u003dFalse):"},{"line_number":3237,"context_line":"        
            nonlocal retries"},{"line_number":3238,"context_line":"                    retries +\u003d 1"},{"line_number":3239,"context_line":"                    if retries \u003c\u003d total_miss_retries:"},{"line_number":3240,"context_line":"                        value \u003d super(CustomizedCache, self).get("}],"source_content_type":"text/x-python","patch_set":58,"id":"2e19d7d1_a69a6179","line":3237,"range":{"start_line":3237,"start_character":20,"end_line":3237,"end_character":36},"in_reply_to":"dbc10f3e_eb3db76d","updated":"2025-09-22 05:47:05.000000000","message":"that\u0027s true, but I prefer using ``retries`` which has slightly better code readability.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3237,"context_line":"                    nonlocal retries"},{"line_number":3238,"context_line":"                    retries +\u003d 1"},{"line_number":3239,"context_line":"                    if retries \u003c\u003d total_miss_retries:"},{"line_number":3240,"context_line":"                        value \u003d super(CustomizedCache, self).get("},{"line_number":3241,"context_line":"                            \"NOT_EXISTED_YET\")"},{"line_number":3242,"context_line":"                        return value"},{"line_number":3243,"context_line":"                    else:"}],"source_content_type":"text/x-python","patch_set":58,"id":"b335b010_80f9cc0e","line":3240,"range":{"start_line":3240,"start_character":38,"end_line":3240,"end_character":59},"updated":"2025-09-08 15:33:27.000000000","message":"args are not needed for ``super``","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3237,"context_line":"                    nonlocal retries"},{"line_number":3238,"context_line":"                    retries +\u003d 1"},{"line_number":3239,"context_line":"                    if retries \u003c\u003d total_miss_retries:"},{"line_number":3240,"context_line":"                        value \u003d super(CustomizedCache, self).get("},{"line_number":3241,"context_line":"                            \"NOT_EXISTED_YET\")"},{"line_number":3242,"context_line":"                        return value"},{"line_number":3243,"context_line":"                    else:"}],"source_content_type":"text/x-python","patch_set":58,"id":"fb6ccd43_350de8cb","line":3240,"range":{"start_line":3240,"start_character":38,"end_line":3240,"end_character":59},"in_reply_to":"b335b010_80f9cc0e","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3239,"context_line":"                    if retries \u003c\u003d total_miss_retries:"},{"line_number":3240,"context_line":"                        value \u003d super(CustomizedCache, self).get("},{"line_number":3241,"context_line":"                            \"NOT_EXISTED_YET\")"},{"line_number":3242,"context_line":"                        return 
value"},{"line_number":3243,"context_line":"                    else:"},{"line_number":3244,"context_line":"                        return super(CustomizedCache, self).get(key)"},{"line_number":3245,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"4ca3ba8d_c8aefcec","line":3242,"updated":"2025-09-08 15:33:27.000000000","message":"IIUC the idea is to return None for the first few get\u0027s, but it is achieved by changing the key to a key that doesn\u0027t exist and the forwarding to the memcached - why not just return None here?","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3239,"context_line":"                    if retries \u003c\u003d total_miss_retries:"},{"line_number":3240,"context_line":"                        value \u003d super(CustomizedCache, self).get("},{"line_number":3241,"context_line":"                            \"NOT_EXISTED_YET\")"},{"line_number":3242,"context_line":"                        return value"},{"line_number":3243,"context_line":"                    else:"},{"line_number":3244,"context_line":"                        return super(CustomizedCache, self).get(key)"},{"line_number":3245,"context_line":""}],"source_content_type":"text/x-python","patch_set":58,"id":"fce0a8fd_c3794f0e","line":3242,"in_reply_to":"4ca3ba8d_c8aefcec","updated":"2025-09-22 05:47:05.000000000","message":"that\u0027s true, I guess I wanted the test case to be as much as close to the prod as possible, since I am using a real MemcacheRing object on top of a mocked memcache instance.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3269,"context_line":"                    \u0027resource\u0027: \"test\","},{"line_number":3270,"context_line":"                }"},{"line_number":3271,"context_line":"            )"},{"line_number":3272,"context_line":"            populator._token_ttl \u003d 10"},{"line_number":3273,"context_line":"            with mock.patch.object(utils.eventlet, \u0027sleep\u0027) as mock_sleep:"},{"line_number":3274,"context_line":"                data \u003d populator.fetch_data()"},{"line_number":3275,"context_line":"            self.assertEqual(expected_sleep_calls, mock_sleep.call_args_list)"}],"source_content_type":"text/x-python","patch_set":58,"id":"12b91ea3_1672767b","line":3272,"updated":"2025-09-08 15:33:27.000000000","message":"this never happens in real life, so seem to be a distortion of the unit being tested - is it necessary? 
( I removed the line and tests passed)","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3269,"context_line":"                    \u0027resource\u0027: \"test\","},{"line_number":3270,"context_line":"                }"},{"line_number":3271,"context_line":"            )"},{"line_number":3272,"context_line":"            populator._token_ttl \u003d 10"},{"line_number":3273,"context_line":"            with mock.patch.object(utils.eventlet, \u0027sleep\u0027) as mock_sleep:"},{"line_number":3274,"context_line":"                data \u003d populator.fetch_data()"},{"line_number":3275,"context_line":"            self.assertEqual(expected_sleep_calls, mock_sleep.call_args_list)"}],"source_content_type":"text/x-python","patch_set":58,"id":"9056d0d5_b9a13c17","line":3272,"in_reply_to":"12b91ea3_1672767b","updated":"2025-09-22 05:47:05.000000000","message":"okay, got it reduced.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3293,"context_line":"                (\u0027swift_token\u0027, frozenset({"},{"line_number":3294,"context_line":"                    \u0027resource\u0027: \"test\","},{"line_number":3295,"context_line":"                    \u0027event\u0027: \u0027cache_served\u0027,"},{"line_number":3296,"context_line":"                    \u0027token\u0027: \u0027no_token\u0027,"},{"line_number":3297,"context_line":"                    \u0027lack_retries\u0027: False,"},{"line_number":3298,"context_line":"                }.items())): 1,"},{"line_number":3299,"context_line":"            }, stats)"}],"source_content_type":"text/x-python","patch_set":58,"id":"14c5bf06_39f00197","line":3296,"updated":"2025-09-08 15:33:27.000000000","message":"I don\u0027t think we ever get ``event\u003dcache_served`` and  ``token\u003dwith_token``, so is token label unnecessary here?","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":true,"context_lines":[{"line_number":3293,"context_line":"                (\u0027swift_token\u0027, frozenset({"},{"line_number":3294,"context_line":"                    \u0027resource\u0027: \"test\","},{"line_number":3295,"context_line":"                    \u0027event\u0027: \u0027cache_served\u0027,"},{"line_number":3296,"context_line":"                    \u0027token\u0027: \u0027no_token\u0027,"},{"line_number":3297,"context_line":"                    \u0027lack_retries\u0027: False,"},{"line_number":3298,"context_line":"                }.items())): 1,"},{"line_number":3299,"context_line":"            }, stats)"}],"source_content_type":"text/x-python","patch_set":58,"id":"d8554c93_a11beed3","line":3296,"in_reply_to":"14c5bf06_39f00197","updated":"2025-09-22 05:47:05.000000000","message":"when ``event\u003dcache_served``, ``token`` label should be ``no_token``, but probably we should still keep ``token`` label for consistency? 
also SRE has been parsing the current format already.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"c8ddb65a54a3410032c70039e6a73a7738b32128","unresolved":true,"context_lines":[{"line_number":3293,"context_line":"                (\u0027swift_token\u0027, frozenset({"},{"line_number":3294,"context_line":"                    \u0027resource\u0027: \"test\","},{"line_number":3295,"context_line":"                    \u0027event\u0027: \u0027cache_served\u0027,"},{"line_number":3296,"context_line":"                    \u0027token\u0027: \u0027no_token\u0027,"},{"line_number":3297,"context_line":"                    \u0027lack_retries\u0027: False,"},{"line_number":3298,"context_line":"                }.items())): 1,"},{"line_number":3299,"context_line":"            }, stats)"}],"source_content_type":"text/x-python","patch_set":58,"id":"6d0411a2_6ec83eca","line":3296,"in_reply_to":"5eb41373_6dcecb53","updated":"2025-09-30 18:25:34.000000000","message":"when ``event\u003dcache_served``, token label should be ``no_token``. However if there is no obvious performance or latency impact to those labelled metrics emitted, I\u0027d like to keep the token label for this case: we can use it to debug possible problems in production. E.g. if we see metrics with both ``token\u003dwith_token`` and ``event\u003dcache_served``, then something is wrong.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"612655d3eb285f86ae1aa4e296980f7f239fa23f","unresolved":true,"context_lines":[{"line_number":3293,"context_line":"                (\u0027swift_token\u0027, frozenset({"},{"line_number":3294,"context_line":"                    \u0027resource\u0027: \"test\","},{"line_number":3295,"context_line":"                    \u0027event\u0027: \u0027cache_served\u0027,"},{"line_number":3296,"context_line":"                    \u0027token\u0027: \u0027no_token\u0027,"},{"line_number":3297,"context_line":"                    \u0027lack_retries\u0027: False,"},{"line_number":3298,"context_line":"                }.items())): 1,"},{"line_number":3299,"context_line":"            }, stats)"}],"source_content_type":"text/x-python","patch_set":58,"id":"5eb41373_6dcecb53","line":3296,"in_reply_to":"d8554c93_a11beed3","updated":"2025-09-29 21:14:23.000000000","message":"\u003e is token label unnecessary here\n\nI might say \"redundant\"\n\nAs best I can tell in prod we\u0027ll see only:\n\n * event: backend_reqs \u0026 token\u003dwith_token\n * event: cache_served \u0026 token\u003dno_token\n\nthe \"token\" key is strictly used to distinguish details on an `event:backend_reqs`\n\n (\u0027token\u0027, \u0027no_token\u0027),\n (\u0027token\u0027, \u0027disabled\u0027),\n (\u0027token\u0027, \u0027error\u0027),\n \n^ all of these are bad w/ `event:backend_reqs`; so if you wanted to look for `swift_coop_cache{token!\u003d\"with_token\"}` it might be handy if `event\u003dcache_served` was automatically excluded; but w/e","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3308,"context_line":"            (2.0, 2): [mock.call(3.0), mock.call(6.0), 
mock.call(12.0)],"},{"line_number":3309,"context_line":"        }"},{"line_number":3310,"context_line":""},{"line_number":3311,"context_line":"        for (avg_fetch_time, total_miss_retries), expected_sleep_calls in \\"},{"line_number":3312,"context_line":"                expected.items():"},{"line_number":3313,"context_line":"            test_fetch_data("},{"line_number":3314,"context_line":"                avg_fetch_time, total_miss_retries, expected_sleep_calls)"}],"source_content_type":"text/x-python","patch_set":58,"id":"9347cee3_209f1710","line":3311,"updated":"2025-09-08 15:33:27.000000000","message":"please either use subtest, or spell out each scenario as a separate call to test_fetch_data. That way, if a scenario fails you immediately know which one it was.\n\ni.e.:\n```\n test_fetch_data(1.0, 1, [mock.call(1.5), mock.call(3.0)])\n ```\n \n it\u0027ll also be less lines of code","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3308,"context_line":"            (2.0, 2): [mock.call(3.0), mock.call(6.0), mock.call(12.0)],"},{"line_number":3309,"context_line":"        }"},{"line_number":3310,"context_line":""},{"line_number":3311,"context_line":"        for (avg_fetch_time, total_miss_retries), expected_sleep_calls in \\"},{"line_number":3312,"context_line":"                expected.items():"},{"line_number":3313,"context_line":"            test_fetch_data("},{"line_number":3314,"context_line":"                avg_fetch_time, total_miss_retries, expected_sleep_calls)"}],"source_content_type":"text/x-python","patch_set":58,"id":"b504681d_e1a394e0","line":3311,"in_reply_to":"9347cee3_209f1710","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3315,"context_line":""},{"line_number":3316,"context_line":"    def test_fetch_data_cache_miss_without_token(self):"},{"line_number":3317,"context_line":"        # Test the request which doesn\u0027t acquire the token, then keep sleeping"},{"line_number":3318,"context_line":"        # and trying to fetch data from the Memcached, but enventually all"},{"line_number":3319,"context_line":"        # retries exhausted with cache misses."},{"line_number":3320,"context_line":"        num_tokens_per_session \u003d random.randint(1, 3)"},{"line_number":3321,"context_line":"        retries \u003d 0"}],"source_content_type":"text/x-python","patch_set":58,"id":"2db996ac_95f1f204","line":3318,"range":{"start_line":3318,"start_character":59,"end_line":3318,"end_character":70},"updated":"2025-09-08 15:33:27.000000000","message":"typo: eventually","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3315,"context_line":""},{"line_number":3316,"context_line":"    def test_fetch_data_cache_miss_without_token(self):"},{"line_number":3317,"context_line":"        # Test the request which doesn\u0027t acquire the token, then keep 
sleeping"},{"line_number":3318,"context_line":"        # and trying to fetch data from the Memcached, but enventually all"},{"line_number":3319,"context_line":"        # retries exhausted with cache misses."},{"line_number":3320,"context_line":"        num_tokens_per_session \u003d random.randint(1, 3)"},{"line_number":3321,"context_line":"        retries \u003d 0"}],"source_content_type":"text/x-python","patch_set":58,"id":"983ec1bd_7584cfc8","line":3318,"range":{"start_line":3318,"start_character":59,"end_line":3318,"end_character":70},"in_reply_to":"2db996ac_95f1f204","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3323,"context_line":"        class CustomizedCache(TestableMemcacheRing):"},{"line_number":3324,"context_line":"            def get(self, key, raise_on_error\u003dFalse):"},{"line_number":3325,"context_line":"                nonlocal retries"},{"line_number":3326,"context_line":"                retries +\u003d 1"},{"line_number":3327,"context_line":"                return super(CustomizedCache, self).get(\"NOT_EXISTED_YET\")"},{"line_number":3328,"context_line":""},{"line_number":3329,"context_line":"        self.memcache \u003d CustomizedCache([\u00271.2.3.4:11211\u0027], logger\u003dself.logger)"}],"source_content_type":"text/x-python","patch_set":58,"id":"29684550_8ec9a46d","line":3326,"updated":"2025-09-08 15:33:27.000000000","message":"ditto comment at line 3237","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3323,"context_line":"        class CustomizedCache(TestableMemcacheRing):"},{"line_number":3324,"context_line":"            def get(self, key, raise_on_error\u003dFalse):"},{"line_number":3325,"context_line":"                nonlocal retries"},{"line_number":3326,"context_line":"                retries +\u003d 1"},{"line_number":3327,"context_line":"                return super(CustomizedCache, self).get(\"NOT_EXISTED_YET\")"},{"line_number":3328,"context_line":""},{"line_number":3329,"context_line":"        self.memcache \u003d CustomizedCache([\u00271.2.3.4:11211\u0027], logger\u003dself.logger)"}],"source_content_type":"text/x-python","patch_set":58,"id":"9aaef876_b79dc738","line":3326,"in_reply_to":"29684550_8ec9a46d","updated":"2025-09-22 05:47:05.000000000","message":"Acknowledged","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3335,"context_line":"        total_requests \u003d self.memcache.incr("},{"line_number":3336,"context_line":"            self.token_key, delta\u003dnum_tokens_per_session, time\u003d10)"},{"line_number":3337,"context_line":"        self.assertEqual(total_requests, num_tokens_per_session)"},{"line_number":3338,"context_line":"        self.memcache.set(self.cache_key, [1, 2, 3])"},{"line_number":3339,"context_line":"        # Test the request without a token"},{"line_number":3340,"context_line":"        
self.memcache.incr_calls \u003d []"},{"line_number":3341,"context_line":"        self.memcache.set_calls \u003d []"}],"source_content_type":"text/x-python","patch_set":58,"id":"47583173_ca6cc75a","line":3338,"range":{"start_line":3338,"start_character":42,"end_line":3338,"end_character":51},"updated":"2025-09-08 15:33:27.000000000","message":"I\u0027m curious why this list was chosen as the sample data? it\u0027s just that \u00273\u0027 is very significant in the context of the tokens, but this list has nothing to do with the tokens, correct? Could it be something more obvious like \"cached data\" (cf \"backend data\" in other tests) ;-)","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3335,"context_line":"        total_requests \u003d self.memcache.incr("},{"line_number":3336,"context_line":"            self.token_key, delta\u003dnum_tokens_per_session, time\u003d10)"},{"line_number":3337,"context_line":"        self.assertEqual(total_requests, num_tokens_per_session)"},{"line_number":3338,"context_line":"        self.memcache.set(self.cache_key, [1, 2, 3])"},{"line_number":3339,"context_line":"        # Test the request without a token"},{"line_number":3340,"context_line":"        self.memcache.incr_calls \u003d []"},{"line_number":3341,"context_line":"        self.memcache.set_calls \u003d []"}],"source_content_type":"text/x-python","patch_set":58,"id":"2cf6f8cb_40829661","line":3338,"range":{"start_line":3338,"start_character":42,"end_line":3338,"end_character":51},"in_reply_to":"47583173_ca6cc75a","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3414,"context_line":"            labels\u003d{"},{"line_number":3415,"context_line":"                \u0027resource\u0027: \"test\","},{"line_number":3416,"context_line":"            }"},{"line_number":3417,"context_line":"        )"},{"line_number":3418,"context_line":"        with patch(\u0027time.time\u0027, ) as mock_time:"},{"line_number":3419,"context_line":"            mock_time.side_effect \u003d itertools.count(4000.99, 1.0)"},{"line_number":3420,"context_line":"            data \u003d populator.fetch_data()"}],"source_content_type":"text/x-python","patch_set":58,"id":"bb71ae16_17f3c6be","line":3417,"updated":"2025-09-08 15:33:27.000000000","message":"I\u0027m seeing these 10 lines repeated a lot - they could probably be moved to setUp to provide a self.populator instance","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3414,"context_line":"            labels\u003d{"},{"line_number":3415,"context_line":"                \u0027resource\u0027: \"test\","},{"line_number":3416,"context_line":"            }"},{"line_number":3417,"context_line":"        )"},{"line_number":3418,"context_line":"        with patch(\u0027time.time\u0027, ) as mock_time:"},{"line_number":3419,"context_line":"            mock_time.side_effect \u003d 
itertools.count(4000.99, 1.0)"},{"line_number":3420,"context_line":"            data \u003d populator.fetch_data()"}],"source_content_type":"text/x-python","patch_set":58,"id":"9f526116_09893b07","line":3417,"in_reply_to":"bb71ae16_17f3c6be","updated":"2025-09-22 05:47:05.000000000","message":"there are some differences, especially the values of infocache and memcache.\nI refactored the code to be denser and use fewer lines of code.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3487,"context_line":"                \u0027resource\u0027: \"test\","},{"line_number":3488,"context_line":"                \u0027event\u0027: \u0027backend_reqs\u0027,"},{"line_number":3489,"context_line":"                \u0027token\u0027: \u0027no_token\u0027,"},{"line_number":3490,"context_line":"                \u0027set_cache_state\u0027: \u0027set\u0027,"},{"line_number":3491,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3492,"context_line":"            }.items())): 1,"},{"line_number":3493,"context_line":"        }, stats)"}],"source_content_type":"text/x-python","patch_set":58,"id":"6a66bf30_63b698e2","line":3490,"updated":"2025-09-08 15:33:27.000000000","message":"not ``inc_error`` ?!\n\napart from ``lack_retries`` this looks identical to the previous test","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3487,"context_line":"                \u0027resource\u0027: \"test\","},{"line_number":3488,"context_line":"                \u0027event\u0027: \u0027backend_reqs\u0027,"},{"line_number":3489,"context_line":"                \u0027token\u0027: \u0027no_token\u0027,"},{"line_number":3490,"context_line":"                \u0027set_cache_state\u0027: \u0027set\u0027,"},{"line_number":3491,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3492,"context_line":"            }.items())): 1,"},{"line_number":3493,"context_line":"        }, stats)"}],"source_content_type":"text/x-python","patch_set":58,"id":"218fe632_d2b31346","line":3490,"in_reply_to":"6a66bf30_63b698e2","updated":"2025-09-22 05:47:05.000000000","message":"I removed ``inc_error``, ``\u0027token\u0027`` is ``error`` now.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3533,"context_line":"                \u0027resource\u0027: \"test\","},{"line_number":3534,"context_line":"                \u0027event\u0027: \u0027backend_reqs\u0027,"},{"line_number":3535,"context_line":"                \u0027token\u0027: \u0027with_token\u0027,"},{"line_number":3536,"context_line":"                \u0027set_cache_state\u0027: \u0027set_error\u0027,"},{"line_number":3537,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3538,"context_line":"            }.items())): 1,"},{"line_number":3539,"context_line":"        }, 
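As an illustration of the setUp-versus-density trade-off discussed above, one option is a small factory method on the test class, so the shared construction lives in one place while each test passes only what differs. This is a hedged sketch: the parameter names are assembled from the surrounding context lines, but the exact constructor signature is an assumption.

```python
def _make_populator(self, **overrides):
    # Sketch: defaults shared by most tests; individual tests override
    # e.g. num_tokens, labels or avg_backend_fetch_time as needed.
    params = dict(avg_backend_fetch_time=0.1, num_tokens=1,
                  labels={'resource': 'test'})
    params.update(overrides)
    return self.DelayedCachePopulator(
        MockApp(self.logger, self.statsd), {},
        self.cache_key, self.cache_ttl, **params)
```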
stats)"}],"source_content_type":"text/x-python","patch_set":58,"id":"67668a7f_a52f881c","line":3536,"range":{"start_line":3536,"start_character":16,"end_line":3536,"end_character":46},"updated":"2025-09-08 15:33:27.000000000","message":"nit: ``set_cache_state \u003d set_error`` seems unnecessarily verbose vs ``set_cache_state \u003d error`` - I don\u0027t think there\u0027s ever any other kind of error.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3533,"context_line":"                \u0027resource\u0027: \"test\","},{"line_number":3534,"context_line":"                \u0027event\u0027: \u0027backend_reqs\u0027,"},{"line_number":3535,"context_line":"                \u0027token\u0027: \u0027with_token\u0027,"},{"line_number":3536,"context_line":"                \u0027set_cache_state\u0027: \u0027set_error\u0027,"},{"line_number":3537,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3538,"context_line":"            }.items())): 1,"},{"line_number":3539,"context_line":"        }, stats)"}],"source_content_type":"text/x-python","patch_set":58,"id":"a817f576_b4a582ed","line":3536,"range":{"start_line":3536,"start_character":16,"end_line":3536,"end_character":46},"in_reply_to":"67668a7f_a52f881c","updated":"2025-09-22 05:47:05.000000000","message":"that\u0027s true; however, SRE has been parsing ``set_error`` in prod, so let\u0027s continue to use it.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3538,"context_line":"            }.items())): 1,"},{"line_number":3539,"context_line":"        }, stats)"},{"line_number":3540,"context_line":""},{"line_number":3541,"context_line":"    def test_get_token_data_set_connection_errors(self):"},{"line_number":3542,"context_line":"        # Test the request which couldn\u0027t acquire the token due to memcached"},{"line_number":3543,"context_line":"        # connection error, and then couldn\u0027t set the backend data into cache"},{"line_number":3544,"context_line":"     
   # due to memcached connection error too."}],"source_content_type":"text/x-python","patch_set":58,"id":"00786097_375653e4","line":3541,"range":{"start_line":3541,"start_character":13,"end_line":3541,"end_character":22},"in_reply_to":"3128e1b5_3368b988","updated":"2025-09-22 05:47:05.000000000","message":"Acknowledged","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3587,"context_line":""},{"line_number":3588,"context_line":"    def test_fetch_data_from_cache_connection_error(self):"},{"line_number":3589,"context_line":"        # Test the request which doesn\u0027t acquire the token, then keep sleeping"},{"line_number":3590,"context_line":"        # and trying to fetch data from the Memcached, but enventually all"},{"line_number":3591,"context_line":"        # retries exhausted with memcached connection errors."},{"line_number":3592,"context_line":"        self.memcache \u003d TestableMemcacheRing("},{"line_number":3593,"context_line":"            [\u00271.2.3.4:11211\u0027], logger\u003dself.logger, inject_get_error\u003dTrue)"}],"source_content_type":"text/x-python","patch_set":58,"id":"dd676712_0051ec1a","line":3590,"range":{"start_line":3590,"start_character":59,"end_line":3590,"end_character":70},"updated":"2025-09-08 15:33:27.000000000","message":"typo: eventually","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3587,"context_line":""},{"line_number":3588,"context_line":"    def test_fetch_data_from_cache_connection_error(self):"},{"line_number":3589,"context_line":"        # Test the request which doesn\u0027t acquire the token, then keep sleeping"},{"line_number":3590,"context_line":"        # and trying to fetch data from the Memcached, but enventually all"},{"line_number":3591,"context_line":"        # retries exhausted with memcached connection errors."},{"line_number":3592,"context_line":"        self.memcache \u003d TestableMemcacheRing("},{"line_number":3593,"context_line":"            [\u00271.2.3.4:11211\u0027], logger\u003dself.logger, inject_get_error\u003dTrue)"}],"source_content_type":"text/x-python","patch_set":58,"id":"1c15e4ce_a65dbdc0","line":3590,"range":{"start_line":3590,"start_character":59,"end_line":3590,"end_character":70},"in_reply_to":"dd676712_0051ec1a","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"95a9e84609d112ff3f1872999da44a9f9e66b17f","unresolved":true,"context_lines":[{"line_number":3639,"context_line":"                \u0027set_cache_state\u0027: \u0027set\u0027,"},{"line_number":3640,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3641,"context_line":"            }.items())): 1,"},{"line_number":3642,"context_line":"        }, stats)"},{"line_number":3643,"context_line":""},{"line_number":3644,"context_line":"    def test_concurrent_requests(self):"},{"line_number":3645,"context_line":"        # Simulate multiple concurrent threads, each of them issues 
a"}],"source_content_type":"text/x-python","patch_set":58,"id":"4b948dd9_ba15a07b","line":3642,"updated":"2025-09-08 15:33:27.000000000","message":"this is the exact same set of labels as test_fetch_data_cache_miss_without_token - I think we ought to have some distinction for this case where memcache gets are failing rather than missing","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"10f897d87faa2a035331daf6011424bc796d7cfc","unresolved":false,"context_lines":[{"line_number":3639,"context_line":"                \u0027set_cache_state\u0027: \u0027set\u0027,"},{"line_number":3640,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3641,"context_line":"            }.items())): 1,"},{"line_number":3642,"context_line":"        }, stats)"},{"line_number":3643,"context_line":""},{"line_number":3644,"context_line":"    def test_concurrent_requests(self):"},{"line_number":3645,"context_line":"        # Simulate multiple concurrent threads, each of them issues a"}],"source_content_type":"text/x-python","patch_set":58,"id":"c7a9c5f2_1932290d","line":3642,"in_reply_to":"2d0c1936_d1377f18","updated":"2025-09-26 18:29:20.000000000","message":"Acknowledged","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":true,"context_lines":[{"line_number":3639,"context_line":"                \u0027set_cache_state\u0027: \u0027set\u0027,"},{"line_number":3640,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3641,"context_line":"            }.items())): 1,"},{"line_number":3642,"context_line":"        }, stats)"},{"line_number":3643,"context_line":""},{"line_number":3644,"context_line":"    def test_concurrent_requests(self):"},{"line_number":3645,"context_line":"        # Simulate multiple concurrent threads, each of them issues a"}],"source_content_type":"text/x-python","patch_set":58,"id":"e993d835_8c34b40c","line":3642,"in_reply_to":"4b948dd9_ba15a07b","updated":"2025-09-22 05:47:05.000000000","message":"the labels are the same, but other metrics panels will be able to see the cache connection errors which cache misses won\u0027t see.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"f3c176a0b2b9e61da8a19a7513ee2e08b18fc30b","unresolved":true,"context_lines":[{"line_number":3639,"context_line":"                \u0027set_cache_state\u0027: \u0027set\u0027,"},{"line_number":3640,"context_line":"                \u0027status\u0027: 200,"},{"line_number":3641,"context_line":"            }.items())): 1,"},{"line_number":3642,"context_line":"        }, stats)"},{"line_number":3643,"context_line":""},{"line_number":3644,"context_line":"    def test_concurrent_requests(self):"},{"line_number":3645,"context_line":"        # Simulate multiple concurrent threads, each of them issues a"}],"source_content_type":"text/x-python","patch_set":58,"id":"2d0c1936_d1377f18","line":3642,"in_reply_to":"e993d835_8c34b40c","updated":"2025-09-25 22:35:07.000000000","message":"\u003e ought to have some distinction for this case where memcache gets are failing rather than missing\n\nthis was really more of a decision of our memcache client, but we are 
starting to walk back on it a little - callers need to opt-in:\n\n```\n            cache_data \u003d self._memcache.get(\n                self._cache_key, raise_on_error\u003dFalse)\n```\n\n... should be possible to add later if needed.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":3648,"context_line":"        num_processes \u003d 100"},{"line_number":3649,"context_line":""},{"line_number":3650,"context_line":"        def worker_process():"},{"line_number":3651,"context_line":"            # Initialize new populator instance in each process."},{"line_number":3652,"context_line":"            populator \u003d self.DelayedCachePopulator("},{"line_number":3653,"context_line":"                MockApp(self.logger, self.statsd),"},{"line_number":3654,"context_line":"                {},"}],"source_content_type":"text/x-python","patch_set":58,"id":"c2f224c5_a1de35f7","line":3651,"updated":"2025-09-09 12:30:04.000000000","message":"right! better be sure to do that","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3648,"context_line":"        num_processes \u003d 100"},{"line_number":3649,"context_line":""},{"line_number":3650,"context_line":"        def worker_process():"},{"line_number":3651,"context_line":"            # Initialize new populator instance in each process."},{"line_number":3652,"context_line":"            populator \u003d self.DelayedCachePopulator("},{"line_number":3653,"context_line":"                MockApp(self.logger, self.statsd),"},{"line_number":3654,"context_line":"                {},"}],"source_content_type":"text/x-python","patch_set":58,"id":"71a3963d_bf3698f2","line":3651,"in_reply_to":"c2f224c5_a1de35f7","updated":"2025-09-22 05:47:05.000000000","message":"Acknowledged","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":3672,"context_line":"                self.assertEqual(populator.backend_resp, self.backend_resp)"},{"line_number":3673,"context_line":"            else:"},{"line_number":3674,"context_line":"                self.assertEqual(populator._infocache, {})"},{"line_number":3675,"context_line":"                self.assertIsNone(populator.backend_resp)"},{"line_number":3676,"context_line":""},{"line_number":3677,"context_line":"        # Issue those parallel requests \"at the same time\"."},{"line_number":3678,"context_line":"        pool \u003d eventlet.GreenPool()"}],"source_content_type":"text/x-python","patch_set":58,"id":"07d2fad1_3556cfcf","line":3675,"updated":"2025-09-09 12:30:04.000000000","message":"making these assertions in the greenthread doesn\u0027t work: TIL, the exceptions don\u0027t get raised back to the main thread via waitall().\n\nSo if these assertions, you\u0027ll see some noise in the console from eventlet debug reporting the tracebacks, but the test still passes:\n\n```\ndiff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py\nindex 
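For readers following the ``raise_on_error`` point above: Swift's ``MemcacheRing.get`` defaults to folding a connection error into a miss (returning ``None``), and a caller opts in to seeing the error. A sketch of what the opt-in distinction could look like; the try/except handling and variable names are illustrative, only ``MemcacheRing.get(key, raise_on_error=...)`` and ``MemcacheConnectionError`` are real Swift names:

```python
from swift.common.memcached import MemcacheConnectionError

# Sketch: raise_on_error=True surfaces a failed memcached round trip as
# MemcacheConnectionError, letting the caller distinguish "error" from
# "miss" (get() returning None) and label its metrics accordingly.
try:
    cache_data = memcache.get(cache_key, raise_on_error=True)
except MemcacheConnectionError:
    cache_data = None  # treat as a miss, but count it as an error
```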
2a13403f4..b2e0f988a 100644\n--- a/test/unit/common/test_utils.py\n+++ b/test/unit/common/test_utils.py\n@@ -3668,7 +3668,7 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n             if populator.set_cache_state \u003d\u003d \u0027set\u0027:\n                 self.assertTrue(populator.token_acquired)\n                 self.assertEqual(\n-                    populator._infocache[self.cache_key], \"backend data\")\n+                    populator._infocache[self.cache_key], \"NOT the backend data\")\n                 self.assertEqual(populator.backend_resp, self.backend_resp)\n             else:\n                 self.assertEqual(populator._infocache, {})\n\n\n```\n\nstill passes:\n\n```\n(swift-3.8.15) (acoles) ~/0dev/openstack/swift{review/jianjian_huo/token} % pytest ./test/unit/common/test_utils.py::TestCooperativeCachePopulator::test_concurrent_requests -s\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d test session starts \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\nplatform darwin -- Python 3.8.15, pytest-6.2.5, py-1.11.0, pluggy-1.2.0 -- /Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/bin/python\ncachedir: .pytest_cache\nrootdir: /Users/acoles/0dev/openstack/swift, configfile: tox.ini\nplugins: cov-2.12.1, repeat-0.9.3, subtests-0.13.1, flake8-1.1.0\ncollecting ... Unable to read test config /etc/swift/test.conf - file not found\ncollected 1 item\n\ntest/unit/common/test_utils.py::TestCooperativeCachePopulator::test_concurrent_requests Traceback (most recent call last):\n  File \"/Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/lib/python3.8/site-packages/eventlet/hubs/hub.py\", line 471, in fire_timers\n    timer()\n  File \"/Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/lib/python3.8/site-packages/eventlet/hubs/timer.py\", line 59, in __call__\n    cb(*args, **kw)\n  File \"/Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/lib/python3.8/site-packages/eventlet/greenthread.py\", line 265, in main\n    result \u003d function(*args, **kwargs)\n  File \"/Users/acoles/0dev/openstack/swift/test/unit/common/test_utils.py\", line 3670, in worker_process\n    self.assertEqual(\n  File \"/Users/acoles/.pyenv/versions/3.8.15/lib/python3.8/unittest/case.py\", line 912, in assertEqual\n    assertion_func(first, second, msg\u003dmsg)\n  File \"/Users/acoles/.pyenv/versions/3.8.15/lib/python3.8/unittest/case.py\", line 1292, in assertMultiLineEqual\n    self.fail(self._formatMessage(msg, standardMsg))\n  File \"/Users/acoles/.pyenv/versions/3.8.15/lib/python3.8/unittest/case.py\", line 753, in fail\n    raise self.failureException(msg)\nAssertionError: \u0027backend data\u0027 !\u003d \u0027NOT the backend data\u0027\n- backend data\n+ NOT the backend data\n? 
++++++++\n\nTraceback (most recent call last):\n  File \"/Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/lib/python3.8/site-packages/eventlet/hubs/hub.py\", line 471, in fire_timers\n    timer()\n  File \"/Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/lib/python3.8/site-packages/eventlet/hubs/timer.py\", line 59, in __call__\n    cb(*args, **kw)\n  File \"/Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/lib/python3.8/site-packages/eventlet/greenthread.py\", line 265, in main\n    result \u003d function(*args, **kwargs)\n  File \"/Users/acoles/0dev/openstack/swift/test/unit/common/test_utils.py\", line 3670, in worker_process\n    self.assertEqual(\n  File \"/Users/acoles/.pyenv/versions/3.8.15/lib/python3.8/unittest/case.py\", line 912, in assertEqual\n    assertion_func(first, second, msg\u003dmsg)\n  File \"/Users/acoles/.pyenv/versions/3.8.15/lib/python3.8/unittest/case.py\", line 1292, in assertMultiLineEqual\n    self.fail(self._formatMessage(msg, standardMsg))\n  File \"/Users/acoles/.pyenv/versions/3.8.15/lib/python3.8/unittest/case.py\", line 753, in fail\n    raise self.failureException(msg)\nAssertionError: \u0027backend data\u0027 !\u003d \u0027NOT the backend data\u0027\n- backend data\n+ NOT the backend data\n? ++++++++\n\nTraceback (most recent call last):\n  File \"/Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/lib/python3.8/site-packages/eventlet/hubs/hub.py\", line 471, in fire_timers\n    timer()\n  File \"/Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/lib/python3.8/site-packages/eventlet/hubs/timer.py\", line 59, in __call__\n    cb(*args, **kw)\n  File \"/Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/lib/python3.8/site-packages/eventlet/greenthread.py\", line 265, in main\n    result \u003d function(*args, **kwargs)\n  File \"/Users/acoles/0dev/openstack/swift/test/unit/common/test_utils.py\", line 3670, in worker_process\n    self.assertEqual(\n  File \"/Users/acoles/.pyenv/versions/3.8.15/lib/python3.8/unittest/case.py\", line 912, in assertEqual\n    assertion_func(first, second, msg\u003dmsg)\n  File \"/Users/acoles/.pyenv/versions/3.8.15/lib/python3.8/unittest/case.py\", line 1292, in assertMultiLineEqual\n    self.fail(self._formatMessage(msg, standardMsg))\n  File \"/Users/acoles/.pyenv/versions/3.8.15/lib/python3.8/unittest/case.py\", line 753, in fail\n    raise self.failureException(msg)\nAssertionError: \u0027backend data\u0027 !\u003d \u0027NOT the backend data\u0027\n- backend data\n+ NOT the backend data\n? 
++++++++\n\nPASSED\n\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d 1 passed in 1.27s \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\n```\n\nto make it more obvious, switch off the eventlet debugging\n```\ndiff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py\nindex 2a13403f4..727bb4a87 100644\n--- a/test/unit/common/test_utils.py\n+++ b/test/unit/common/test_utils.py\n@@ -3646,7 +3646,7 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n         # \"fetch_data\" request cooperatively.\n         self.avg_backend_fetch_time \u003d 0.01\n         num_processes \u003d 100\n-\n+        eventlet.hubs.get_hub().debug_exceptions \u003d False\n         def worker_process():\n             # Initialize new populator instance in each process.\n             populator \u003d self.DelayedCachePopulator(\n@@ -3668,7 +3668,7 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n             if populator.set_cache_state \u003d\u003d \u0027set\u0027:\n                 self.assertTrue(populator.token_acquired)\n                 self.assertEqual(\n-                    populator._infocache[self.cache_key], \"backend data\")\n+                    populator._infocache[self.cache_key], \"NOT the backend data\")\n                 self.assertEqual(populator.backend_resp, self.backend_resp)\n             else:\n                 self.assertEqual(populator._infocache, {})\n\n```\n\ngives:\n\n```\n(swift-3.8.15) (acoles) ~/0dev/openstack/swift{review/jianjian_huo/token} % pytest ./test/unit/common/test_utils.py::TestCooperativeCachePopulator::test_concurrent_requests -s\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d test session starts \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\nplatform darwin -- Python 3.8.15, pytest-6.2.5, py-1.11.0, pluggy-1.2.0 -- /Users/acoles/.pyenv/versions/3.8.15/envs/swift-3.8.15/bin/python\ncachedir: .pytest_cache\nrootdir: /Users/acoles/0dev/openstack/swift, configfile: tox.ini\nplugins: cov-2.12.1, repeat-0.9.3, subtests-0.13.1, flake8-1.1.0\ncollecting ... 
Unable to read test config /etc/swift/test.conf - file not found\ncollected 1 item\n\ntest/unit/common/test_utils.py::TestCooperativeCachePopulator::test_concurrent_requests PASSED\n\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d 1 passed in 0.62s \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\n```\n\nI think what you need to do is have each worker append its results to a list and then assert the list *in the main thread* once waitall() returns","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3672,"context_line":"                self.assertEqual(populator.backend_resp, self.backend_resp)"},{"line_number":3673,"context_line":"            else:"},{"line_number":3674,"context_line":"                self.assertEqual(populator._infocache, {})"},{"line_number":3675,"context_line":"                self.assertIsNone(populator.backend_resp)"},{"line_number":3676,"context_line":""},{"line_number":3677,"context_line":"        # Issue those parallel requests \"at the same time\"."},{"line_number":3678,"context_line":"        pool \u003d eventlet.GreenPool()"}],"source_content_type":"text/x-python","patch_set":58,"id":"f05829bf_3dd606f1","line":3675,"in_reply_to":"07d2fad1_3556cfcf","updated":"2025-09-22 05:47:05.000000000","message":"wow...thanks for pointing it out, TIL for me as well!","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":3712,"context_line":"        # Simulate multiple concurrent threads issued into a cooperative token"},{"line_number":3713,"context_line":"        # session, each thread will issue a \"fetch_data\" request cooperatively."},{"line_number":3714,"context_line":"        # And the first three requests will acquire the token, but fail to get"},{"line_number":3715,"context_line":"        # data from the backend. 
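The fix acoles describes (workers record results, the main thread asserts after ``waitall()``) reduces to this general pattern. A minimal sketch: ``fetch_data`` is an illustrative stand-in for the unit under test, and only ``eventlet.GreenPool`` is the real API.

```python
import eventlet

results = []

def worker():
    # No assertions in here: an AssertionError raised inside a
    # greenthread is only logged by the eventlet hub; waitall() still
    # returns normally and the test would pass regardless.
    results.append(fetch_data())  # illustrative stand-in

pool = eventlet.GreenPool()
for _ in range(100):
    pool.spawn(worker)
pool.waitall()

# Assert in the main thread, where a failure actually fails the test.
assert len(results) == 100
assert all(data == "backend data" for data in results)
```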
This test also demostrates that even though"},{"line_number":3716,"context_line":"        # all token requests fail to go through, other requests who arrive at"},{"line_number":3717,"context_line":"        # the late stage of same token session and won\u0027t get a token still"},{"line_number":3718,"context_line":"        # could be served out of the memcached."}],"source_content_type":"text/x-python","patch_set":58,"id":"ddf18665_e70bbb5b","line":3715,"range":{"start_line":3715,"start_character":48,"end_line":3715,"end_character":59},"updated":"2025-09-09 12:30:04.000000000","message":"typo: demonstrates","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3712,"context_line":"        # Simulate multiple concurrent threads issued into a cooperative token"},{"line_number":3713,"context_line":"        # session, each thread will issue a \"fetch_data\" request cooperatively."},{"line_number":3714,"context_line":"        # And the first three requests will acquire the token, but fail to get"},{"line_number":3715,"context_line":"        # data from the backend. This test also demostrates that even though"},{"line_number":3716,"context_line":"        # all token requests fail to go through, other requests who arrive at"},{"line_number":3717,"context_line":"        # the late stage of same token session and won\u0027t get a token still"},{"line_number":3718,"context_line":"        # could be served out of the memcached."}],"source_content_type":"text/x-python","patch_set":58,"id":"d9340f7f_db0e3836","line":3715,"range":{"start_line":3715,"start_character":48,"end_line":3715,"end_character":59},"in_reply_to":"ddf18665_e70bbb5b","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":3757,"context_line":"                self.assertEqual(populator._infocache, {})"},{"line_number":3758,"context_line":"                self.assertIsNone(populator.backend_resp)"},{"line_number":3759,"context_line":"            else:"},{"line_number":3760,"context_line":"                counts[\u0027num_backend_failures\u0027] +\u003d 1"},{"line_number":3761,"context_line":""},{"line_number":3762,"context_line":"        # Issue those parallel requests at different time within this"},{"line_number":3763,"context_line":"        # cooperative token session."}],"source_content_type":"text/x-python","patch_set":58,"id":"efb6f1b9_a2074849","line":3760,"updated":"2025-09-09 12:30:04.000000000","message":"I found myself feeling that DelayedCachePopulator.fetch_backend_data ought to be recording whether it is called or not, and then we make assertions about *that*. In this test and the other concurrent ones. IDK, it feels like we\u0027re relying on attributes of the unit under test (e.g. 
populator.set_cache_state) to make conditional assertions.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"612655d3eb285f86ae1aa4e296980f7f239fa23f","unresolved":true,"context_lines":[{"line_number":3757,"context_line":"                self.assertEqual(populator._infocache, {})"},{"line_number":3758,"context_line":"                self.assertIsNone(populator.backend_resp)"},{"line_number":3759,"context_line":"            else:"},{"line_number":3760,"context_line":"                counts[\u0027num_backend_failures\u0027] +\u003d 1"},{"line_number":3761,"context_line":""},{"line_number":3762,"context_line":"        # Issue those parallel requests at different time within this"},{"line_number":3763,"context_line":"        # cooperative token session."}],"source_content_type":"text/x-python","patch_set":58,"id":"5635898d_0065e754","line":3760,"in_reply_to":"efb6f1b9_a2074849","updated":"2025-09-29 21:14:23.000000000","message":"I think `populator.backend_resp is not None` is THE canonical way to ask if `CooperativeCachePopulator` called the subclass\u0027 `do_fetch_backend`\n\nthe whole `set_cache_state` attribute as a concept is as best I can tell just an unfortunate requirement that grew out of trying to maintain some of the current cache related legacy-metrics for updating-shard-ranges.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":3803,"context_line":"            \u0027num_backend_failures\u0027: 0,"},{"line_number":3804,"context_line":"        }"},{"line_number":3805,"context_line":""},{"line_number":3806,"context_line":"        def worker_process(exec_delay\u003d0, backend_delay\u003d0,"},{"line_number":3807,"context_line":"                           fetch_backend_failure\u003dFalse):"},{"line_number":3808,"context_line":"            # Initialize new populator instance in each process."},{"line_number":3809,"context_line":"            populator \u003d self.DelayedCachePopulator("}],"source_content_type":"text/x-python","patch_set":58,"id":"eb6406bb_7de9139f","line":3806,"range":{"start_line":3806,"start_character":27,"end_line":3806,"end_character":56},"updated":"2025-09-09 12:30:04.000000000","message":"AFAICT the default values are never used?","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3803,"context_line":"            \u0027num_backend_failures\u0027: 0,"},{"line_number":3804,"context_line":"        }"},{"line_number":3805,"context_line":""},{"line_number":3806,"context_line":"        def worker_process(exec_delay\u003d0, backend_delay\u003d0,"},{"line_number":3807,"context_line":"                           fetch_backend_failure\u003dFalse):"},{"line_number":3808,"context_line":"            # Initialize new populator instance in each process."},{"line_number":3809,"context_line":"            populator \u003d 
self.DelayedCachePopulator("}],"source_content_type":"text/x-python","patch_set":58,"id":"686eb5cd_b72d1f67","line":3806,"range":{"start_line":3806,"start_character":27,"end_line":3806,"end_character":56},"in_reply_to":"eb6406bb_7de9139f","updated":"2025-09-22 05:47:05.000000000","message":"Done","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":3843,"context_line":"        pool \u003d eventlet.GreenPool()"},{"line_number":3844,"context_line":"        for i in range(3):"},{"line_number":3845,"context_line":"            pool.spawn("},{"line_number":3846,"context_line":"                worker_process, 0, self.avg_backend_fetch_time * 15, True)"},{"line_number":3847,"context_line":"        for i in range(17):"},{"line_number":3848,"context_line":"            pool.spawn("},{"line_number":3849,"context_line":"                worker_process,"}],"source_content_type":"text/x-python","patch_set":58,"id":"c4f7a4f8_d9f6d523","line":3846,"updated":"2025-09-09 12:30:04.000000000","message":"these exec immediately but take a long time to respond, then respond with failure","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3843,"context_line":"        pool \u003d eventlet.GreenPool()"},{"line_number":3844,"context_line":"        for i in range(3):"},{"line_number":3845,"context_line":"            pool.spawn("},{"line_number":3846,"context_line":"                worker_process, 0, self.avg_backend_fetch_time * 15, True)"},{"line_number":3847,"context_line":"        for i in range(17):"},{"line_number":3848,"context_line":"            pool.spawn("},{"line_number":3849,"context_line":"                worker_process,"}],"source_content_type":"text/x-python","patch_set":58,"id":"2445cc12_2f1ad833","line":3846,"in_reply_to":"c4f7a4f8_d9f6d523","updated":"2025-09-22 05:47:05.000000000","message":"Acknowledged","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":3848,"context_line":"            pool.spawn("},{"line_number":3849,"context_line":"                worker_process,"},{"line_number":3850,"context_line":"                random.uniform(0, self.avg_backend_fetch_time * 10),"},{"line_number":3851,"context_line":"                self.avg_backend_fetch_time"},{"line_number":3852,"context_line":"            )"},{"line_number":3853,"context_line":""},{"line_number":3854,"context_line":"        # Issue the parallel requests for the second token session."}],"source_content_type":"text/x-python","patch_set":58,"id":"e39765cb_e5a471d8","line":3851,"updated":"2025-09-09 12:30:04.000000000","message":"these exec during the token_ttl period","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3848,"context_line":"            
pool.spawn("},{"line_number":3849,"context_line":"                worker_process,"},{"line_number":3850,"context_line":"                random.uniform(0, self.avg_backend_fetch_time * 10),"},{"line_number":3851,"context_line":"                self.avg_backend_fetch_time"},{"line_number":3852,"context_line":"            )"},{"line_number":3853,"context_line":""},{"line_number":3854,"context_line":"        # Issue the parallel requests for the second token session."}],"source_content_type":"text/x-python","patch_set":58,"id":"104aa0c3_0e6fea44","line":3851,"in_reply_to":"e39765cb_e5a471d8","updated":"2025-09-22 05:47:05.000000000","message":"Acknowledged","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":3857,"context_line":"                worker_process,"},{"line_number":3858,"context_line":"                self.avg_backend_fetch_time * 10,"},{"line_number":3859,"context_line":"                self.avg_backend_fetch_time * 5"},{"line_number":3860,"context_line":"            )"},{"line_number":3861,"context_line":"        for i in range(17):"},{"line_number":3862,"context_line":"            pool.spawn("},{"line_number":3863,"context_line":"                worker_process,"}],"source_content_type":"text/x-python","patch_set":58,"id":"a8f7f455_770082de","line":3860,"updated":"2025-09-09 12:30:04.000000000","message":"these exec after the first token_ttl time, and respond with success","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3857,"context_line":"                worker_process,"},{"line_number":3858,"context_line":"                self.avg_backend_fetch_time * 10,"},{"line_number":3859,"context_line":"                self.avg_backend_fetch_time * 5"},{"line_number":3860,"context_line":"            )"},{"line_number":3861,"context_line":"        for i in range(17):"},{"line_number":3862,"context_line":"            pool.spawn("},{"line_number":3863,"context_line":"                worker_process,"}],"source_content_type":"text/x-python","patch_set":58,"id":"8082541e_ef9b6e8c","line":3860,"in_reply_to":"a8f7f455_770082de","updated":"2025-09-22 05:47:05.000000000","message":"Acknowledged","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":7847,"name":"Alistair Coles","email":"alistairncoles@gmail.com","username":"acoles"},"change_message_id":"7eb3d31e196a6e5a6a8c76ca8e59df24d2bd878f","unresolved":true,"context_lines":[{"line_number":3879,"context_line":"        self.assertEqual(len(self.memcache.incr_calls), 40)"},{"line_number":3880,"context_line":"        # The first three requests of the second token session will delete the"},{"line_number":3881,"context_line":"        # token after fetching data from the backend and set it in cache."},{"line_number":3882,"context_line":"        self.assertEqual(len(self.memcache.del_calls), 3)"},{"line_number":3883,"context_line":""},{"line_number":3884,"context_line":""},{"line_number":3885,"context_line":"class TestUnlinkOlder(unittest.TestCase):"}],"source_content_type":"text/x-python","patch_set":58,"id":"4581f198_23a7a899","line":3882,"updated":"2025-09-09 
12:30:04.000000000","message":"how are we sure that one of the no-token requests that exits the first token_ttl and goes to the backend won\u0027t also set cache and delete the token, if its exec_delay is randomly chosen to be 0?\n\nIf A \u003d avg_backend_fetch_time:\n\n* we have 17 non-token requests in the first session that will stop waiting possibly as soon as 10*A, and then go to the backend.\n* we have first 3 requests in second session also at 10*A that will go to the backend\n\nso why don\u0027t we expect more than 3 backend requests? what stopped the 17 going to the backend?","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"cb30366c3b2ccada28b2a16e0775ced3bcd09787","unresolved":false,"context_lines":[{"line_number":3879,"context_line":"        self.assertEqual(len(self.memcache.incr_calls), 40)"},{"line_number":3880,"context_line":"        # The first three requests of the second token session will delete the"},{"line_number":3881,"context_line":"        # token after fetching data from the backend and set it in cache."},{"line_number":3882,"context_line":"        self.assertEqual(len(self.memcache.del_calls), 3)"},{"line_number":3883,"context_line":""},{"line_number":3884,"context_line":""},{"line_number":3885,"context_line":"class TestUnlinkOlder(unittest.TestCase):"}],"source_content_type":"text/x-python","patch_set":58,"id":"a869c3dc_58d1346e","line":3882,"in_reply_to":"4581f198_23a7a899","updated":"2025-09-22 05:47:05.000000000","message":"no-token requests won\u0027t delete the token after setting cache, only complete token requests can.","commit_id":"255e867d3b64796a9d646b3d2b152167c378f577"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"f3c176a0b2b9e61da8a19a7513ee2e08b18fc30b","unresolved":true,"context_lines":[{"line_number":3057,"context_line":"            self.cache_key, self.cache_ttl,"},{"line_number":3058,"context_line":"            avg_backend_fetch_time\u003d.1,"},{"line_number":3059,"context_line":"            num_tokens\u003d0, labels\u003d{"},{"line_number":3060,"context_line":"                \u0027resource\u0027: \"updating_shard\","},{"line_number":3061,"context_line":"            }"},{"line_number":3062,"context_line":"        )"},{"line_number":3063,"context_line":"        data \u003d populator.fetch_data()"}],"source_content_type":"text/x-python","patch_set":60,"id":"194a36c2_eb7911d4","line":3060,"updated":"2025-09-25 22:35:07.000000000","message":"FWIW in the follow-on change this resource is always \"shard_updating\"\n\nit might be better to use \"test\" to avoid confusion.","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"10f897d87faa2a035331daf6011424bc796d7cfc","unresolved":false,"context_lines":[{"line_number":3057,"context_line":"            self.cache_key, self.cache_ttl,"},{"line_number":3058,"context_line":"            avg_backend_fetch_time\u003d.1,"},{"line_number":3059,"context_line":"            num_tokens\u003d0, labels\u003d{"},{"line_number":3060,"context_line":"                \u0027resource\u0027: \"updating_shard\","},{"line_number":3061,"context_line":"            }"},{"line_number":3062,"context_line":"        )"},{"line_number":3063,"context_line":"        data \u003d 
populator.fetch_data()"}],"source_content_type":"text/x-python","patch_set":60,"id":"2595b536_9a1fb5b1","line":3060,"in_reply_to":"194a36c2_eb7911d4","updated":"2025-09-26 18:29:20.000000000","message":"Done","commit_id":"b74296ef8a4902726852bae1a0e80eb15061efa8"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"612655d3eb285f86ae1aa4e296980f7f239fa23f","unresolved":true,"context_lines":[{"line_number":3631,"context_line":"                    self.assertEqual(populator._infocache, {})"},{"line_number":3632,"context_line":"                    self.assertIsNone(populator.backend_resp)"},{"line_number":3633,"context_line":"            except Exception as e:"},{"line_number":3634,"context_line":"                exceptions.append(e)"},{"line_number":3635,"context_line":""},{"line_number":3636,"context_line":"        # Issue those parallel requests \"at the same time\"."},{"line_number":3637,"context_line":"        pool \u003d eventlet.GreenPool()"}],"source_content_type":"text/x-python","patch_set":61,"id":"aece218a_ff394957","line":3634,"updated":"2025-09-29 21:14:23.000000000","message":"please don\u0027t make assertions as part of the crank turn - it makes it harder to debug/maintain when things go wrong.  IMHO, better as:\n\n```\ndiff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py\nindex af64c6023..949c5209e 100644\n--- a/test/unit/common/test_utils.py\n+++ b/test/unit/common/test_utils.py\n@@ -3011,8 +3011,10 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n             )\n             self._backend_delay \u003d backend_delay\n             self._fetch_backend_failure \u003d fetch_backend_failure\n+            self.my_fetch_was_called \u003d False\n \n         def do_fetch_backend(self):\n+            self.my_fetch_was_called \u003d True\n             if self._backend_delay:\n                 eventlet.sleep(self._backend_delay)\n             if self._fetch_backend_failure:\n@@ -3603,7 +3605,7 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n         # \"fetch_data\" request cooperatively.\n         self.avg_backend_fetch_time \u003d 0.01\n         num_processes \u003d 100\n-        exceptions \u003d []\n+        captured_results \u003d []\n \n         def worker_process():\n             # Initialize new populator instance in each process.\n@@ -3618,20 +3620,7 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n                 }\n             )\n             data \u003d populator.fetch_data()\n-\n-            try:\n-                # Data retrieved successfully\n-                self.assertEqual(data, \"backend data\")\n-                if populator.set_cache_state \u003d\u003d \u0027set\u0027:\n-                    self.assertTrue(populator.token_acquired)\n-                    self.assertEqual(\n-                        populator._infocache[self.cache_key], \"backend data\")\n-                    self.assertEqual(populator.backend_resp, self.backend_resp)\n-                else:\n-                    self.assertEqual(populator._infocache, {})\n-                    self.assertIsNone(populator.backend_resp)\n-            except Exception as e:\n-                exceptions.append(e)\n+            captured_results.append((data, populator))\n \n         # Issue those parallel requests \"at the same time\".\n         pool \u003d eventlet.GreenPool()\n@@ -3640,8 +3629,18 @@ class TestCooperativeCachePopulator(unittest.TestCase):\n \n         # Wait for all 
requests to complete\n         pool.waitall()\n-        if exceptions:\n-            self.fail(f\"Greenthread assertions failed: {exceptions}\")\n+        self.assertEqual(len(captured_results), num_processes)\n+        for data, populator in captured_results:\n+            self.assertEqual(data, \"backend data\")\n+            if populator.my_fetch_was_called:\n+                self.assertTrue(populator.set_cache_state \u003d\u003d \u0027set\u0027)\n+                self.assertTrue(populator.token_acquired)\n+                self.assertEqual(\n+                    populator._infocache[self.cache_key], \"backend data\")\n+                self.assertEqual(populator.backend_resp, self.backend_resp)\n+            else:\n+                self.assertEqual(populator._infocache, {})\n+                self.assertIsNone(populator.backend_resp)\n         stats \u003d self.statsd.get_labeled_stats_counts()\n         self.assertEqual({\n             (\u0027swift_coop_cache\u0027, frozenset((\n```","commit_id":"707a65ab3c2150fad093a904c3a3d099d74fa236"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"c8ddb65a54a3410032c70039e6a73a7738b32128","unresolved":true,"context_lines":[{"line_number":3631,"context_line":"                    self.assertEqual(populator._infocache, {})"},{"line_number":3632,"context_line":"                    self.assertIsNone(populator.backend_resp)"},{"line_number":3633,"context_line":"            except Exception as e:"},{"line_number":3634,"context_line":"                exceptions.append(e)"},{"line_number":3635,"context_line":""},{"line_number":3636,"context_line":"        # Issue those parallel requests \"at the same time\"."},{"line_number":3637,"context_line":"        pool \u003d eventlet.GreenPool()"}],"source_content_type":"text/x-python","patch_set":61,"id":"c5c9c99b_fef4410a","line":3634,"in_reply_to":"aece218a_ff394957","updated":"2025-09-30 18:25:34.000000000","message":"with the existing code, when an exception is raised, I see this on my screen:\n```\n        if exceptions:\n\u003e           self.fail(f\"Greenthread assertions failed: {exceptions}\")\nE           AssertionError: Greenthread assertions failed: [AssertionError(\"\u0027backend data\u0027 !\u003d \u0027backend xxx\u0027\\n- backend data\\n+ backend xxx\\n\"), AssertionError(\"\u0027backend data\u0027 !\u003d \u0027backend xxx\u0027\\n- backend data\\n+ backend xxx\\n\"), AssertionError(\"\u0027backend data\u0027 !\u003d \u0027backend xxx\u0027\\n- backend data\\n+ backend xxx\\n\")]\n```\n\nand with the suggested changes, the exception looks like this:\n```\n\u003e           self.assertEqual(data, \"backend xxx\")\nE           AssertionError: \u0027backend data\u0027 !\u003d \u0027backend xxx\u0027\nE           - backend data\nE           + backend xxx\n\nswift/test/unit/common/test_utils.py:3634: AssertionError\n```\n\nWe need to either catch assertion exceptions or capture the raw ``data, populator``, because assertions just won\u0027t propagate out of a green thread. 
The advantage of capturing the raw ``data, populator`` is that it will show exactly the first assertion failure, while the existing exception catching shows all of the assertion failures from the executions of all the green threads.\n\nFrom a debugging point of view, I feel either approach is okay; but catching assertion failures within green threads is more readable I think, especially when those assertions use internal variables from lots of different green threads, as in the similar test case ``test_concurrent_requests_all_token_requests_fail``.","commit_id":"707a65ab3c2150fad093a904c3a3d099d74fa236"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"612655d3eb285f86ae1aa4e296980f7f239fa23f","unresolved":true,"context_lines":[{"line_number":3729,"context_line":"            pool.spawn("},{"line_number":3730,"context_line":"                worker_process, 0, self.avg_backend_fetch_time * 15, True)"},{"line_number":3731,"context_line":"        # The 4th request won\u0027t get a token, but after it exits its waiting"},{"line_number":3732,"context_line":"        # cycles, it will fetch the data from the backend and set the data into"},{"line_number":3733,"context_line":"        # the memcached."},{"line_number":3734,"context_line":"        pool.spawn(worker_process, 0, 0)"},{"line_number":3735,"context_line":"        # The remaining 16 requests won\u0027t get a token, but will be served out"}],"source_content_type":"text/x-python","patch_set":61,"id":"40833c33_47892b2b","line":3732,"updated":"2025-09-29 21:14:23.000000000","message":"\u003e but after it exits its waiting cycles\n\nI love how this test is every bit as slow/bad as the real failure mode:\n\n```\nvagrant@saio:~$ time pytest swift/test/unit/common/test_utils.py::TestCooperativeCachePopulator::test_concurrent_requests_all_token_requests_fail \u003e /dev/null\n\nreal    0m2.137s\nuser    0m0.394s\nsys     0m0.087s\n```\n\nIn fairness, we HAVE to write the code that handles this timeout case for when all the num_token requests die brutally - the code that more \"optimistically\" allows the waiting requests to immediately fetch the backend when all the num_token requests error gracefully is optional - and you might be able to argue that if all three of the num_token requests failed, giving the server/database 10s to catch its breath isn\u0027t the WORST behavior.","commit_id":"707a65ab3c2150fad093a904c3a3d099d74fa236"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"612655d3eb285f86ae1aa4e296980f7f239fa23f","unresolved":true,"context_lines":[{"line_number":3739,"context_line":"            pool.spawn("},{"line_number":3740,"context_line":"                worker_process,"},{"line_number":3741,"context_line":"                random.uniform(self.avg_backend_fetch_time * 3,"},{"line_number":3742,"context_line":"                               self.avg_backend_fetch_time * 6),"},{"line_number":3743,"context_line":"                self.avg_backend_fetch_time"},{"line_number":3744,"context_line":"            )"},{"line_number":3745,"context_line":""}],"source_content_type":"text/x-python","patch_set":61,"id":"093d7a4a_a5543239","line":3742,"updated":"2025-09-29 21:14:23.000000000","message":"this is important to keep in mind - in my contrived thought experiments all 100 requests arrive at *exactly* the same time, 3 try and die, 97 timeout and hit the backend.\n\nIn the REAL world 
tho they arrive slightly staggered and will hit the backend in the same order/pace they were received (just like on master).  But better than master - that flood is *delayed*; and any requests collected up to a token_ttl get unblocked if anyone eventually gets a response and sets memcache.\n\nAn idealized implementation might put all requests in a FIFO and let backend requests go out at a decreasing rate until we finish the queue or set the cache - but there might be a trade-off in how much pounding you want to put on memcache beyond \"is the cache key I actually care about there, yet?\"","commit_id":"707a65ab3c2150fad093a904c3a3d099d74fa236"},{"author":{"_account_id":34930,"name":"Jianjian Huo","email":"jhuo@nvidia.com","username":"jhuo"},"change_message_id":"c8ddb65a54a3410032c70039e6a73a7738b32128","unresolved":true,"context_lines":[{"line_number":3739,"context_line":"            pool.spawn("},{"line_number":3740,"context_line":"                worker_process,"},{"line_number":3741,"context_line":"                random.uniform(self.avg_backend_fetch_time * 3,"},{"line_number":3742,"context_line":"                               self.avg_backend_fetch_time * 6),"},{"line_number":3743,"context_line":"                self.avg_backend_fetch_time"},{"line_number":3744,"context_line":"            )"},{"line_number":3745,"context_line":""}],"source_content_type":"text/x-python","patch_set":61,"id":"d9fabf60_539b3851","line":3742,"in_reply_to":"093d7a4a_a5543239","updated":"2025-09-30 18:25:34.000000000","message":"this is an interesting thought; it probably needs more discussion. I will summarize the different ideas and bring them up during the PTG or other meetings. Thanks!","commit_id":"707a65ab3c2150fad093a904c3a3d099d74fa236"},{"author":{"_account_id":1179,"name":"Clay Gerrard","email":"clay.gerrard@gmail.com","username":"clay-gerrard"},"change_message_id":"612655d3eb285f86ae1aa4e296980f7f239fa23f","unresolved":true,"context_lines":[{"line_number":3760,"context_line":"        # sessions, each thread will issue a \"fetch_data\" request"},{"line_number":3761,"context_line":"        # cooperatively. 
The very first three requests which will acquire the"},{"line_number":3762,"context_line":"        # token, but fail to fetch data from backend."},{"line_number":3763,"context_line":"        self.avg_backend_fetch_time \u003d 0.1"},{"line_number":3764,"context_line":"        counts \u003d {"},{"line_number":3765,"context_line":"            \u0027num_backend_success\u0027: 0,"},{"line_number":3766,"context_line":"            \u0027num_requests_served_from_cache\u0027: 0,"}],"source_content_type":"text/x-python","patch_set":61,"id":"e77e8649_c0e29068","line":3763,"updated":"2025-09-29 21:14:23.000000000","message":"I think this means the test has to be at least 1s?\n\n```\n1.68s call     test/unit/common/test_utils.py::TestCooperativeCachePopulator::test_concurrent_requests_pass_token_ttl\n1.65s call     test/unit/common/test_utils.py::TestCooperativeCachePopulator::test_concurrent_requests_all_token_requests_fail\n0.02s call     test/unit/common/test_utils.py::TestCooperativeCachePopulator::test_concurrent_requests\n0.01s call     test/unit/common/test_utils.py::TestCooperativeCachePopulator::test_fetch_data_cache_miss_without_token\n0.01s call     test/unit/common/test_utils.py::TestCooperativeCachePopulator::test_fetch_data_from_cache_connection_error\n0.01s call     test/unit/common/test_utils.py::TestCooperativeCachePopulator::test_fetch_data_cache_hit_without_token\n```","commit_id":"707a65ab3c2150fad093a904c3a3d099d74fa236"}]}
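
As a minimal sketch of the "idealized implementation" floated in the review comments above - put waiting requests in a FIFO and let backend fetches go out at a decreasing rate until the queue drains or the cache is set - the following hypothetical eventlet code illustrates the shape of the idea. Everything here is invented for illustration: `FifoCachePopulator`, `fetch_backend`, and the backoff numbers are assumptions, not Swift's `CooperativeCachePopulator` API.

```
# Hypothetical sketch only - not Swift code. Waiting requests join a FIFO;
# a single dispatcher releases backend fetches one at a time, backing off
# (a decreasing request rate) after each failure, and drains the rest of
# the queue from cache as soon as any fetch succeeds and sets it.
import eventlet
from eventlet import event, queue


class FifoCachePopulator(object):
    def __init__(self, fetch_backend, initial_gap=0.05, growth=2.0,
                 max_gap=1.0):
        self.fetch_backend = fetch_backend  # callable(key) -> data; may raise
        self.initial_gap = initial_gap
        self.growth = growth
        self.max_gap = max_gap
        self.cache = {}                     # stand-in for memcached
        self.waiters = queue.Queue()
        self.dispatcher = None

    def fetch_data(self, key):
        # Cheap "is the cache key I actually care about there, yet?" check.
        if key in self.cache:
            return self.cache[key]
        done = event.Event()
        self.waiters.put((key, done))
        # The first waiter starts the dispatcher; eventlet's cooperative
        # scheduling means there is no preemption between check and spawn.
        if self.dispatcher is None:
            self.dispatcher = eventlet.spawn(self._dispatch)
        return done.wait()                  # re-raises if our turn failed

    def _dispatch(self):
        gap = self.initial_gap
        while not self.waiters.empty():
            key, done = self.waiters.get()
            if key in self.cache:
                # Someone ahead of us set the cache: no backend hit at all.
                done.send(self.cache[key])
                continue
            try:
                data = self.fetch_backend(key)
                self.cache[key] = data      # "sets memcache" for the rest
                done.send(data)
            except Exception as err:
                done.send_exception(err)
                # Back off before releasing the next fetch, so the backend
                # sees a decreasing request rate instead of a flood.
                eventlet.sleep(gap)
                gap = min(self.max_gap, gap * self.growth)
        self.dispatcher = None              # simplified; real code would
                                            # handle the shutdown race
```

Per the trade-off noted in the thread, waiters in this sketch only touch the shared cache when their turn comes up rather than polling it while queued; polling would unblock waiters sooner once anyone sets the cache, at the cost of extra pounding on memcache.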
